commit
184d50aa0b
25 changed files with 1436 additions and 0 deletions
@ -0,0 +1,39 @@ |
|||
target/ |
|||
!.mvn/wrapper/maven-wrapper.jar |
|||
!**/src/main/**/target/ |
|||
!**/src/test/**/target/ |
|||
.kotlin |
|||
|
|||
### IntelliJ IDEA ### |
|||
.idea/modules.xml |
|||
.idea/jarRepositories.xml |
|||
.idea/compiler.xml |
|||
.idea/libraries/ |
|||
*.iws |
|||
*.iml |
|||
*.ipr |
|||
|
|||
### Eclipse ### |
|||
.apt_generated |
|||
.classpath |
|||
.factorypath |
|||
.project |
|||
.settings |
|||
.springBeans |
|||
.sts4-cache |
|||
|
|||
### NetBeans ### |
|||
/nbproject/private/ |
|||
/nbbuild/ |
|||
/dist/ |
|||
/nbdist/ |
|||
/.nb-gradle/ |
|||
build/ |
|||
!**/src/main/**/build/ |
|||
!**/src/test/**/build/ |
|||
|
|||
### VS Code ### |
|||
.vscode/ |
|||
|
|||
### Mac OS ### |
|||
.DS_Store |
|||
@ -0,0 +1,10 @@ |
|||
# Default ignored files |
|||
/shelf/ |
|||
/workspace.xml |
|||
# Editor-based HTTP Client requests |
|||
/httpRequests/ |
|||
# Ignored default folder with query files |
|||
/queries/ |
|||
# Datasource local storage ignored files |
|||
/dataSources/ |
|||
/dataSources.local.xml |
|||
@ -0,0 +1,7 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="Encoding"> |
|||
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" /> |
|||
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,14 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ExternalStorageConfigurationManager" enabled="true" /> |
|||
<component name="MavenProjectsManager"> |
|||
<option name="originalFiles"> |
|||
<list> |
|||
<option value="$PROJECT_DIR$/pom.xml" /> |
|||
</list> |
|||
</option> |
|||
</component> |
|||
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="21" project-jdk-type="JavaSDK"> |
|||
<output url="file://$PROJECT_DIR$/out" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,6 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="VcsDirectoryMappings"> |
|||
<mapping directory="$PROJECT_DIR$" vcs="Git" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,22 @@ |
|||
[ { |
|||
"title" : "Example Domain", |
|||
"url" : "https://example.com", |
|||
"content" : "Example Domain This domain is for use in documentation examples without needing permission. Avoid use in operations. Learn more", |
|||
"crawled_at" : [ 2026, 6, 4, 5, 50, 59, 489282900 ], |
|||
"website_name" : "example.com", |
|||
"word_count" : 19 |
|||
}, { |
|||
"title" : "No Title", |
|||
"url" : "https://httpbin.org/html", |
|||
"content" : "Herman Melville - Moby-Dick Availing himself of the mild, summer-cool weather that now reigned in these latitudes, and in preparation for the peculiarly active pursuits shortly to be anticipated, Perth, the begrimed, blistered old blacksmith, had not removed his portable forge to the hold again, after concluding his contributory work for Ahab's leg, but still retained it on deck, fast lashed to ringbolts by the foremast; being now almost incessantly invoked by the headsmen, and harpooneers, and ...", |
|||
"crawled_at" : [ 2026, 6, 4, 5, 51, 0, 805255900 ], |
|||
"website_name" : "httpbin.org", |
|||
"word_count" : 78 |
|||
}, { |
|||
"title" : "Quotes to Scrape", |
|||
"url" : "https://quotes.toscrape.com", |
|||
"content" : "Quotes to Scrape Login “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.” by Albert Einstein (about) Tags: change deep-thoughts thinking world “It is our choices, Harry, that show what we truly are, far more than our abilities.” by J.K. Rowling (about) Tags: abilities choices “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.” by Albert Einstein (about)...", |
|||
"crawled_at" : [ 2026, 6, 4, 5, 51, 1, 895941300 ], |
|||
"website_name" : "quotes.toscrape.com", |
|||
"word_count" : 87 |
|||
} ] |
|||
@ -0,0 +1,246 @@ |
|||
2026-06-04 04:19:28.967 [main] INFO com.abod.crawler.Main - Web Crawler Application Started |
|||
2026-06-04 04:19:28.970 [main] INFO com.abod.crawler.Main - Web Crawler Application Shutdown |
|||
2026-06-04 04:36:38.625 [main] INFO com.abod.crawler.Main - Web Crawler Application Started |
|||
2026-06-04 04:36:38.628 [main] INFO c.y.crawler.model.ArticleRepository - ArticleRepository initialized |
|||
2026-06-04 04:36:38.636 [main] INFO com.abod.crawler.Main - Web Crawler Application Shutdown |
|||
2026-06-04 04:54:49.725 [main] INFO com.abod.crawler.Main - Web Crawler Application Started |
|||
2026-06-04 04:54:49.728 [main] INFO c.y.crawler.model.ArticleRepository - ArticleRepository initialized |
|||
2026-06-04 04:54:49.746 [main] INFO c.y.c.controller.CrawlerController - CrawlerController initialized with 7 commands |
|||
2026-06-04 05:02:07.337 [main] INFO com.abod.crawler.Main - Web Crawler Application Started |
|||
2026-06-04 05:02:07.340 [main] INFO c.y.crawler.model.ArticleRepository - ArticleRepository initialized |
|||
2026-06-04 05:02:07.358 [main] INFO c.y.c.controller.CrawlerController - CrawlerController initialized with 7 commands |
|||
2026-06-04 05:03:00.376 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:03:01.514 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:03:14.499 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 1 articles (newest first) |
|||
2026-06-04 05:03:32.683 [main] INFO c.yourname.crawler.view.ConsoleView - Total articles in repository: 1 |
|||
2026-06-04 05:03:40.361 [main] INFO c.yourname.crawler.view.ConsoleView - Starting multi-URL crawl... |
|||
2026-06-04 05:03:40.361 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:03:40.361 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:03:40.361 [main] WARN c.yourname.crawler.view.ConsoleView - URL already crawled! Use import to load existing data. |
|||
2026-06-04 05:03:40.361 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.google.com |
|||
2026-06-04 05:03:40.362 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.google.com |
|||
2026-06-04 05:03:50.401 [main] ERROR c.y.crawler.command.CrawlCommand - Failed to crawl URL: https://www.google.com |
|||
java.net.SocketTimeoutException: Connect timed out |
|||
at java.base/sun.nio.ch.NioSocketImpl.timedFinishConnect(NioSocketImpl.java:546) |
|||
at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:592) |
|||
at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:327) |
|||
at java.base/java.net.Socket.connect(Socket.java:751) |
|||
at java.base/sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:304) |
|||
at java.base/sun.net.NetworkClient.doConnect(NetworkClient.java:178) |
|||
at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:531) |
|||
at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:636) |
|||
at java.base/sun.net.www.protocol.https.HttpsClient.<init>(HttpsClient.java:264) |
|||
at java.base/sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:377) |
|||
at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:193) |
|||
at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1253) |
|||
at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1139) |
|||
at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:179) |
|||
at java.base/sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(HttpsURLConnectionImpl.java:141) |
|||
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:848) |
|||
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:818) |
|||
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:346) |
|||
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:331) |
|||
at command.com.abod.crawler.CrawlCommand.execute(CrawlCommand.java:39) |
|||
at command.com.abod.crawler.CrawlMultipleCommand.execute(CrawlMultipleCommand.java:25) |
|||
at controller.com.abod.crawler.CrawlerController.start(CrawlerController.java:74) |
|||
at com.abod.crawler.Main.main(Main.java:15) |
|||
2026-06-04 05:03:50.403 [main] ERROR c.yourname.crawler.view.ConsoleView - Failed to crawl: Connect timed out |
|||
2026-06-04 05:03:50.403 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.github.com |
|||
2026-06-04 05:03:50.403 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.github.com |
|||
2026-06-04 05:03:56.409 [main] ERROR c.y.crawler.command.CrawlCommand - Failed to crawl URL: https://www.github.com |
|||
java.io.IOException: Underlying input stream returned zero bytes |
|||
at java.base/sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:354) |
|||
at java.base/sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:393) |
|||
at java.base/sun.nio.cs.StreamDecoder.lockedRead(StreamDecoder.java:217) |
|||
at java.base/sun.nio.cs.StreamDecoder.read(StreamDecoder.java:171) |
|||
at java.base/java.io.InputStreamReader.read(InputStreamReader.java:188) |
|||
at java.base/java.io.BufferedReader.fill(BufferedReader.java:160) |
|||
at java.base/java.io.BufferedReader.read1(BufferedReader.java:225) |
|||
at java.base/java.io.BufferedReader.implRead(BufferedReader.java:314) |
|||
at java.base/java.io.BufferedReader.read(BufferedReader.java:296) |
|||
at org.jsoup.parser.CharacterReader.bufferUp(CharacterReader.java:87) |
|||
at org.jsoup.parser.CharacterReader.<init>(CharacterReader.java:43) |
|||
at org.jsoup.parser.CharacterReader.<init>(CharacterReader.java:47) |
|||
at org.jsoup.parser.TreeBuilder.initialiseParse(TreeBuilder.java:48) |
|||
at org.jsoup.parser.HtmlTreeBuilder.initialiseParse(HtmlTreeBuilder.java:79) |
|||
at org.jsoup.parser.TreeBuilder.parse(TreeBuilder.java:60) |
|||
at org.jsoup.parser.Parser.parseInput(Parser.java:57) |
|||
at org.jsoup.helper.DataUtil.parseInputStream(DataUtil.java:218) |
|||
at org.jsoup.helper.HttpConnection$Response.parse(HttpConnection.java:959) |
|||
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:333) |
|||
at command.com.abod.crawler.CrawlCommand.execute(CrawlCommand.java:39) |
|||
at command.com.abod.crawler.CrawlMultipleCommand.execute(CrawlMultipleCommand.java:25) |
|||
at controller.com.abod.crawler.CrawlerController.start(CrawlerController.java:74) |
|||
at com.abod.crawler.Main.main(Main.java:15) |
|||
2026-06-04 05:03:56.409 [main] ERROR c.yourname.crawler.view.ConsoleView - Failed to crawl: Underlying input stream returned zero bytes |
|||
2026-06-04 05:03:56.411 [main] INFO c.yourname.crawler.view.ConsoleView - Completed! Crawled 3 URLs. |
|||
2026-06-04 05:03:56.411 [main] INFO c.yourname.crawler.view.ConsoleView - Total articles in repository: 1 |
|||
2026-06-04 05:04:10.838 [main] INFO c.yourname.crawler.view.ConsoleView - Starting multi-URL crawl... |
|||
2026-06-04 05:04:10.838 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:04:10.838 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:04:10.838 [main] WARN c.yourname.crawler.view.ConsoleView - URL already crawled! Use import to load existing data. |
|||
2026-06-04 05:04:10.838 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.google.com |
|||
2026-06-04 05:04:10.838 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.google.com |
|||
2026-06-04 05:04:20.847 [main] ERROR c.y.crawler.command.CrawlCommand - Failed to crawl URL: https://www.google.com |
|||
java.net.SocketTimeoutException: Connect timed out |
|||
at java.base/sun.nio.ch.NioSocketImpl.timedFinishConnect(NioSocketImpl.java:546) |
|||
at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:592) |
|||
at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:327) |
|||
at java.base/java.net.Socket.connect(Socket.java:751) |
|||
at java.base/sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:304) |
|||
at java.base/sun.net.NetworkClient.doConnect(NetworkClient.java:178) |
|||
at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:531) |
|||
at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:636) |
|||
at java.base/sun.net.www.protocol.https.HttpsClient.<init>(HttpsClient.java:264) |
|||
at java.base/sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:377) |
|||
at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:193) |
|||
at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1253) |
|||
at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1139) |
|||
at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:179) |
|||
at java.base/sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(HttpsURLConnectionImpl.java:141) |
|||
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:848) |
|||
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:818) |
|||
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:346) |
|||
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:331) |
|||
at command.com.abod.crawler.CrawlCommand.execute(CrawlCommand.java:39) |
|||
at command.com.abod.crawler.CrawlMultipleCommand.execute(CrawlMultipleCommand.java:25) |
|||
at controller.com.abod.crawler.CrawlerController.start(CrawlerController.java:74) |
|||
at com.abod.crawler.Main.main(Main.java:15) |
|||
2026-06-04 05:04:20.848 [main] ERROR c.yourname.crawler.view.ConsoleView - Failed to crawl: Connect timed out |
|||
2026-06-04 05:04:20.848 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.github.com |
|||
2026-06-04 05:04:20.848 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.github.com |
|||
2026-06-04 05:04:30.865 [main] ERROR c.y.crawler.command.CrawlCommand - Failed to crawl URL: https://www.github.com |
|||
java.net.SocketTimeoutException: Connect timed out |
|||
at java.base/sun.nio.ch.NioSocketImpl.timedFinishConnect(NioSocketImpl.java:546) |
|||
at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:592) |
|||
at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:327) |
|||
at java.base/java.net.Socket.connect(Socket.java:751) |
|||
at java.base/sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:304) |
|||
at java.base/sun.net.NetworkClient.doConnect(NetworkClient.java:178) |
|||
at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:531) |
|||
at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:636) |
|||
at java.base/sun.net.www.protocol.https.HttpsClient.<init>(HttpsClient.java:264) |
|||
at java.base/sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:377) |
|||
at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:193) |
|||
at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1253) |
|||
at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1139) |
|||
at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:179) |
|||
at java.base/sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(HttpsURLConnectionImpl.java:141) |
|||
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:848) |
|||
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:818) |
|||
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:346) |
|||
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:331) |
|||
at command.com.abod.crawler.CrawlCommand.execute(CrawlCommand.java:39) |
|||
at command.com.abod.crawler.CrawlMultipleCommand.execute(CrawlMultipleCommand.java:25) |
|||
at controller.com.abod.crawler.CrawlerController.start(CrawlerController.java:74) |
|||
at com.abod.crawler.Main.main(Main.java:15) |
|||
2026-06-04 05:04:30.865 [main] ERROR c.yourname.crawler.view.ConsoleView - Failed to crawl: Connect timed out |
|||
2026-06-04 05:04:30.865 [main] INFO c.yourname.crawler.view.ConsoleView - Completed! Crawled 3 URLs. |
|||
2026-06-04 05:04:30.865 [main] INFO c.yourname.crawler.view.ConsoleView - Total articles in repository: 1 |
|||
2026-06-04 05:04:56.866 [main] INFO c.yourname.crawler.view.ConsoleView - Showing 1 articles from example.com |
|||
2026-06-04 05:05:12.736 [main] INFO c.y.crawler.model.ArticleRepository - Repository cleared |
|||
2026-06-04 05:05:12.736 [main] INFO c.yourname.crawler.view.ConsoleView - All articles cleared from repository |
|||
2026-06-04 05:05:16.858 [main] INFO c.yourname.crawler.view.ConsoleView - Goodbye! |
|||
2026-06-04 05:06:54.222 [main] INFO com.abod.crawler.Main - Web Crawler Application Started |
|||
2026-06-04 05:06:54.227 [main] INFO c.y.crawler.model.ArticleRepository - ArticleRepository initialized |
|||
2026-06-04 05:06:54.244 [main] INFO c.y.c.controller.CrawlerController - CrawlerController initialized with 7 commands |
|||
2026-06-04 05:06:58.112 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:06:59.193 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:07:10.211 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 1 articles (newest first) |
|||
2026-06-04 05:08:15.454 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.github.com |
|||
2026-06-04 05:08:25.545 [main] ERROR c.y.crawler.command.CrawlCommand - Failed to crawl URL: https://www.github.com |
|||
java.net.SocketTimeoutException: Connect timed out |
|||
at java.base/sun.nio.ch.NioSocketImpl.timedFinishConnect(NioSocketImpl.java:546) |
|||
at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:592) |
|||
at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:327) |
|||
at java.base/java.net.Socket.connect(Socket.java:751) |
|||
at java.base/sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:304) |
|||
at java.base/sun.net.NetworkClient.doConnect(NetworkClient.java:178) |
|||
at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:531) |
|||
at java.base/sun.net.www.http.HttpClient.openServer(HttpClient.java:636) |
|||
at java.base/sun.net.www.protocol.https.HttpsClient.<init>(HttpsClient.java:264) |
|||
at java.base/sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:377) |
|||
at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:193) |
|||
at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1253) |
|||
at java.base/sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1139) |
|||
at java.base/sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:179) |
|||
at java.base/sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(HttpsURLConnectionImpl.java:141) |
|||
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:848) |
|||
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:818) |
|||
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:346) |
|||
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:331) |
|||
at command.com.abod.crawler.CrawlCommand.execute(CrawlCommand.java:39) |
|||
at controller.com.abod.crawler.CrawlerController.start(CrawlerController.java:74) |
|||
at com.abod.crawler.Main.main(Main.java:15) |
|||
2026-06-04 05:08:25.547 [main] ERROR c.yourname.crawler.view.ConsoleView - Failed to crawl: Connect timed out |
|||
2026-06-04 05:09:21.026 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://httpbin.org/html |
|||
2026-06-04 05:09:22.177 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:09:36.720 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://books.toscrape.com |
|||
2026-06-04 05:09:38.631 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:10:22.093 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://www.example.com |
|||
2026-06-04 05:10:22.983 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:10:46.164 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://quotes.toscrape.com |
|||
2026-06-04 05:10:47.608 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:11:21.873 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://quotes.toscrape.com |
|||
2026-06-04 05:11:21.873 [main] WARN c.yourname.crawler.view.ConsoleView - URL already crawled! Use import to load existing data. |
|||
2026-06-04 05:12:26.629 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://httpbin.org/html |
|||
2026-06-04 05:12:26.629 [main] WARN c.yourname.crawler.view.ConsoleView - URL already crawled! Use import to load existing data. |
|||
2026-06-04 05:13:47.019 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 5 articles (newest first) |
|||
2026-06-04 05:13:59.278 [main] INFO c.yourname.crawler.view.ConsoleView - Total articles in repository: 5 |
|||
2026-06-04 05:20:17.775 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 5 articles (newest first) |
|||
2026-06-04 05:20:56.447 [main] ERROR c.yourname.crawler.view.ConsoleView - Unknown command: 'export'. Type 'help' for available commands. |
|||
2026-06-04 05:36:05.721 [main] ERROR c.yourname.crawler.view.ConsoleView - Unknown command: 'export'. Type 'help' for available commands. |
|||
2026-06-04 05:36:31.242 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 5 articles (newest first) |
|||
2026-06-04 05:36:43.324 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 5 articles (newest first) |
|||
2026-06-04 05:36:50.737 [main] ERROR c.yourname.crawler.view.ConsoleView - Unknown command: 'export'. Type 'help' for available commands. |
|||
2026-06-04 05:42:46.740 [main] INFO com.abod.crawler.Main - Web Crawler Application Started |
|||
2026-06-04 05:42:46.744 [main] INFO c.y.crawler.model.ArticleRepository - ArticleRepository initialized |
|||
2026-06-04 05:42:46.763 [main] INFO c.y.c.controller.CrawlerController - CrawlerController initialized with 9 commands |
|||
2026-06-04 05:42:53.748 [main] ERROR c.yourname.crawler.view.ConsoleView - No articles to export. Please crawl some websites first. |
|||
2026-06-04 05:43:38.606 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:43:39.589 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:43:51.022 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://httpbin.org/html |
|||
2026-06-04 05:43:52.399 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:43:56.782 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://quotes.toscrape.com |
|||
2026-06-04 05:43:57.924 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:44:01.371 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 3 articles (newest first) |
|||
2026-06-04 05:44:06.030 [main] INFO c.yourname.crawler.view.ConsoleView - Exporting 3 articles to JSON... |
|||
2026-06-04 05:44:06.215 [main] INFO util.com.abod.crawler.JsonUtil - Exported 3 articles to export_٢٠٢٦٠٦٠٤_٠٥٤٤٠٦.json |
|||
2026-06-04 05:44:06.217 [main] INFO c.yourname.crawler.view.ConsoleView - Exported 3 articles to: export_٢٠٢٦٠٦٠٤_٠٥٤٤٠٦.json |
|||
2026-06-04 05:44:06.217 [main] INFO c.yourname.crawler.view.ConsoleView - File location: C:\Users\2040a\IdeaProjects\web-crawler/export_٢٠٢٦٠٦٠٤_٠٥٤٤٠٦.json |
|||
2026-06-04 05:45:48.703 [main] INFO com.abod.crawler.Main - Web Crawler Application Started |
|||
2026-06-04 05:45:48.706 [main] INFO c.y.crawler.model.ArticleRepository - ArticleRepository initialized |
|||
2026-06-04 05:45:48.726 [main] INFO c.y.c.controller.CrawlerController - CrawlerController initialized with 9 commands |
|||
2026-06-04 05:45:50.197 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:45:51.277 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:45:51.279 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://httpbin.org/html |
|||
2026-06-04 05:45:52.419 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:45:52.421 [main] INFO c.yourname.crawler.view.ConsoleView - Crawling: https://quotes.toscrape.com |
|||
2026-06-04 05:45:53.808 [main] INFO c.yourname.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:45:53.812 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 3 articles (newest first) |
|||
2026-06-04 05:45:53.813 [main] INFO c.yourname.crawler.view.ConsoleView - Exporting 3 articles to JSON... |
|||
2026-06-04 05:45:53.987 [main] INFO util.com.abod.crawler.JsonUtil - Exported 3 articles to export_٢٠٢٦٠٦٠٤_٠٥٤٥٥٣.json |
|||
2026-06-04 05:45:53.989 [main] INFO c.yourname.crawler.view.ConsoleView - Exported 3 articles to: export_٢٠٢٦٠٦٠٤_٠٥٤٥٥٣.json |
|||
2026-06-04 05:45:53.990 [main] INFO c.yourname.crawler.view.ConsoleView - File location: C:\Users\2040a\IdeaProjects\web-crawler/export_٢٠٢٦٠٦٠٤_٠٥٤٥٥٣.json |
|||
2026-06-04 05:45:53.990 [main] INFO c.y.crawler.model.ArticleRepository - Repository cleared |
|||
2026-06-04 05:45:53.990 [main] INFO c.yourname.crawler.view.ConsoleView - All articles cleared from repository |
|||
2026-06-04 05:45:53.990 [main] INFO c.yourname.crawler.view.ConsoleView - Showing all 0 articles (newest first) |
|||
2026-06-04 05:45:53.990 [main] INFO c.yourname.crawler.view.ConsoleView - No articles found. |
|||
2026-06-04 05:50:50.985 [main] INFO com.abod.crawler.Main - Web Crawler Application Started |
|||
2026-06-04 05:50:50.988 [main] INFO c.a.crawler.model.ArticleRepository - ArticleRepository initialized |
|||
2026-06-04 05:50:51.008 [main] INFO c.a.c.controller.CrawlerController - CrawlerController initialized with 9 commands |
|||
2026-06-04 05:50:58.283 [main] INFO com.abod.crawler.view.ConsoleView - Crawling: https://example.com |
|||
2026-06-04 05:50:59.490 [main] INFO com.abod.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:50:59.492 [main] INFO com.abod.crawler.view.ConsoleView - Crawling: https://httpbin.org/html |
|||
2026-06-04 05:51:00.805 [main] INFO com.abod.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:51:00.807 [main] INFO com.abod.crawler.view.ConsoleView - Crawling: https://quotes.toscrape.com |
|||
2026-06-04 05:51:01.895 [main] INFO com.abod.crawler.view.ConsoleView - Crawled successfully! |
|||
2026-06-04 05:51:01.901 [main] INFO com.abod.crawler.view.ConsoleView - Showing all 3 articles (newest first) |
|||
2026-06-04 05:51:01.902 [main] INFO com.abod.crawler.view.ConsoleView - Exporting 3 articles to JSON... |
|||
2026-06-04 05:51:02.075 [main] INFO com.abod.crawler.util.JsonUtil - Exported 3 articles to export_٢٠٢٦٠٦٠٤_٠٥٥١٠٢.json |
|||
2026-06-04 05:51:02.078 [main] INFO com.abod.crawler.view.ConsoleView - Exported 3 articles to: export_٢٠٢٦٠٦٠٤_٠٥٥١٠٢.json |
|||
2026-06-04 05:51:02.078 [main] INFO com.abod.crawler.view.ConsoleView - File location: C:\Users\2040a\IdeaProjects\web-crawler/export_٢٠٢٦٠٦٠٤_٠٥٥١٠٢.json |
|||
2026-06-04 05:51:02.078 [main] INFO c.a.crawler.model.ArticleRepository - Repository cleared |
|||
2026-06-04 05:51:02.078 [main] INFO com.abod.crawler.view.ConsoleView - All articles cleared from repository |
|||
2026-06-04 05:51:02.078 [main] INFO com.abod.crawler.view.ConsoleView - Showing all 0 articles (newest first) |
|||
2026-06-04 05:51:02.078 [main] INFO com.abod.crawler.view.ConsoleView - No articles found. |
|||
@ -0,0 +1,82 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" |
|||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 |
|||
http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
|||
<modelVersion>4.0.0</modelVersion> |
|||
|
|||
<groupId>com.abod.crawler</groupId> |
|||
<artifactId>web-crawler</artifactId> |
|||
<version>1.0-SNAPSHOT</version> |
|||
<packaging>jar</packaging> |
|||
|
|||
<properties> |
|||
<maven.compiler.source>17</maven.compiler.source> |
|||
<maven.compiler.target>17</maven.compiler.target> |
|||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
|||
</properties> |
|||
|
|||
<dependencies> |
|||
<!-- Jsoup: HTML parsing (for crawling websites) --> |
|||
<dependency> |
|||
<groupId>org.jsoup</groupId> |
|||
<artifactId>jsoup</artifactId> |
|||
<version>1.17.2</version> |
|||
</dependency> |
|||
|
|||
<!-- Jackson: JSON serialization (for export/import) --> |
|||
<dependency> |
|||
<groupId>com.fasterxml.jackson.core</groupId> |
|||
<artifactId>jackson-databind</artifactId> |
|||
<version>2.16.1</version> |
|||
</dependency> |
|||
|
|||
<!-- Jackson: Support for Java Time (LocalDateTime) --> |
|||
<dependency> |
|||
<groupId>com.fasterxml.jackson.datatype</groupId> |
|||
<artifactId>jackson-datatype-jsr310</artifactId> |
|||
<version>2.16.1</version> |
|||
</dependency> |
|||
|
|||
<!-- Logback: Logging framework --> |
|||
<dependency> |
|||
<groupId>ch.qos.logback</groupId> |
|||
<artifactId>logback-classic</artifactId> |
|||
<version>1.5.3</version> |
|||
</dependency> |
|||
|
|||
<!-- JUnit: Testing --> |
|||
<dependency> |
|||
<groupId>org.junit.jupiter</groupId> |
|||
<artifactId>junit-jupiter</artifactId> |
|||
<version>5.10.2</version> |
|||
<scope>test</scope> |
|||
</dependency> |
|||
</dependencies> |
|||
|
|||
<build> |
|||
<plugins> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-compiler-plugin</artifactId> |
|||
<version>3.11.0</version> |
|||
<configuration> |
|||
<source>17</source> |
|||
<target>17</target> |
|||
</configuration> |
|||
</plugin> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-jar-plugin</artifactId> |
|||
<version>3.3.0</version> |
|||
<configuration> |
|||
<archive> |
|||
<manifest> |
|||
<mainClass>com.abod.crawler.Main</mainClass> |
|||
</manifest> |
|||
</archive> |
|||
</configuration> |
|||
</plugin> |
|||
</plugins> |
|||
</build> |
|||
</project> |
|||
@ -0,0 +1,18 @@ |
|||
package com.abod.crawler; |
|||
|
|||
import com.abod.crawler.controller.CrawlerController; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public class Main { |
|||
private static final Logger logger = LoggerFactory.getLogger(Main.class); |
|||
|
|||
public static void main(String[] args) { |
|||
logger.info("Web Crawler Application Started"); |
|||
|
|||
CrawlerController controller = new CrawlerController(); |
|||
controller.start(); |
|||
|
|||
logger.info("Web Crawler Application Shutdown"); |
|||
} |
|||
} |
|||
@ -0,0 +1,12 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
public class ClearCommand implements Command { |
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
repository.clear(); |
|||
view.printSuccess("All articles cleared from repository"); |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
/** |
|||
* Command interface - part of the Command Design Pattern |
|||
* Each user command will implement this interface |
|||
*/ |
|||
public interface Command { |
|||
|
|||
/** |
|||
* Execute the command |
|||
* @param args Command arguments (split by space) |
|||
* @param repository The article data repository |
|||
* @param view The console view for output |
|||
*/ |
|||
void execute(String[] args, ArticleRepository repository, ConsoleView view); |
|||
} |
|||
@ -0,0 +1,12 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
public class CountCommand implements Command { |
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
int count = repository.size(); |
|||
view.printSuccess("Total articles in repository: " + count); |
|||
} |
|||
} |
|||
@ -0,0 +1,126 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.Article; |
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import java.io.IOException; |
|||
import java.net.SocketTimeoutException; |
|||
|
|||
public class CrawlCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); |
|||
|
|||
// List of known accessible test websites
|
|||
private static final String[] TEST_WEBSITES = { |
|||
"https://example.com", |
|||
"https://httpbin.org/html", |
|||
"https://books.toscrape.com", |
|||
"https://quotes.toscrape.com" |
|||
}; |
|||
|
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
// Check if URL was provided
|
|||
if (args.length < 2) { |
|||
view.printError("Usage: crawl <url>"); |
|||
view.printInfo("Example: crawl https://example.com"); |
|||
view.printInfo("Test sites you can use:"); |
|||
for (String site : TEST_WEBSITES) { |
|||
view.printInfo(" - " + site); |
|||
} |
|||
return; |
|||
} |
|||
|
|||
String url = args[1]; |
|||
view.printInfo("Crawling: " + url); |
|||
|
|||
// Check if URL already exists
|
|||
if (repository.exists(url)) { |
|||
view.printWarning("URL already crawled!"); |
|||
return; |
|||
} |
|||
|
|||
try { |
|||
// Connect with longer timeout and better user agent
|
|||
Document document = Jsoup.connect(url) |
|||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") |
|||
.timeout(15000) // Increased to 15 seconds
|
|||
.ignoreHttpErrors(true) // Don't fail on 404, 403, etc.
|
|||
.followRedirects(true) |
|||
.get(); |
|||
|
|||
// Extract basic information
|
|||
String title = document.title(); |
|||
if (title == null || title.isEmpty()) { |
|||
title = "No Title"; |
|||
} |
|||
|
|||
String content = extractContent(document); |
|||
String websiteName = extractWebsiteName(url); |
|||
|
|||
// Create article
|
|||
Article article = new Article(title, url, content, websiteName); |
|||
repository.save(article); |
|||
|
|||
view.printSuccess("Crawled successfully!"); |
|||
view.displayArticle(article); |
|||
|
|||
} catch (SocketTimeoutException e) { |
|||
logger.error("Timeout crawling URL: {}", url, e); |
|||
view.printError("Connection timed out. The website might be slow or blocking requests."); |
|||
view.printInfo("Try one of these test sites instead:"); |
|||
for (String site : TEST_WEBSITES) { |
|||
if (!site.equals(url)) { |
|||
view.printInfo(" - crawl " + site); |
|||
} |
|||
} |
|||
|
|||
} catch (IOException e) { |
|||
logger.error("Failed to crawl URL: {}", url, e); |
|||
view.printError("Failed to crawl: " + e.getMessage()); |
|||
view.printInfo("Try one of these accessible test sites:"); |
|||
for (String site : TEST_WEBSITES) { |
|||
view.printInfo(" - crawl " + site); |
|||
} |
|||
} catch (Exception e) { |
|||
logger.error("Unexpected error crawling URL: {}", url, e); |
|||
view.printError("Unexpected error: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
private String extractContent(Document document) { |
|||
try { |
|||
String bodyText = document.body() != null ? document.body().text() : ""; |
|||
String content = bodyText.replaceAll("\\s+", " ").trim(); |
|||
|
|||
if (content.length() > 500) { |
|||
content = content.substring(0, 500) + "..."; |
|||
} |
|||
|
|||
return content.isEmpty() ? "No content extracted" : content; |
|||
} catch (Exception e) { |
|||
return "Error extracting content"; |
|||
} |
|||
} |
|||
|
|||
private String extractWebsiteName(String url) { |
|||
try { |
|||
String domain = url.replace("https://", "").replace("http://", ""); |
|||
int slashIndex = domain.indexOf("/"); |
|||
if (slashIndex > 0) { |
|||
domain = domain.substring(0, slashIndex); |
|||
} |
|||
// Remove www. if present
|
|||
if (domain.startsWith("www.")) { |
|||
domain = domain.substring(4); |
|||
} |
|||
return domain; |
|||
} catch (Exception e) { |
|||
return "Unknown"; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,32 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
public class CrawlMultipleCommand implements Command { |
|||
|
|||
// Predefined URLs to crawl for testing
|
|||
private static final String[] DEFAULT_URLS = { |
|||
"https://example.com", |
|||
"https://www.google.com", |
|||
"https://www.github.com" |
|||
}; |
|||
|
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
view.printInfo("Starting multi-URL crawl..."); |
|||
|
|||
CrawlCommand crawlCommand = new CrawlCommand(); |
|||
int successCount = 0; |
|||
|
|||
for (String url : DEFAULT_URLS) { |
|||
view.printInfo("Crawling: " + url); |
|||
String[] crawlArgs = {"crawl", url}; |
|||
crawlCommand.execute(crawlArgs, repository, view); |
|||
successCount++; |
|||
} |
|||
|
|||
view.printSuccess("Completed! Crawled " + successCount + " URLs."); |
|||
view.printInfo("Total articles in repository: " + repository.size()); |
|||
} |
|||
} |
|||
@ -0,0 +1,12 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
public class ExitCommand implements Command { |
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
view.printSuccess("Goodbye!"); |
|||
System.exit(0); |
|||
} |
|||
} |
|||
@ -0,0 +1,32 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.Article; |
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.util.JsonUtil; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class ExportCommand implements Command { |
|||
|
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
List<Article> articles = repository.getAll(); |
|||
|
|||
if (articles.isEmpty()) { |
|||
view.printError("No articles to export. Please crawl some websites first."); |
|||
return; |
|||
} |
|||
|
|||
view.printInfo("Exporting " + articles.size() + " articles to JSON..."); |
|||
|
|||
String filename = JsonUtil.exportWithTimestamp(articles); |
|||
|
|||
if (filename != null) { |
|||
view.printSuccess("Exported " + articles.size() + " articles to: " + filename); |
|||
view.printInfo("File location: " + JsonUtil.getWorkingDirectory() + "/" + filename); |
|||
} else { |
|||
view.printError("Export failed"); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
public class HelpCommand implements Command { |
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
view.showHelp(); |
|||
} |
|||
} |
|||
@ -0,0 +1,47 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.Article; |
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.util.JsonUtil; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class ImportCommand implements Command { |
|||
|
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
if (args.length < 2) { |
|||
view.printError("Usage: import <filename>"); |
|||
view.printInfo("Example: import export_20260604_143022.json"); |
|||
return; |
|||
} |
|||
|
|||
String filename = args[1]; |
|||
|
|||
if (!filename.endsWith(".json")) { |
|||
filename = filename + ".json"; |
|||
} |
|||
|
|||
view.printInfo("Importing articles from: " + filename); |
|||
|
|||
List<Article> imported = JsonUtil.importFromJson(filename); |
|||
|
|||
if (imported.isEmpty()) { |
|||
view.printError("No articles found in " + filename); |
|||
view.printInfo("File location should be: " + JsonUtil.getWorkingDirectory() + "/" + filename); |
|||
return; |
|||
} |
|||
|
|||
int newCount = 0; |
|||
for (Article article : imported) { |
|||
if (!repository.exists(article.getUrl())) { |
|||
repository.save(article); |
|||
newCount++; |
|||
} |
|||
} |
|||
|
|||
view.printSuccess("Imported " + newCount + " new articles from " + filename); |
|||
view.printInfo("Total articles in repository: " + repository.size()); |
|||
} |
|||
} |
|||
@ -0,0 +1,31 @@ |
|||
package com.abod.crawler.command; |
|||
|
|||
import com.abod.crawler.model.Article; |
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class ListCommand implements Command { |
|||
@Override |
|||
public void execute(String[] args, ArticleRepository repository, ConsoleView view) { |
|||
List<Article> articles; |
|||
|
|||
// Check if user wants to filter by website
|
|||
if (args.length > 1) { |
|||
String websiteName = args[1]; |
|||
articles = repository.getByWebsite(websiteName); |
|||
|
|||
if (articles.isEmpty()) { |
|||
view.printWarning("No articles found from website: " + websiteName); |
|||
return; |
|||
} |
|||
view.printInfo("Showing " + articles.size() + " articles from " + websiteName); |
|||
} else { |
|||
articles = repository.getNewestFirst(); |
|||
view.printInfo("Showing all " + articles.size() + " articles (newest first)"); |
|||
} |
|||
|
|||
view.displayArticles(articles); |
|||
} |
|||
} |
|||
@ -0,0 +1,77 @@ |
|||
package com.abod.crawler.controller; |
|||
|
|||
import com.abod.crawler.command.*; |
|||
import com.abod.crawler.command.*; |
|||
import com.abod.crawler.model.ArticleRepository; |
|||
import com.abod.crawler.view.ConsoleView; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
|
|||
public class CrawlerController { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); |
|||
|
|||
private final ArticleRepository repository; |
|||
private final ConsoleView view; |
|||
private final Map<String, Command> commands; |
|||
|
|||
public CrawlerController() { |
|||
this.repository = new ArticleRepository(); |
|||
this.view = new ConsoleView(); |
|||
this.commands = new HashMap<>(); |
|||
|
|||
registerCommands(); |
|||
|
|||
logger.info("CrawlerController initialized with {} commands", commands.size()); |
|||
} |
|||
|
|||
private void registerCommands() { |
|||
commands.put("help", new HelpCommand()); |
|||
commands.put("exit", new ExitCommand()); |
|||
commands.put("list", new ListCommand()); |
|||
commands.put("count", new CountCommand()); |
|||
commands.put("clear", new ClearCommand()); |
|||
commands.put("crawl", new CrawlCommand()); |
|||
commands.put("crawl-multiple", new CrawlMultipleCommand()); |
|||
commands.put("export", new ExportCommand()); |
|||
commands.put("import", new ImportCommand()); |
|||
|
|||
logger.debug("Registered commands: {}", commands.keySet()); |
|||
} |
|||
|
|||
public void start() { |
|||
view.showWelcome(); |
|||
|
|||
while (true) { |
|||
try { |
|||
String input = view.readCommand(); |
|||
if (input == null || input.trim().isEmpty()) { |
|||
continue; |
|||
} |
|||
|
|||
String[] parts = input.trim().split("\\s+"); |
|||
String commandName = parts[0].toLowerCase(); |
|||
|
|||
if (commandName.equals("exit")) { |
|||
Command exitCommand = commands.get("exit"); |
|||
if (exitCommand != null) { |
|||
exitCommand.execute(parts, repository, view); |
|||
break; |
|||
} |
|||
} else if (commands.containsKey(commandName)) { |
|||
Command command = commands.get(commandName); |
|||
command.execute(parts, repository, view); |
|||
} else { |
|||
view.printError("Unknown command: '" + commandName + "'. Type 'help' for available commands."); |
|||
} |
|||
|
|||
} catch (Exception e) { |
|||
logger.error("Error processing command", e); |
|||
view.printError("An error occurred: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,180 @@ |
|||
package com.abod.crawler.model; |
|||
|
|||
import com.fasterxml.jackson.annotation.JsonProperty; |
|||
import java.time.LocalDateTime; |
|||
import java.time.format.DateTimeFormatter; |
|||
|
|||
/** |
|||
* Article Model class representing a crawled web article. |
|||
* This is the data structure that will be stored, exported, and imported. |
|||
*/ |
|||
public class Article { |
|||
|
|||
// ===== Fields =====
|
|||
|
|||
@JsonProperty("title") |
|||
private String title; |
|||
|
|||
@JsonProperty("url") |
|||
private String url; |
|||
|
|||
@JsonProperty("content") |
|||
private String content; |
|||
|
|||
@JsonProperty("crawled_at") |
|||
private LocalDateTime crawledAt; |
|||
|
|||
@JsonProperty("website_name") |
|||
private String websiteName; |
|||
|
|||
@JsonProperty("word_count") |
|||
private int wordCount; |
|||
|
|||
// ===== Constructors =====
|
|||
|
|||
/** |
|||
* Default constructor (required for Jackson JSON deserialization) |
|||
*/ |
|||
public Article() { |
|||
// Empty constructor needed for Jackson
|
|||
} |
|||
|
|||
/** |
|||
* Constructor with essential fields |
|||
* @param title Article title |
|||
* @param url Article URL |
|||
* @param content Article content/text |
|||
*/ |
|||
public Article(String title, String url, String content) { |
|||
this.title = title; |
|||
this.url = url; |
|||
this.content = content; |
|||
this.crawledAt = LocalDateTime.now(); // Auto-set current time
|
|||
this.wordCount = content != null ? content.split("\\s+").length : 0; |
|||
} |
|||
|
|||
/** |
|||
* Full constructor with all fields |
|||
* @param title Article title |
|||
* @param url Article URL |
|||
* @param content Article content |
|||
* @param websiteName Name of the source website |
|||
*/ |
|||
public Article(String title, String url, String content, String websiteName) { |
|||
this(title, url, content); |
|||
this.websiteName = websiteName; |
|||
} |
|||
|
|||
// ===== Getters and Setters =====
|
|||
|
|||
public String getTitle() { |
|||
return title; |
|||
} |
|||
|
|||
public void setTitle(String title) { |
|||
this.title = title; |
|||
} |
|||
|
|||
public String getUrl() { |
|||
return url; |
|||
} |
|||
|
|||
public void setUrl(String url) { |
|||
this.url = url; |
|||
} |
|||
|
|||
public String getContent() { |
|||
return content; |
|||
} |
|||
|
|||
public void setContent(String content) { |
|||
this.content = content; |
|||
this.wordCount = content != null ? content.split("\\s+").length : 0; |
|||
} |
|||
|
|||
public LocalDateTime getCrawledAt() { |
|||
return crawledAt; |
|||
} |
|||
|
|||
public void setCrawledAt(LocalDateTime crawledAt) { |
|||
this.crawledAt = crawledAt; |
|||
} |
|||
|
|||
public String getWebsiteName() { |
|||
return websiteName; |
|||
} |
|||
|
|||
public void setWebsiteName(String websiteName) { |
|||
this.websiteName = websiteName; |
|||
} |
|||
|
|||
public int getWordCount() { |
|||
return wordCount; |
|||
} |
|||
|
|||
// No setter for wordCount - it's calculated automatically from content
|
|||
|
|||
// ===== Utility Methods =====
|
|||
|
|||
/** |
|||
* Returns a formatted string for displaying in CLI |
|||
* @return Formatted article summary |
|||
*/ |
|||
public String toDisplayString() { |
|||
String formattedDate = crawledAt.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); |
|||
return String.format("[%s] %s\n URL: %s\n Words: %d | Crawled: %s\n", |
|||
websiteName != null ? websiteName : "Unknown", |
|||
truncate(title, 60), |
|||
truncate(url, 50), |
|||
wordCount, |
|||
formattedDate); |
|||
} |
|||
|
|||
/** |
|||
* Returns a short summary for list commands |
|||
* @return Short summary string |
|||
*/ |
|||
public String toShortString() { |
|||
return String.format("%d. %s (%s) - %d words", |
|||
-1, // Index will be added by the caller
|
|||
truncate(title, 40), |
|||
websiteName != null ? websiteName : "Unknown", |
|||
wordCount); |
|||
} |
|||
|
|||
/** |
|||
* Truncates a string to a maximum length |
|||
* @param str String to truncate |
|||
* @param maxLength Maximum length |
|||
* @return Truncated string with "..." if needed |
|||
*/ |
|||
private String truncate(String str, int maxLength) { |
|||
if (str == null) return "N/A"; |
|||
if (str.length() <= maxLength) return str; |
|||
return str.substring(0, maxLength - 3) + "..."; |
|||
} |
|||
|
|||
// ===== Equals and HashCode (for duplicate detection) =====
|
|||
|
|||
@Override |
|||
public boolean equals(Object obj) { |
|||
if (this == obj) return true; |
|||
if (obj == null || getClass() != obj.getClass()) return false; |
|||
|
|||
Article article = (Article) obj; |
|||
|
|||
// Two articles are considered equal if they have the same URL
|
|||
return url != null ? url.equals(article.url) : article.url == null; |
|||
} |
|||
|
|||
@Override |
|||
public int hashCode() { |
|||
return url != null ? url.hashCode() : 0; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return String.format("Article{title='%s', url='%s', wordCount=%d, crawledAt=%s}", |
|||
truncate(title, 30), truncate(url, 40), wordCount, crawledAt); |
|||
} |
|||
} |
|||
@ -0,0 +1,144 @@ |
|||
package com.abod.crawler.model; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import java.util.*; |
|||
import java.util.concurrent.ConcurrentHashMap; |
|||
|
|||
/** |
|||
* Repository class for storing and managing Article objects. |
|||
* This acts as the data access layer (part of the Model in MVC). |
|||
*/ |
|||
public class ArticleRepository { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); |
|||
|
|||
// Thread-safe map for storing articles by URL
|
|||
private final Map<String, Article> articles; |
|||
|
|||
/** |
|||
* Default constructor |
|||
*/ |
|||
public ArticleRepository() { |
|||
this.articles = new ConcurrentHashMap<>(); |
|||
logger.info("ArticleRepository initialized"); |
|||
} |
|||
|
|||
/** |
|||
* Save or update an article |
|||
* @param article Article to save |
|||
*/ |
|||
public void save(Article article) { |
|||
if (article == null) { |
|||
logger.warn("Attempted to save null article"); |
|||
return; |
|||
} |
|||
|
|||
if (article.getUrl() == null) { |
|||
logger.warn("Article has no URL, cannot save: {}", article); |
|||
return; |
|||
} |
|||
|
|||
articles.put(article.getUrl(), article); |
|||
logger.debug("Saved article: {}", article.getTitle()); |
|||
} |
|||
|
|||
/** |
|||
* Find an article by its URL |
|||
* @param url Article URL |
|||
* @return Article if found, null otherwise |
|||
*/ |
|||
public Article findByUrl(String url) { |
|||
if (url == null) return null; |
|||
return articles.get(url); |
|||
} |
|||
|
|||
/** |
|||
* Check if an article with given URL already exists |
|||
* @param url URL to check |
|||
* @return true if exists, false otherwise |
|||
*/ |
|||
public boolean exists(String url) { |
|||
return url != null && articles.containsKey(url); |
|||
} |
|||
|
|||
/** |
|||
* Get all articles |
|||
* @return List of all articles (new list, safe to modify) |
|||
*/ |
|||
public List<Article> getAll() { |
|||
return new ArrayList<>(articles.values()); |
|||
} |
|||
|
|||
/** |
|||
* Get articles sorted by crawl time (newest first) |
|||
* @return Sorted list of articles |
|||
*/ |
|||
public List<Article> getNewestFirst() { |
|||
List<Article> list = getAll(); |
|||
list.sort((a1, a2) -> a2.getCrawledAt().compareTo(a1.getCrawledAt())); |
|||
return list; |
|||
} |
|||
|
|||
/** |
|||
* Get articles from a specific website |
|||
* @param websiteName Name of the website |
|||
* @return List of articles from that website |
|||
*/ |
|||
public List<Article> getByWebsite(String websiteName) { |
|||
if (websiteName == null) return Collections.emptyList(); |
|||
|
|||
List<Article> result = new ArrayList<>(); |
|||
for (Article article : articles.values()) { |
|||
if (websiteName.equals(article.getWebsiteName())) { |
|||
result.add(article); |
|||
} |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
/** |
|||
* Get total number of articles |
|||
* @return Article count |
|||
*/ |
|||
public int size() { |
|||
return articles.size(); |
|||
} |
|||
|
|||
/** |
|||
* Clear all articles from repository |
|||
*/ |
|||
public void clear() { |
|||
articles.clear(); |
|||
logger.info("Repository cleared"); |
|||
} |
|||
|
|||
/** |
|||
* Get all URLs (for duplicate checking during crawling) |
|||
* @return Set of all URLs |
|||
*/ |
|||
public Set<String> getAllUrls() { |
|||
return new HashSet<>(articles.keySet()); |
|||
} |
|||
|
|||
/** |
|||
* Save multiple articles at once |
|||
* @param articleList List of articles to save |
|||
* @return Number of articles saved |
|||
*/ |
|||
public int saveAll(List<Article> articleList) { |
|||
if (articleList == null) return 0; |
|||
|
|||
int savedCount = 0; |
|||
for (Article article : articleList) { |
|||
if (article != null) { |
|||
save(article); |
|||
savedCount++; |
|||
} |
|||
} |
|||
|
|||
logger.info("Saved {} articles", savedCount); |
|||
return savedCount; |
|||
} |
|||
} |
|||
@ -0,0 +1,74 @@ |
|||
package com.abod.crawler.util; |
|||
|
|||
import com.fasterxml.jackson.databind.ObjectMapper; |
|||
import com.fasterxml.jackson.databind.SerializationFeature; |
|||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; |
|||
import com.abod.crawler.model.Article; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import java.io.File; |
|||
import java.io.IOException; |
|||
import java.text.SimpleDateFormat; |
|||
import java.util.ArrayList; |
|||
import java.util.Date; |
|||
import java.util.List; |
|||
|
|||
public class JsonUtil { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(JsonUtil.class); |
|||
private static final ObjectMapper objectMapper; |
|||
|
|||
static { |
|||
objectMapper = new ObjectMapper(); |
|||
objectMapper.enable(SerializationFeature.INDENT_OUTPUT); |
|||
objectMapper.registerModule(new JavaTimeModule()); |
|||
} |
|||
|
|||
public static boolean exportToJson(List<Article> articles, String filePath) { |
|||
if (articles == null || articles.isEmpty()) { |
|||
logger.warn("No articles to export"); |
|||
return false; |
|||
} |
|||
try { |
|||
objectMapper.writeValue(new File(filePath), articles); |
|||
logger.info("Exported {} articles to {}", articles.size(), filePath); |
|||
return true; |
|||
} catch (IOException e) { |
|||
logger.error("Export failed", e); |
|||
return false; |
|||
} |
|||
} |
|||
|
|||
public static List<Article> importFromJson(String filePath) { |
|||
File file = new File(filePath); |
|||
if (!file.exists()) { |
|||
logger.warn("File not found: {}", filePath); |
|||
return new ArrayList<>(); |
|||
} |
|||
try { |
|||
List<Article> articles = objectMapper.readValue(file, |
|||
objectMapper.getTypeFactory().constructCollectionType(List.class, Article.class)); |
|||
logger.info("Imported {} articles from {}", articles.size(), filePath); |
|||
return articles; |
|||
} catch (IOException e) { |
|||
logger.error("Import failed", e); |
|||
return new ArrayList<>(); |
|||
} |
|||
} |
|||
|
|||
public static String exportWithTimestamp(List<Article> articles) { |
|||
if (articles == null || articles.isEmpty()) { |
|||
return null; |
|||
} |
|||
String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); |
|||
String filename = "export_" + timestamp + ".json"; |
|||
boolean success = exportToJson(articles, filename); |
|||
return success ? filename : null; |
|||
} |
|||
|
|||
// ADD THIS METHOD - it was missing!
|
|||
public static String getWorkingDirectory() { |
|||
return System.getProperty("user.dir"); |
|||
} |
|||
} |
|||
@ -0,0 +1,164 @@ |
|||
package com.abod.crawler.view; |
|||
|
|||
import com.abod.crawler.model.Article; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import java.util.List; |
|||
import java.util.Scanner; |
|||
|
|||
/** |
|||
* View layer - Handles all user input and output |
|||
* In MVC, the View ONLY handles display and user input collection |
|||
* It contains NO business logic |
|||
*/ |
|||
public class ConsoleView { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class); |
|||
private final Scanner scanner; |
|||
|
|||
public ConsoleView() { |
|||
this.scanner = new Scanner(System.in); |
|||
logger.debug("ConsoleView initialized"); |
|||
} |
|||
|
|||
/** |
|||
* Display welcome banner |
|||
*/ |
|||
public void showWelcome() { |
|||
System.out.println(); |
|||
System.out.println("╔══════════════════════════════════════════════════════════╗"); |
|||
System.out.println("║ WEB CRAWLER v1.0 ║"); |
|||
System.out.println("║ ║"); |
|||
System.out.println("║ A CLI web crawler with MVC + Command + Strategy ║"); |
|||
System.out.println("╚══════════════════════════════════════════════════════════╝"); |
|||
System.out.println(); |
|||
showHelp(); |
|||
} |
|||
|
|||
/** |
|||
* Display help menu |
|||
*/ |
|||
public void showHelp() { |
|||
System.out.println("📖 Available Commands:"); |
|||
System.out.println(" ┌─────────────────────────────────────────────────────────┐"); |
|||
System.out.println(" │ crawl <url> - Crawl a single URL │"); |
|||
System.out.println(" │ crawl-multiple - Crawl multiple predefined URLs │"); |
|||
System.out.println(" │ list - List all articles │"); |
|||
System.out.println(" │ list <website> - List articles from a specific website │"); |
|||
System.out.println(" │ export - Export all articles to JSON │"); |
|||
System.out.println(" │ import <file> - Import articles from JSON file │"); |
|||
System.out.println(" │ count - Show total article count │"); |
|||
System.out.println(" │ clear - Clear all articles │"); |
|||
System.out.println(" │ help - Show this help menu │"); |
|||
System.out.println(" │ exit - Exit the application │"); |
|||
System.out.println(" └─────────────────────────────────────────────────────────┘"); |
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* Display a single article |
|||
*/ |
|||
public void displayArticle(Article article) { |
|||
if (article == null) { |
|||
printError("Article not found"); |
|||
return; |
|||
} |
|||
System.out.println(); |
|||
System.out.println("┌─────────────────────────────────────────────────────────┐"); |
|||
System.out.println("│ 📄 ARTICLE DETAILS │"); |
|||
System.out.println("├─────────────────────────────────────────────────────────┤"); |
|||
System.out.printf ("│ Title: %-45s│%n", truncate(article.getTitle(), 45)); |
|||
System.out.printf ("│ Source: %-44s│%n", truncate(article.getWebsiteName(), 44)); |
|||
System.out.printf ("│ URL: %-48s│%n", truncate(article.getUrl(), 48)); |
|||
System.out.printf ("│ Words: %-46d│%n", article.getWordCount()); |
|||
System.out.printf ("│ Crawled: %-44s│%n", article.getCrawledAt()); |
|||
System.out.println("└─────────────────────────────────────────────────────────┘"); |
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* Display list of articles |
|||
*/ |
|||
public void displayArticles(List<Article> articles) { |
|||
if (articles == null || articles.isEmpty()) { |
|||
printInfo("No articles found."); |
|||
return; |
|||
} |
|||
|
|||
System.out.println(); |
|||
System.out.println("┌─────────────────────────────────────────────────────────┐"); |
|||
System.out.println("│ 📚 ARTICLES (" + articles.size() + " total) │"); |
|||
System.out.println("├─────────────────────────────────────────────────────────┤"); |
|||
|
|||
int index = 1; |
|||
for (Article article : articles) { |
|||
System.out.printf("│ %2d. %-50s │%n", index, truncate(article.getTitle(), 50)); |
|||
System.out.printf("│ 📍 %-48s │%n", truncate(article.getUrl(), 48)); |
|||
System.out.printf("│ 🏠 %-10s | 📝 %-4d words | 🕐 %-19s │%n", |
|||
truncate(article.getWebsiteName(), 10), |
|||
article.getWordCount(), |
|||
article.getCrawledAt().toString().substring(0, 19)); |
|||
index++; |
|||
} |
|||
System.out.println("└─────────────────────────────────────────────────────────┘"); |
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* Display success message |
|||
*/ |
|||
public void printSuccess(String message) { |
|||
System.out.println("✅ " + message); |
|||
logger.info(message); |
|||
} |
|||
|
|||
/** |
|||
* Display error message |
|||
*/ |
|||
public void printError(String message) { |
|||
System.out.println("❌ " + message); |
|||
logger.error(message); |
|||
} |
|||
|
|||
/** |
|||
* Display info message |
|||
*/ |
|||
public void printInfo(String message) { |
|||
System.out.println("ℹ️ " + message); |
|||
logger.info(message); |
|||
} |
|||
|
|||
/** |
|||
* Display warning message |
|||
*/ |
|||
public void printWarning(String message) { |
|||
System.out.println("⚠️ " + message); |
|||
logger.warn(message); |
|||
} |
|||
|
|||
/** |
|||
* Read user input (for CLI prompt) |
|||
*/ |
|||
public String readCommand() { |
|||
System.out.print("\n🐱 crawler> "); |
|||
return scanner.nextLine().trim(); |
|||
} |
|||
|
|||
/** |
|||
* Wait for user to press Enter (for pause between operations) |
|||
*/ |
|||
public void waitForEnter() { |
|||
System.out.print("Press Enter to continue..."); |
|||
scanner.nextLine(); |
|||
} |
|||
|
|||
/** |
|||
* Helper method to truncate long strings |
|||
*/ |
|||
private String truncate(String str, int maxLength) { |
|||
if (str == null) return "N/A"; |
|||
if (str.length() <= maxLength) return str; |
|||
return str.substring(0, maxLength - 3) + "..."; |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
<configuration> |
|||
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender"> |
|||
<encoder> |
|||
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> |
|||
</encoder> |
|||
</appender> |
|||
|
|||
<appender name="FILE" class="ch.qos.logback.core.FileAppender"> |
|||
<file>logs/crawler.log</file> |
|||
<encoder> |
|||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> |
|||
</encoder> |
|||
</appender> |
|||
|
|||
<root level="INFO"> |
|||
<appender-ref ref="CONSOLE"/> |
|||
<appender-ref ref="FILE"/> |
|||
</root> |
|||
</configuration> |
|||
Loading…
Reference in new issue