From 0a31ea6e19758a6b5c58ca4ebb97c3957a1a81f7 Mon Sep 17 00:00:00 2001 From: WangJunyue <2768762959@qq.com> Date: Thu, 14 May 2026 11:14:58 +0800 Subject: [PATCH] feat:w10 --- w10/w10ai架构审计.docx | Bin 0 -> 17377 bytes w10/w10datacollect/Main.java | 21 +++++ .../command/AnalyzeCommand.java | 76 ++++++++++++++++++ w10/w10datacollect/command/Command.java | 8 ++ w10/w10datacollect/command/CrawlCommand.java | 50 ++++++++++++ w10/w10datacollect/command/ExitCommand.java | 23 ++++++ w10/w10datacollect/command/HelpCommand.java | 22 +++++ w10/w10datacollect/command/ListCommand.java | 22 +++++ .../controller/CrawlerController.java | 48 +++++++++++ w10/w10datacollect/model/Article.java | 45 +++++++++++ .../repository/ArticleRepository.java | 40 +++++++++ w10/w10datacollect/strategy/BlogStrategy.java | 25 ++++++ .../strategy/CrawlStrategy.java | 10 +++ .../strategy/HnuNewsStrategy.java | 49 +++++++++++ w10/w10datacollect/strategy/NewsStrategy.java | 25 ++++++ .../strategy/StrategyFactory.java | 27 +++++++ w10/w10datacollect/view/ConsoleView.java | 42 ++++++++++ 17 files changed, 533 insertions(+) create mode 100644 w10/w10ai架构审计.docx create mode 100644 w10/w10datacollect/Main.java create mode 100644 w10/w10datacollect/command/AnalyzeCommand.java create mode 100644 w10/w10datacollect/command/Command.java create mode 100644 w10/w10datacollect/command/CrawlCommand.java create mode 100644 w10/w10datacollect/command/ExitCommand.java create mode 100644 w10/w10datacollect/command/HelpCommand.java create mode 100644 w10/w10datacollect/command/ListCommand.java create mode 100644 w10/w10datacollect/controller/CrawlerController.java create mode 100644 w10/w10datacollect/model/Article.java create mode 100644 w10/w10datacollect/repository/ArticleRepository.java create mode 100644 w10/w10datacollect/strategy/BlogStrategy.java create mode 100644 w10/w10datacollect/strategy/CrawlStrategy.java create mode 100644 w10/w10datacollect/strategy/HnuNewsStrategy.java create mode 100644 w10/w10datacollect/strategy/NewsStrategy.java create mode 100644 w10/w10datacollect/strategy/StrategyFactory.java create mode 100644 w10/w10datacollect/view/ConsoleView.java diff --git a/w10/w10ai架构审计.docx b/w10/w10ai架构审计.docx new file mode 100644 index 0000000000000000000000000000000000000000..f247f853fcd132a5779cf7a43d85fa3e75a925ee GIT binary patch literal 17377 zcmeIaV|XRo+BQ0aj%}-B+v(W0Z5thSY}>Y-j%{{qCmlQK_++iU_gUEMobUR6zi-W} z>Z+fB_%?001F?G1gYq1PA~q`~(0X10aAk1#NAdjBT8B zmA={;J8ILqSz8h0fdZ4~0DwR8|L^uc_zg59j9K*2BM3hPzWL8IElISJ7xs_jMH^$A zJpqL=%Lv^GzK1qEidlT z_!wc5ZYR)h@Q<*5vtn-Vz?5PIqW|337dN2@C7Y3-;g72J3lK$_7_CCgIfw}_;2n;6 z`K)u{D|vy`l2E+j4+~Bw7aAzOe3ItPMvMqkclzDDqF#bjfL370_G3=pI96?p(i$L4vFTs&PCQQtP?Bu|Vk> z=i;Xv%?v5^9W!iL$V3QT05EU9E#qjWE5^=oM13KsO#BsH`{0qUa>#7=E#>w|b&)jg zZuy44BMzkmWZARLD46%dvfkf80CIn+o_MiX%@-djG9R@M`lEX4Iv86y($W5Q{NHN% zKe#Ub>8n@7^?X!I7`_YtSO1v~`IR1wd>J~!=~awna0pFtDdg29i^aEB?xiJQ?IV5B zu^+SX)9#KLBFZoB;SYWZgLQ*EWk@qQu?)QtK&50a_cnj^kSxBP}TIsIjc*=}0nS4Jx3pF5730-SfMO2SNj> zA#))^>7G4Jw2(C34#N&Gd08(A)f;rTZLu@7`ENctssFAYz>hK>C(r-@GYA1LxMi<(1uLM9 zSdv6y$MN_PgsPO!OgMxHQQ{E9OnfQyJxgF;q~_vY5~^$7RV16p_7M78GWLN^j9r7NOE~hVX1qUrvFG^dB*r*VXqI%CLN&=R;^4i0 z_t51vk1ugGVimM)1kPutP@*fQV}Czdhv4wNz=$qBxA0zylj0g_lct&MF(UlpT!su& zm6xta3y*D|H3~FVmN5?WBK(IIJ`IO-%icGhh6J&PK0TP9jwBxD@+M#w7;~xoFI6(C zm&`GTIDCA$!JM%BJWNX6fu_z-!>+UhDf1M>?~gGXf?8XiVp+3dxQBw*St3wb@J9)p z#xhjwbo4lDrw0QtG<~H@yQ9-P@ao%)0O*E{&UlmAYXZGi5Hy&~18?@B2t6fK!d>Z1 z%-p`jZAF+9X?5}!R8y5bfrDRT^H*-igp3<6rBQln9@T-1&aEjxsfk9G6=uP%MsDLy z)L@HRM18>cUuBM^p_MOUIOwp}P8<22z6^frgt6pVq?2QnJ`=2>5$Tr$X<+hFbiqs- zf7uO5SPf!k5ONO4q1jmI4rg&5FSO#shwgTSB8V}v0)vMBY-Z3B$8s{r zZk}#pyp2$*m*Esyn;?z<>)hxVps>uu@7BJ+eF=}J6HHRLr*yHrMUDCWQca`C!@Sgj z8MJn_=yMH%mve|vkJ=aYJ5!8!4(!<%QjLCUzz>Dr;-Z;=9-IK6oiLEP-}z&&qV0!aFcxdyQoz@# zzla&*E|GAy5kR*06@y(>1K2zXebJ6A^#p;s$TC8}3}tpH1LQDu&*e$wpiWOFvmyDrYIC|ctw~_xE4&vUd%$Lf_}Dj za83LXq|j2GP_(gk4T^cT$1kjRn!ZM?#oXv)0&`i8K7Vy+W?OY1amUFnEGa3m{pl+O zHwX&in6K;dG?Oc}fG{wSk7TMeL-fQTf{L+z@%4!yGb`|q27Z}fUL09a;LJ)PxcT!E zRhzL1>AZX-%E@Jd(Tf`a8AOO?BzfC0!@1S#UtCF0+mRPh;chwHI4VJfsL zXlLxk&EO8jLKzlK1fV;_vpB>cgh@w1gt&NYq_|V@YcCk4(s!SU;rH)e$#T3k%u#oc z**n}-A!pFFxYM%8ijRAM(sV%7*|a9*zr^Q$pA!yOr?5hH$Jz9JUv~~>Z41W3p(rdi zyP$PxGi2iHIy!&oX4(AQ-YqqqfUVec&B35-x_5)@)i93DY8@r53U^!h+(K8a=qV9(C8c~|>$!qkZ*GtcakC$>R9 z`P2x}RPT(iWkOQ}{F@+$cf~u$rd(eQj<=u8XuI3CH~zKo{;w|>?3|vJ35%59vm1XH z2tu|ZJS^-#0cXggo1ECFj!wWm*DpKZ|DG&0cVSy{LpM`RWde3%0UfzF|0pPyPY9f2>N?Y(1Vysk zpXTiZdIiAHP@kkA<2pXoR|XUa)MK6?;VMxhnwQEz${kk@aQ69Z`#HS2R&v7!EHgB? zrChfdo)?`AqoZKVC2mXDz((SgI?~xMnANKCko&qL9S^wn;eCh79A_?}xU2fYam(b@SIRaF}`Se>%t>*n;KN3rta zc}GBIxqzFCm425d$GTgrKJ6#&;@Y_pMXnSY{=`h(o0lO|H%=q6Rp_J`r%Duh-O$=) zFmwjKy8(e}+queXXtDJwXK|_iBUiEX^6ms3kM+R57YvRMm$xE9rrh!*5&i3$IPG{G z8WfhgTojsp6Jv*D7`e5}7(eoS?8+6jdea1VS60%Vi#Qr4=d4R<2iL@C#M1o~*Sm3C z9SW2^x9jJzy5tpOJl-Xjqqyz7Q=`y?A>`0s1~l0u-g2mu)R+w(Az72<(f3qiKFkfB zC;CP)YP-Est0ckRjNwTtc9L%H=+eAGhR{!(v~UUeh`|YOvF2@i!23DEZgX?Zfh>1d zvWWwj&KAg#9fQuv+gJ^}-h-0xDvnb+OIHf=KJ*JS3OqMm_pT!6%HAtPR6IgCMht^8 z#Mk}g@Wi*3RS%s(M;)A5ipkQ0<{l&^EV$h;p$sgCy-)0$5}qn8X13v_wX6sn-#8l# z8XO|B1S=yoQ=YEL6&FS45*JIfh`l5|bk>o>2Z%Nfs!!Mi+a`pe6$1+^2rl!e)itb? zP0gH9C#2$ZzWMtz=44{#FaeKk(O{SW!;gI5#z_=Fe6if$*yE~uxg=K8h_hwT3PVht zlWq+T<4-GN_%6friY_f~J)g5O&$2L^umgeeEU}H4rbc$uxr+~HK=pJ*XVwt%U@CP;l^?&5 zrdhciPI^cn{CRO|i5x->PnRIZth7UB!$s6N9!e*y-lZf47Kb(DTG; z`cX!3a2D5&qjWgy7j)*wI>xq=i*td@ATGcx)995#;jP@}SBomuhzV6Rvc?_KNsWae zJ4S<42+pNi(-Yk7kf}px<_^Ed0QIfo3U4y4#_NVf!c#lZlA5l^)ydA3rBalp`4JAU z<8DNJF-G9GonGH88>FQ83bZgiW>!B}?xds<=Q?$H?Nq{IH@ZCK&)WyROOA!?#x^CZ zUkh-yQAG;rq2&^1Af4~k-W}CwCR0Y7KA+W5peA_N0h^e6s7ex?UJ>W<>xM6*%Cr-X z#vJcZTYA2Qu$|xiO4}IVu%!8Et5%$UeNy-34gBXcz2DN_G87$7s#PeKFqfKkc7t)v_C=+Wa>rIvimPWQK0>vpXQCnFbim8patb1MiJcg~rNttf%k>rh zpFD&5&T!mMA3j492mpWsfB^d4Gx(SL{Ad5*Kdv;;#|+m;?*G|aMf}8XGduwoft~(K z9yDCmf{?L}6uw%T4?sp9uYz}m1=1Q_UcwSc!C2@X1n;}mYy6Mj{=Xf}FQ~Hz8(;vyBmn?` z`jO*zGjcLBwl=2w{mAf}lXc@fa+H-H!Dte?PjV68xSK_{P+%yA5AAtbqIy=NS9vg6i6iswgpoY*Oue~*eG&ms7bq%+9g12(*8#sh)zOk>D7LLj8F-AL=$&KI9h5UDW77GAAURXK z#v#(M+v|kv;t+BnzL+Z}hVio{>&?C#ola>{K^j+{aqer)>uM7)-~% zZs_SAOZ04;fcX+DfhS1wR~7p03JXXz|FWSWv@X6&de3yXrJR&F=c))>TBXWo(P~0P z5jxH7Gm$<^I0rZ1vZOq!c-j3bHOXo1K~cc;q;On8l-A@GN+i(3X&Zdj>uq?wXdcFn zO_Ty@&LXrY{!Z2TrxRUkJ14ySIJ*TFG)?WWsLz8LUKPv3Zr=L!@>qX(Sz29ao zHoXmeT{$=@`{2d>0v%zq+1b>Ap&O*WMgIyH#~BPsAZaxxmJ3UW_{5V?>)Y8RgraW8 zW3_U{8f>Kx(%INziSQ-lQ2+`0(jo(++iTD)+WsW1s_gryidsRemqRD{t__isReD?3 z_Wrg3oW86TC2 z0Btbus&xFr)vLoR-JTAKuoUxxIVGz?JTPDLn5w5&9^AT0_TRM-|TU^6iGN;C%Y z=p?UQZt^Z05;#8S4sW3+eYgP8){Y@14E%`n!6ED(O-|56LC?^LjhPKgy_9S$Y}VUB z$08H)F}GhoP5Y=m@%diehSSk3%~H6Yb|+Ifx$MVHT$`ZV)R!DFeIcy&k!IoxzXc02 z><8wdGM|)4d-oErx8%)6Wi$;OS0^+?|N1FVO1|wUbk00W^g`Te`%Nm61pHpy4o_}5 z9*~C*qo2(Bw7>~f(N~!0M<+1a9Wh?6j1kw62zUeLU~^9%L1KsbSi1u$zU_jx%GP;+ zHu1An+~!s(l;oy`A0SS+3%%+&S0Y~BHeX%Kw`%@GrJ!0}6*S>AJceA_B(6kVj9E2v ziZEBzg_3#&4UMMIn+*Cj*5eZ#8F;D3RZSLypPhe97CEo+HPuB{Yq2Zltd@(Ox{jtj z53VdWvBb7X9C@HVOz@D&1I2TzcFSB&u~e;e{<1r2r;51IXSV4%D-N=nLkq$Z%>1uI znZ0bJZc+z1vL#Z1W(h89wd1g_wv1yL&>Vb3lX~vtnmz)=2XG{i{qD=K%rq^J{EC%V zEF_EfI=o{?(RJ}xPGLh1qOvmqwNjzZw~H)%5FxM@cRW#cZLDfwa?M}@NMhtQ?I z3P0BDzm+J?X7TE*6Y3f$?|JzvC$+LUYI`J}xMU*H`4A+OpvGg)p%_9p<}DU{m>s`) ztFzfT=VzH`=A{S?XTFMKiS-rt%896Rfj#&f4Q)9JX60WCz4AVH0*qszt*(jM=i;V4 z;zJ!5f$Q$P5YMR9sBlzw8tdf^y*8CUVEfW|VZ3aop*KF|53^tOS)C(#E*Foz?01}R zy8C8VNSrwuCMl&>`Il>`b#*_0tU|vCvYc~*QakcuJl*AB-%`E$ZkEjX^XLDtSZGc3 zp2h&x788h+f6!j!SkUWY!-x>kmovWwQhjLEFI+9Zql#RpL63OR_u2wtmkoNjzKvZq zFrE(g`4~CZ?(3qDW; zA*A98Bv6`DT$5t);#6J7VC!(4XdGdI<-P5ab&{=9|mcsUz9>WLK} zCFQRQE^bUGu5PAS#m+2e-pxvP)Agty0fzyQaP{YKN)Hn!yzg#}nUvF5^Ge_pEomyt z6`B^>vd>5~RXaXuiyX9IBR74l)t;wNfq-jM7@*O9l{yhaJ=KU3`Vqfl1 zI>Jm8$mPup8BvA#nJ%OC2zBeKnbbonO21jQ8R`NH;)AM^R4VJ0dndy7FveJUyM#y& z_-(kbyY<*N3mkZaC%2fwSWqy^LD7{M^ze_aM(&acJ$K03+4=}c1|IbJYU8?p`4UCo zEOeL^+Gdi;{iPmtOshk{I7YbWMcm%VLOpkVb$_71VpIeH5BiZZZdpRtjwH0uNf9zNEEm0G7Kmj2^=Db#zngje3IC#RaUnGrBG4@GWpO#Koas zA(@-hpkrUMMUJpXx1oNv&Mm%75=ZXJ*Jy0SWSZUjj)h`7xbewQ+L5inS82xXq6jF8 z>*|0FcIz(dTXk-V>D^u-b#tct#O=$m&zu%**?Q#S{v<4dzODPPWeNvieZGOIrmSN%jg4CMf0saK0>!$-MT6PWknKF3(<1d2-r!1@n;kY}o zx(VZ$%>f=f;3Z-7O}&?3-5=v}FB_mh?Rrh3MVjixm??=`rmj`T)Kb7T&HckluBNy} zas|IJ#?VHb!zr_JOgX8vALnVq~&y_*+ne7oOXNqd^sJ$Pe_e zIQ56xQ|-=&uR}%#4q!Io+HG@^i+$?JsU~d9CRm#eFJR4dN1WGcMzzEG_kl(SH6>^0 zmDEsmARY2Cks9haM)p+L;L9zE(tcJ3MkK7*p*L4}B}6KkySkr4zD!+kJH!Ao9}~5 zCa;K0Uz&-G`o| zW5>f-k{u);rH{XD!A z9n|gY%rUC-XAXz#oEf(iOy|RiBOjDyThWa!FXu});^;;cg9u_s2RfN61fu-7h!dM5<`$_#cB_6$CDdtgIPrQr7$6Y zehkAD@)FKSBM@4*b%qG)zJdXq)4k;rb1)rv&@%SUu_BL|e+3Z1eNfV}$a05#)?~ef zZw5a>l5+SS+~d%c@TWpZT)GZ^L{Q zV}?%HLvK_EnYlxO7`*Xf5G|FFV{q?Ce{m8aN}Y-+3`eTSahI>?&YyifuCMdDE79yO z&}^<~D}~d&CLS}^ToG=od&Gr=2DPVEnx&9Ri2^lUWv_GvXHFRO(}Y~kMAi#NQn$@c z!%bc(tvsv32?D8+e41a%Ef{~{o?+&UAQhdXX)Ul@D;@`}#q|8?7Q+l{KM|E44XQ@- z4ZSxiIM4_bRS+J&Jl%08YD~gmRDL_-7eU3cJ-lbS6bJ%p2SO6f%@+Jdd(UVd_sV_W z2*Z!Bnlb(~!^gskNNl)lz~W9={SJO4#1#)rlL4r5k z@WC`5Q?{}^YV&@!HUi7WBc7Xn31@rc89dzj0zI7in6>*{`c!sABH!M}NEv-XqA!IL zZXOlAmWW>jm85AVr{9Gb-{R_Uue`MDZ${1HZ?ea&RO4JXbL9Osobn5dl zvASK~6dog8qU6^4I<&U2Dmm>I+Bnby=Vwuun33rz{P+3&uHZUm-}Ex~Co(+Y*977@Qthz23xBllS5AVU>Xj5v40#5tBpJb5B+8S&Q{iQ{aZ ztlw^fecX1O#FHiBMiamLoZX|D!9w_Qys{ZUjpjI|>256YDjkCQYpk0o0aBh+C^bgE zjuJUfN{ptIk#IDC{Nps_w-Zkh!^ zuAcb8QSdS1LogQj7v0Cnmwd==Rlg0=e$7u!XVfo@`9MC9-yaS)eyA-7D#`-=D`$a! z#cw1El4buY@81Py&jeiFls7|OCHtQI3b&wtN=IwCNl6K0EKHp*YphVZN0e zv|^g`Quo7eCF@AWdX;=)v`vaZScX1u--6^=wm8btt;iYcIK%h)rVN&adNyk%>m(~k zL)z?-nr7?l{U6)Zl7-}WVgvwSFdqQ;nD+bEbd00ZS1aS+$DXI!6ArsAh@Hih7u`Q7 zINYq&>#SreUB~Vtm=sg-FNWhkd?X`%X+$XT$f0N8QY{AwN=K>vm{G#gc0Nkm`nJSo z2!J2T9w|9%$c!7SrXouc`MqdO*Bh4EC3v>aqYWOot$7+lR>tj9_Sqe9S^&4f$esAb z#QeadJl(KEoDlH=B+t?MOvS@KI`8}PVRLld3vH1)0U+*lhMW6n;@b1d%#{t=)Jdjp z>X#qs&Jcy{Au+n{?8!QZxNTB+X3P{iu_-cI{fmvOkFD+8EWJ54%9qT#m@fz04l?mt z%NIq=Xd&DqCRVYDX3f^qHea!56!3mfBa`t&L$`D2i+`HM2hQPuR?NCGe@kV@TqpWO zGXCswJj<;EgtG;>2kIs-xUj#9+>&SSM4@ zGMMK+QHD0sM@H0ADQ`sx;X%oK$8F#zTHQFc^Y(Ht#TORtrj~c(wcrqSm z>e{P``muy7CE}I%jjP%X_9Dmo1Ytz!2;6wg@SlrJY3(Cs4J&W+fnmlSgnMH`o_IJ> zN(2Myw|m-R=!|z^y2wNvUKFqf-yh;C;i|@cH2v;atex+fB#hpXZ!ZK(4)O`^3x!QG zta%QuH@=s5$bn;7o$&Qp9igvuNx3~7cVnUil%Z67gM8$ba}c9=^86{&}>me+yl>WIG`)b?G{ zu5Wz}q-EPi=9f)N`b``5`GM1UP#cFw+sCa1w>5(jRBmKO$?HkVbcZZ5?qXk@TNjbf zq1_6?-|@MZk7nk^#NEG9Fs7e}aODRId2QklN)(*Gex`X=j7(UwFadl)pkM-%zz z2SywQnkCwu5SBN-gP5EmbCgiT3|TtWeGg7a)kB~@Nrq?O$Y0f=KSO+d+KDtHsZau} zF*n6TfpkDva!4%o^`XdqhlZEiRNYlqJZ8%Zb7&5EL6VoSsHSxh4C+JKk)zkD`rC=V zp54KiFxEtTF>@MOusO4=H`UH;^5(}y*KZfIX6vsm1J`PWZjN&^w!E&cc#dB&3mv%z zPdVl5>oQ|+GtBM86&y0-2B>3@SMO>LI1Hm52lLyuMn$t#2p_Q)%MoH7m#NiVTz_Jv z(b+4zW$h_%e??LYT+}w8>!(+To6dSFH60ui*P+q~oRerUM4}4^vUY8ZZTASM1hwn= zF`f={tj{BKDanzM==3z~S+3$@y98Cu9(d6G>M!xjD%r+08(Nk<`3|CBj+T(=}Mc)+cqRFS%nWHU03mvMvV! z1HN5|{j=rhQ~p$)F~&ts5%}FqS%;qIu!>WlZ#$NpkA>M_Sz$!%-*23ninEG>QI%jx_((^oG zu1>a%7PnfnPq8wp<9VXRlNe4&luWsGi#2y1zigflWDo8#$QF++WrIla+2TtUO9&YeY`3EA_E6f9yEq^wpqFU4lb^6YumW zpoLeROyKcZgl@nMs;N$!kF4rf7eM30u7R}0Lc z`}Nv*CPLykj_2k!*7MusV|N~ltf{4B{G4ptjHHB_SL}JE`x&QAc{+IL(Z*}GofabH z=2M5K?n&Ug>MX0ay%G1U8cCm?)0PiF79Kqd@Iy*jJorC1WhBD}!Yf9Aokc1ORbwVc zuQOv*iy&Dhfz!}zGl-_ssOImnjP)fYU8acA&?L$%Qm5ZurWCL>`r0j_@G3`|LU#5Q zk(WhS!mpx;B^ql(;IRtNq$&B$(kG}w6L6g|8#ddL;kEII2)m#}WSo*$IvWR@AuU-x zC5>d^#xu`fm9>%aZK(SHR7t%lC!sqyTcl-g^eH(>#b77Fgtf2-JEPO8G{$Vh*DS{7 z5;|!&3^vRBc8XBR7(0)X827>F7>}Fh99JipQ;&LzXtxZtvRjAR;5wp$FhHW|jPYrS zPy-(P6u~T40HQ&TD+1*dfjk%3kt|mLr9qAmRHFm|m_~^m&5Xp|s)(I^A>pp_%hxgh>eFbH2HN_tl$N+Et|RDqm8D8gw0 z8ax&!bjpUIk0RS3Q1enuv5WnBc`VCt~i{^sxe~n`vDDD(8Z=2|h z&c18dsNH<~k!Uk#c_VLwY%z^jW|fFr{#p-Vnz0Gm2#*J+)G-4WAZ;)>kJ7kb*BV-G z6UwLk%hP$|B_KL<C|QV063e`p`dMkg7_P+6aOS{ zEait};D91KW(2r12x)!D@07C)C?c9aP9dz9>pwRX&P09NnT7HvTYw8RS%TXaVM)MD zvT)5^L99^{1SKm2ir|_@7)`Z+_(+k8P%9$@PO^B|vl6w-!|HBPgw4$;4U)4_>Z^j0 zWD)9Cfc-$Zp2k$tpDPKHo0btKV9(px1s7rw>6HdSe1QFs=K+J6A7oN20S@_CN+V{! z6NY^I$iglUtGD*SHmM|7LRvOYJ&H9?9t7*SMlep_?>rntNXvm81UDb2f2Kx*2@y4q zv{$x=qe_!nu#C7;XDVtYnU4!JP>c#DTY(ETQG+`gX`$7qB-+>}clJ@R{d`>F$Mu{T zOO=L20hTJwa_OM5q_fg)Ss`cHdVD=@Pw4+=vi-aQ?mRA+5!J_=Xt5}b{m5uk{6R4< zA6cGsT=H)o5~}|s{$5C(;-3>g3i+=m=s+MH@1x2Ybc{Rj?RpsCAz7v-Ti2EOMJjGS zxZBJO^^$e$IH!KTQAOiJQaf)EVDPT>OR5uS-A#*vasyBfG#@nuiX=~^HX;J*zD?g@ zmuOMmsZjpq1hvCPz2A0!ca0Stl?uP+Tb{9bmPT4_31@Og&vMVvmUwOQmm_Qmbh-hcdDcf^dSRwJ z>&D(?p-%0H(Yf5L-XG9exJN;kJFn7Hn+qP+ePND!=|QN6d(jK1hYQrIRFfkUrt@Kr zxpN-#RpHy)V#1*->ifXkhKZ0W>HB9Cz&}s(_|$0RR83?YeOnFEYn^Z?1@0>m^Xg+1 z15RzjViKXkUl*8v`5KDR#WynqjS5{Fh7)@AS~4gd4^RYwEnxg;c4i-#-Woy>ln~dU$Ao1XvXyc`t zguK4*_NG{*8Tc$()Mn+6+&Iu$={$G*V>WcDr$$v3_^AD^=ZevR3Y82UhVNq6=3ni|B%&)4j-4)q%sJ59 z*YA+Z6-h4h;iH#U(b5# zc=X;^2Aml}&MNz?e%X2d)&=KiZvBX@_K2P6t}!e4Nyuy60j#)ES5&1ds>XXho^SQ$q~Mp2T?FLA$kv!J6ZX}Jem`8^6ik8t;Co^gdGaeRlAsS;1V5O;mRRj|#Wt?1%;WWam--JZB8f9rX3-&^sJ$18A`*3D# zv$4X}wv|UL&UWG48>8Hw+<`$07mYaRzSLlBlqtOavU6Udfb~~9I?;b}8G`|?w)cK` z_TdOy&@fBbzJg5HhK~>pZ{KDa-x0pRZLr2AKR?lrGSs!&FfF(@BQYy-F?Q~|-p+bi z3?kfghVxy-?0SC4_WrfIIU`Y<#8Ft!Mrm9QS0_Z+?p~O_mi@KRB6+o&avD1fbJ5|7{@k3vosNkbNOwB&Va$tE5^Zz&0IF_4zCEN{vDyqlq` zBHZ-N{w164d80&U4`(PTS-!pm53Wu?U;w?C*%q*rk zrboD+Fd+x`=S_3^&XHm?C4`w41(oxH`DQZ~4SDswEiJWZ6;W9x^T}~Dhfn4sG@R5g z^wdR_CAk7mfVWixScDL7@kqs{{LSD`a_v(F=@ zr)<<_T;`n)Wln|+Y8|MLXnmzXE6Zj-3K@bfim`1YybEHw;}jY;^tN){=N@g3~lWz)#3L!|*x$^|6JcYt8O} z2cfVOMUj^i)+-uxY^i%4SX5Q;qcrxAOV!ReqKF?3O+ko9t zT?|DTKfYZ*y8>BbaZsfL1$(0{c^Z(Yxz{r8p19=U=}*RUR{rcTid};xhUAP>6r^lO z)$E9ackn3sdR4Rf&8UNF`)$|{thp$qh;lazR(N~iSlKY;AR66YA%is@6`h2TD| z=T9fHdjP{c1MR>O3>eKuGgM&$-C8-CEkwtkvol-Tdt1}f@HHI=hJa77pH8^g);gKg z<>6PU=Xc)=gQ1F{Klf+ceI}(bNc>p0duENljb~j0YF?`%9Ew+!Bt+GqC*W6ck5S74 zS=Cj;_iz-hv$roVq}>}w>s}G=aO}7fB7n4P34)SM6OMZsg1+2Pw~7hgE;a z(L>}~Z_4iKAch+-gNYE^5H;_D zHk^wBNzin%fP~Pro16msCsAVU<*)sVm7bUprZ+QI-gg}ZFSkEL0Tujhd@u1vbGYX_b ze|EdjZa#Xwtj?Il7j9rRAbyXKpC%jz8F8T9;}&V2t@R1wHZ0v^o*?Nrg9eL?n~e`C zjoHVla<1aj1R8Xd_z@sAV5VOQ#lUI>gvo*{Dz=!h&zZka=pKb?EXfJJMq(7gRFj}6 zX;1O9Tm)vruYZ;x<5h_T6P)4B25=tBdz5sga{4O%RW^#J}fD}mMcbPA`I$hbD z?bXo_P|L}ZeaSU64ik~sHpXzfdMRk5t)>-)nx90A=axb}7E=Mmqq)H_2l6Yw8S+l|Pys5tXy~d)fp~&Z$g9U1>L^0S@Q8PlZk8^v>P605|E+-RhP^fmz%T8DuooU9`K4_Mir`wJcD*`IlP@@aIPESh!1 z;^ZdXo8PE=lH`;cx=Zvyw~};7Qr>{m8ez+-vo-@adJeZeuCEO=ra9E^17~)2ySD#O z>H-1Ne2m)u`M(VQ_3Qj`{D*%wkdyo?!C#}T|9}Dj*&kY{KgC}E9r*V!)IXqIAKKyn z3QGMu{I6Q|KfnOM@`rHX|4+I8cc#C~mH*%(_)stZqj>r66n__Z{XxNi^KXhj%f0>% z|GVnp54a}bzu|w;K>Qv5_Z7ZB@YSUMDgOUm@B2IW@1yxYzyP*?ga0"); + return; + } + String url = args[1]; + + try { + CrawlStrategy strategy = strategyFactory.getStrategy(url); + if (strategy == null) { + view.printError("No strategy found for: " + url); + return; + } + Document doc = Jsoup.connect(url) + .userAgent("Mozilla/5.0") + .timeout(5000) + .get(); + + List
articles = strategy.parse(doc); + + + + int total = articles.size(); + int totalTitleLen = 0; + int totalContentLen = 0; + + for (Article a : articles) { + totalTitleLen += a.getTitle() == null ? 0 : a.getTitle().length(); + totalContentLen += a.getContent() == null ? 0 : a.getContent().length(); + } + + + view.printInfo("===== 分析统计结果 ====="); + view.printInfo("文章总数:" + total + " 篇"); + view.printInfo("标题总长度:" + totalTitleLen); + view.printInfo("内容总长度:" + totalContentLen); + if (total > 0) { + view.printInfo("平均标题长度:" + (totalTitleLen / total)); + view.printInfo("平均内容长度:" + (totalContentLen / total)); + } + view.printInfo("======================"); + view.printSuccess("分析完成(数据未保存)"); + + } catch (Exception e) { + view.printError("分析失败:" + e.getMessage()); + } + } +} \ No newline at end of file diff --git a/w10/w10datacollect/command/Command.java b/w10/w10datacollect/command/Command.java new file mode 100644 index 0000000..029cadc --- /dev/null +++ b/w10/w10datacollect/command/Command.java @@ -0,0 +1,8 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; + +public interface Command { + String getName(); + void execute(String[] args, ArticleRepository repository); +} diff --git a/w10/w10datacollect/command/CrawlCommand.java b/w10/w10datacollect/command/CrawlCommand.java new file mode 100644 index 0000000..0841d57 --- /dev/null +++ b/w10/w10datacollect/command/CrawlCommand.java @@ -0,0 +1,50 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +public class CrawlCommand implements Command { + private final ConsoleView view; + private final StrategyFactory strategyFactory; + + public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { + this.view = view; + this.strategyFactory = strategyFactory; + } + + @Override + public String getName() { + return "crawl"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + if (args.length < 2) { + view.printError("Usage: crawl "); + return; + } + String url = args[1]; + + CrawlStrategy strategy = strategyFactory.getStrategy(url); + if (strategy == null) { + view.printError("No strategy found for: " + url); + return; + } + + try { + view.printInfo("Crawling: " + url); + Document doc = Jsoup.connect(url).get(); + var articles = strategy.parse(url, doc); + for (var article : articles) { + repository.add(article); + } + view.printSuccess("Crawled " + articles.size() + " articles."); + } catch (Exception e) { + view.printError("Failed to crawl: " + e.getMessage()); + } + } +} diff --git a/w10/w10datacollect/command/ExitCommand.java b/w10/w10datacollect/command/ExitCommand.java new file mode 100644 index 0000000..eafcd1d --- /dev/null +++ b/w10/w10datacollect/command/ExitCommand.java @@ -0,0 +1,23 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; + +public class ExitCommand implements Command { + private final ConsoleView view; + + public ExitCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "exit"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + view.printSuccess("Bye!"); + System.exit(0); + } +} diff --git a/w10/w10datacollect/command/HelpCommand.java b/w10/w10datacollect/command/HelpCommand.java new file mode 100644 index 0000000..cef04cc --- /dev/null +++ b/w10/w10datacollect/command/HelpCommand.java @@ -0,0 +1,22 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; + +public class HelpCommand implements Command { + private final ConsoleView view; + + public HelpCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "help"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + view.printInfo("Commands: crawl , list, help, exit,analyze"); + } +} diff --git a/w10/w10datacollect/command/ListCommand.java b/w10/w10datacollect/command/ListCommand.java new file mode 100644 index 0000000..8147be8 --- /dev/null +++ b/w10/w10datacollect/command/ListCommand.java @@ -0,0 +1,22 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; + +public class ListCommand implements Command { + private final ConsoleView view; + + public ListCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "list"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + view.display(repository.getAll()); + } +} diff --git a/w10/w10datacollect/controller/CrawlerController.java b/w10/w10datacollect/controller/CrawlerController.java new file mode 100644 index 0000000..2067cdb --- /dev/null +++ b/w10/w10datacollect/controller/CrawlerController.java @@ -0,0 +1,48 @@ +package com.example.datacollect.controller; + +import com.example.datacollect.command.Command; +import com.example.datacollect.command.CrawlCommand; +import com.example.datacollect.command.ExitCommand; +import com.example.datacollect.command.HelpCommand; +import com.example.datacollect.command.ListCommand; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; +import java.util.HashMap; +import java.util.Map; + +public class CrawlerController { + private final Map commands = new HashMap<>(); + private final ConsoleView view; + private final ArticleRepository repository; + + public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) { + this.view = view; + this.repository = repository; + register(new HelpCommand(view)); + register(new ListCommand(view)); + register(new CrawlCommand(view, strategyFactory)); + register(new ExitCommand(view)); + register(new AnalyzeCommand(view,strategyFactory)); + } + + private void register(Command command) { + commands.put(command.getName(), command); + } + + public void handle(String input) { + String text = input == null ? "" : input.trim(); + if (text.isEmpty()) { + return; + } + + String[] args = text.split("\\s+"); + String cmdName = args[0].toLowerCase(); + Command command = commands.get(cmdName); + if (command == null) { + view.printError("Unknown command: " + cmdName); + return; + } + command.execute(args, repository); + } +} diff --git a/w10/w10datacollect/model/Article.java b/w10/w10datacollect/model/Article.java new file mode 100644 index 0000000..147dbe6 --- /dev/null +++ b/w10/w10datacollect/model/Article.java @@ -0,0 +1,45 @@ +package com.example.datacollect.model; + +public class Article { + private String title; + private String url; + private String content; + + public Article(String title, String url, String content) { + this.title = title; + this.url = url; + this.content = content; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + @Override + public String toString() { + return "Article{" + + "title='" + title + '\'' + + ", url='" + url + '\'' + + '}'; + } +} diff --git a/w10/w10datacollect/repository/ArticleRepository.java b/w10/w10datacollect/repository/ArticleRepository.java new file mode 100644 index 0000000..698d907 --- /dev/null +++ b/w10/w10datacollect/repository/ArticleRepository.java @@ -0,0 +1,40 @@ +package com.example.datacollect.repository; + +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class ArticleRepository { + private final List
articles = new ArrayList<>(); + + public void add(Article article) { + if (article == null) { + throw new IllegalArgumentException("Article cannot be null"); + } + articles.add(article); + } + + public void addALL(List
articleList){ + if (articleList ==null){ + throw new IllegalArgumentException("List cannot be null"); + } + for (Article article : articleList){ + if (article !=null){ + articles.add(article); + } + } + } + + public List
getAll() { + return Collections.unmodifiableList(articles); + } + + public int size() { + return articles.size(); + } + + public void clear() { + articles.clear(); + } +} diff --git a/w10/w10datacollect/strategy/BlogStrategy.java b/w10/w10datacollect/strategy/BlogStrategy.java new file mode 100644 index 0000000..1e23b2b --- /dev/null +++ b/w10/w10datacollect/strategy/BlogStrategy.java @@ -0,0 +1,25 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import java.util.ArrayList; +import java.util.List; + +public class BlogStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url.contains("blog.example.com"); + } + + @Override + public List
parse(String url, Document doc) { + List
articles = new ArrayList<>(); + Elements titles = doc.select(".post-title"); + for (Element e : titles) { + articles.add(new Article(e.text(), url, "")); + } + return articles; + } +} diff --git a/w10/w10datacollect/strategy/CrawlStrategy.java b/w10/w10datacollect/strategy/CrawlStrategy.java new file mode 100644 index 0000000..8b3cbe0 --- /dev/null +++ b/w10/w10datacollect/strategy/CrawlStrategy.java @@ -0,0 +1,10 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import java.util.List; + +public interface CrawlStrategy { + List
parse(String url, Document doc); + boolean supports(String url); +} diff --git a/w10/w10datacollect/strategy/HnuNewsStrategy.java b/w10/w10datacollect/strategy/HnuNewsStrategy.java new file mode 100644 index 0000000..5ad3866 --- /dev/null +++ b/w10/w10datacollect/strategy/HnuNewsStrategy.java @@ -0,0 +1,49 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import java.util.ArrayList; +import java.util.List; + +public class HnuNewsStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url.contains("news.hnu.edu.cn"); + } + + @Override + public List
parse(String url, Document doc) { + List
articles = new ArrayList<>(); + Elements listItems = doc.select("ul.list11 li"); + + for (Element li : listItems) { + Element link = li.selectFirst("a"); + if (link == null) continue; + + String articleUrl = link.attr("href"); + if (!articleUrl.startsWith("http")) { + articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", ""); + } + + String title = ""; + Element titleEl = link.selectFirst("h4.l2.h4s2"); + if (titleEl != null) { + title = titleEl.text().trim(); + } + + String content = ""; + Element contentEl = link.selectFirst("p.l3.ps3"); + if (contentEl != null) { + content = contentEl.text().trim(); + } + + if (!title.isEmpty()) { + articles.add(new Article(title, articleUrl, content)); + } + } + + return articles; + } +} diff --git a/w10/w10datacollect/strategy/NewsStrategy.java b/w10/w10datacollect/strategy/NewsStrategy.java new file mode 100644 index 0000000..f6eb4bd --- /dev/null +++ b/w10/w10datacollect/strategy/NewsStrategy.java @@ -0,0 +1,25 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import java.util.ArrayList; +import java.util.List; + +public class NewsStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url.contains("news.example.com"); + } + + @Override + public List
parse(String url, Document doc) { + List
articles = new ArrayList<>(); + Elements items = doc.select(".article-headline"); + for (Element e : items) { + articles.add(new Article(e.text(), url, "")); + } + return articles; + } +} diff --git a/w10/w10datacollect/strategy/StrategyFactory.java b/w10/w10datacollect/strategy/StrategyFactory.java new file mode 100644 index 0000000..b66c696 --- /dev/null +++ b/w10/w10datacollect/strategy/StrategyFactory.java @@ -0,0 +1,27 @@ +package com.example.datacollect.strategy; + +import java.util.ArrayList; +import java.util.List; + +public class StrategyFactory { + private final List strategies = new ArrayList<>(); + + public StrategyFactory() { + strategies.add(new HnuNewsStrategy()); + strategies.add(new BlogStrategy()); + strategies.add(new NewsStrategy()); + } + + public CrawlStrategy getStrategy(String url) { + for (CrawlStrategy s : strategies) { + if (s.supports(url)) { + return s; + } + } + return null; + } + + public void register(CrawlStrategy strategy) { + strategies.add(strategy); + } +} diff --git a/w10/w10datacollect/view/ConsoleView.java b/w10/w10datacollect/view/ConsoleView.java new file mode 100644 index 0000000..3c1d47a --- /dev/null +++ b/w10/w10datacollect/view/ConsoleView.java @@ -0,0 +1,42 @@ +package com.example.datacollect.view; + +import com.example.datacollect.model.Article; +import java.util.List; +import java.util.Scanner; + +public class ConsoleView { + private static final String ANSI_RESET = "\u001B[0m"; + private static final String ANSI_GREEN = "\u001B[32m"; + private static final String ANSI_RED = "\u001B[31m"; + private static final String ANSI_BLUE = "\u001B[34m"; + + private final Scanner scanner = new Scanner(System.in); + + public String readLine() { + System.out.print("> "); + return scanner.nextLine(); + } + + public void printSuccess(String msg) { + System.out.println(ANSI_GREEN + msg + ANSI_RESET); + } + + public void printError(String msg) { + System.out.println(ANSI_RED + msg + ANSI_RESET); + } + + public void printInfo(String msg) { + System.out.println(ANSI_BLUE + msg + ANSI_RESET); + } + + public void display(List
articles) { + if (articles.isEmpty()) { + printInfo("暂无文章,请先执行 crawl。"); + return; + } + for (int i = 0; i < articles.size(); i++) { + Article a = articles.get(i); + System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); + } + } +}