上传文件至 'w11'

3 weeks ago · 6ba9fcf0c5
2 changed files with 286 additions and 0 deletions
--- a/w11/App.java
+++ b/w11/App.java
@@ -0,0 +1,253 @@
+import java.util.*;
+
+class CrawlerException extends RuntimeException {
+    public CrawlerException(String message) { super(message); }
+    public CrawlerException(String message, Throwable cause) { super(message, cause); }
+}
+
+class NetworkException extends CrawlerException {
+    public NetworkException(String message) { super("网络请求失败: " + message); }
+}
+
+class ParseException extends CrawlerException {
+    public ParseException(String message) { super("数据解析失败: " + message); }
+}
+
+/**
+ * Immutable value holder for one crawled article.
+ *
+ * <p>No validation is performed; every field is stored exactly as passed in.
+ */
+class Article {
+    private final String title;
+    private final String author;
+    private final String publishDate;
+    private final String content;
+    private final String url;
+
+    /**
+     * @param t  article title
+     * @param a  author name
+     * @param pd publication date as text
+     * @param c  article body
+     * @param u  address associated with the article
+     */
+    public Article(String t, String a, String pd, String c, String u) {
+        title = t;
+        author = a;
+        publishDate = pd;
+        content = c;
+        url = u;
+    }
+
+    /** @return the title given at construction */
+    public String getTitle() { return title; }
+
+    /** @return the author given at construction */
+    public String getAuthor() { return author; }
+
+    /** @return the publication date text given at construction */
+    public String getPublishDate() { return publishDate; }
+
+    /** @return the body text given at construction (previously stored but unreadable) */
+    public String getContent() { return content; }
+
+    /** @return the address given at construction (previously stored but unreadable) */
+    public String getUrl() { return url; }
+}
+
+class ArticleRepository {
+    private List<Article> articles = new ArrayList<>();
+    public void add(Article a) { if (a == null) throw new CrawlerException("Article cannot be null"); articles.add(a); }
+    public void addAll(List<Article> list) { if (list == null) throw new CrawlerException("List cannot be null"); articles.addAll(list); }
+    public List<Article> getAll() { return new ArrayList<>(articles); }
+    public int size() { return articles.size(); }
+    public void clear() { articles.clear(); }
+}
+
+/**
+ * Contract for turning fetched page content into {@code Article} objects.
+ */
+interface CrawlStrategy {
+
+    /**
+     * @return the label identifying this strategy
+     */
+    String getName();
+
+    /**
+     * @param url address about to be processed
+     * @return {@code true} when this strategy is willing to handle {@code url}
+     */
+    boolean supports(String url);
+
+    /**
+     * @param html page content to convert
+     * @return the articles produced from {@code html}
+     * @throws ParseException when the content cannot be converted
+     */
+    List<Article> parse(String html) throws ParseException;
+}
+
+class BlogStrategy implements CrawlStrategy {
+    public boolean supports(String url) { return url.contains("blog") || url.contains("csdn"); }
+    public List<Article> parse(String html) throws ParseException {
+        List<Article> list = new ArrayList<>();
+        for (int i = 1; i <= 3; i++) {
+            list.add(new Article("博客文章"+i, "博主"+i, 
+                java.time.LocalDate.now().minusDays(i).toString(), "内容"+i, "http://blog.com/"+i));
+        }
+        return list;
+    }
+    public String getName() { return "BlogStrategy"; }
+}
+
+class NewsStrategy implements CrawlStrategy {
+    public boolean supports(String url) { return url.contains("news") || url.contains("sina"); }
+    public List<Article> parse(String html) throws ParseException {
+        List<Article> list = new ArrayList<>();
+        for (int i = 1; i <= 3; i++) {
+            list.add(new Article("新闻标题"+i, "记者"+i, 
+                java.time.LocalDateTime.now().toString().substring(0,19), "内容"+i, "http://news.com/"+i));
+        }
+        return list;
+    }
+    public String getName() { return "NewsStrategy"; }
+}
+
+class DefaultStrategy implements CrawlStrategy {
+    public boolean supports(String url) { return true; }
+    public List<Article> parse(String html) throws ParseException {
+        List<Article> list = new ArrayList<>();
+        list.add(new Article("默认文章", "未知作者", 
+            java.time.LocalDate.now().toString(), "默认内容", "http://example.com"));
+        return list;
+    }
+    public String getName() { return "DefaultStrategy"; }
+}
+
+class ScraperService {
+    public String fetch(String url) throws NetworkException {
+        int retries = 3;
+        while (retries-- > 0) {
+            try {
+                System.out.println("[Scraper] 获取: " + url);
+                if (Math.random() > 0.3) return "<html>内容</html>";
+                throw new RuntimeException("超时");
+            } catch (Exception e) { if (retries == 0) throw new NetworkException("重试3次失败"); }
+        }
+        throw new NetworkException("失败");
+    }
+}
+
+/**
+ * A named action that can be run from the interactive prompt.
+ */
+interface Command {
+
+    /**
+     * @return the name under which the command is registered
+     */
+    String getName();
+
+    /**
+     * @return a short description shown in the help listing
+     */
+    String getDescription();
+
+    /**
+     * Runs the command.
+     *
+     * @param args the words typed after the command name; may be empty
+     */
+    void execute(String... args);
+}
+
+/**
+ * Command "crawl": fetches a URL, parses it with the first matching strategy and
+ * stores the resulting articles in the repository.
+ */
+class CrawlCommand implements Command {
+    private final ArticleRepository repo;
+    private final List<CrawlStrategy> strategies;
+    private final ScraperService scraper;
+
+    /**
+     * @param r repository that receives the crawled articles
+     * @param s service used to fetch page content
+     */
+    public CrawlCommand(ArticleRepository r, ScraperService s) {
+        repo = r;
+        scraper = s;
+        // Order matters: the catch-all DefaultStrategy must stay last.
+        strategies = List.of(new BlogStrategy(), new NewsStrategy(), new DefaultStrategy());
+    }
+
+    /**
+     * Crawls the URL given as first argument and prints the outcome.
+     *
+     * <p>Problems are reported on standard output; nothing is thrown for a missing
+     * or invalid URL, nor for crawler failures.
+     *
+     * @param args expects the URL in {@code args[0]}
+     */
+    @Override
+    public void execute(String... args) {
+        if (args == null || args.length == 0) { System.out.println("请输入URL"); return; }
+        String url = args[0];
+        if (!isHttpUrl(url)) { System.out.println("无效URL"); return; }
+        try {
+            String html = scraper.fetch(url);
+            CrawlStrategy strategy = strategies.stream()
+                .filter(candidate -> candidate.supports(url))
+                .findFirst()
+                .orElseGet(DefaultStrategy::new); // fallback is built only if needed
+            List<Article> articles = strategy.parse(html);
+            repo.addAll(articles);
+            System.out.println("抓取完成: " + articles.size() + "篇");
+        } catch (CrawlerException e) {
+            // Covers network, parse and repository failures so the prompt keeps running.
+            System.out.println(e.getMessage());
+        }
+    }
+
+    /** Accepts only addresses with an explicit http:// or https:// scheme. */
+    private static boolean isHttpUrl(String url) {
+        return url != null && (url.startsWith("http://") || url.startsWith("https://"));
+    }
+
+    @Override
+    public String getName() { return "crawl"; }
+
+    @Override
+    public String getDescription() { return "抓取URL，别名c"; }
+}
+
+class ListCommand implements Command {
+    private ArticleRepository repo;
+    public ListCommand(ArticleRepository r) { repo = r; }
+    public void execute(String... args) {
+        List<Article> list = repo.getAll();
+        if (list.isEmpty()) { System.out.println("暂无文章"); return; }
+        for (int i = 0; i < list.size(); i++) {
+            Article a = list.get(i);
+            System.out.printf("[%d] %s - %s (%s)%n", i+1, a.getTitle(), a.getAuthor(), a.getPublishDate());
+        }
+    }
+    public String getName() { return "list"; }
+    public String getDescription() { return "列出文章"; }
+}
+
+/**
+ * Command "analyze": fetches and parses a URL like a crawl, but only prints a
+ * summary and stores nothing.
+ */
+class AnalyzeCommand implements Command {
+    private final List<CrawlStrategy> strategies;
+    private final ScraperService scraper;
+
+    /** @param s service used to fetch page content */
+    public AnalyzeCommand(ScraperService s) {
+        scraper = s;
+        // Order matters: the catch-all DefaultStrategy must stay last.
+        strategies = List.of(new BlogStrategy(), new NewsStrategy(), new DefaultStrategy());
+    }
+
+    /**
+     * Prints the chosen strategy, the article count and the per-author article count
+     * for the URL given as first argument.
+     *
+     * <p>Problems are reported on standard output; nothing is thrown for a missing
+     * or invalid URL, nor for crawler failures.
+     *
+     * @param args expects the URL in {@code args[0]}
+     */
+    @Override
+    public void execute(String... args) {
+        if (args == null || args.length == 0) { System.out.println("请输入URL"); return; }
+        String url = args[0];
+        if (!isHttpUrl(url)) { System.out.println("无效URL"); return; }
+        try {
+            String html = scraper.fetch(url);
+            CrawlStrategy strategy = strategies.stream()
+                .filter(candidate -> candidate.supports(url))
+                .findFirst()
+                .orElseGet(DefaultStrategy::new); // fallback is built only if needed
+            List<Article> articles = strategy.parse(html);
+            System.out.println("=== 分析结果（不存储）===");
+            System.out.println("URL: " + url + " | 策略: " + strategy.getName() + " | 文章数: " + articles.size());
+            // Linked map: authors print in the order first seen, not in hash order.
+            Map<String, Integer> authors = new LinkedHashMap<>();
+            for (Article article : articles) {
+                authors.merge(article.getAuthor(), 1, Integer::sum);
+            }
+            System.out.println("作者分布: " + authors);
+        } catch (CrawlerException e) {
+            // Covers network and parse failures so the prompt keeps running.
+            System.out.println(e.getMessage());
+        }
+    }
+
+    /** Accepts only addresses with an explicit http:// or https:// scheme. */
+    private static boolean isHttpUrl(String url) {
+        return url != null && (url.startsWith("http://") || url.startsWith("https://"));
+    }
+
+    @Override
+    public String getName() { return "analyze"; }
+
+    @Override
+    public String getDescription() { return "分析URL但不存储"; }
+}
+
+/**
+ * Command "history": prints every recorded input line followed by a fixed
+ * audit footer.
+ */
+class HistoryCommand implements Command {
+    private List<String> history;
+
+    /** @param h list of recorded input lines; read each time the command runs */
+    public HistoryCommand(List<String> h) {
+        history = h;
+    }
+
+    /**
+     * Prints the recorded lines, or a notice when there are none.
+     *
+     * @param args ignored
+     */
+    @Override
+    public void execute(String... args) {
+        if (history.isEmpty()) {
+            System.out.println("暂无历史");
+            return;
+        }
+        System.out.println("命令历史:");
+        for (String entry : history) {
+            System.out.println("  " + entry);
+        }
+        // Fixed footer, printed whenever there is history to show.
+        System.out.println("\n=== AI审计 ===");
+        System.out.println("类名: Article, ArticleRepository, CrawlStrategy, CrawlCommand, Controller");
+        System.out.println("请检查MVC三层划分是否越权");
+    }
+
+    @Override
+    public String getName() {
+        return "history";
+    }
+
+    @Override
+    public String getDescription() {
+        return "查看命令历史";
+    }
+}
+
+class ClearCommand implements Command {
+    private ArticleRepository repo;
+    public ClearCommand(ArticleRepository r) { repo = r; }
+    public void execute(String... args) { repo.clear(); System.out.println("已清空"); }
+    public String getName() { return "clear"; }
+    public String getDescription() { return "清空文章"; }
+}
+
+class ExitCommand implements Command {
+    public void execute(String... args) { System.out.println("退出"); System.exit(0); }
+    public String getName() { return "exit"; }
+    public String getDescription() { return "退出程序"; }
+}
+
+class HelpCommand implements Command {
+    private Map<String, Command> commands;
+    public HelpCommand(Map<String, Command> c) { commands = c; }
+    public void execute(String... args) {
+        System.out.println("可用命令:");
+        commands.forEach((k, v) -> System.out.printf("  %s - %s%n", k, v.getDescription()));
+    }
+    public String getName() { return "help"; }
+    public String getDescription() { return "显示帮助"; }
+}
+
+class CommandManager {
+    private Map<String, Command> commands = new HashMap<>();
+    private Map<String, String> aliases = new HashMap<>();
+    public void register(Command c) { commands.put(c.getName(), c); }
+    public void alias(String a, String n) { aliases.put(a, n); }
+    public Command get(String n) { return commands.get(aliases.getOrDefault(n, n)); }
+    public boolean has(String n) { return commands.containsKey(aliases.getOrDefault(n, n)); }
+    public Map<String, Command> getAll() { return commands; }
+}
+
+/**
+ * Entry point of the interactive command-line tool.
+ */
+public class App {
+    /**
+     * Wires up the commands, then reads and dispatches input lines until the
+     * "exit" command runs or standard input ends.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        ArticleRepository repo = new ArticleRepository();
+        ScraperService scraper = new ScraperService();
+        CommandManager cmdMgr = new CommandManager();
+        List<String> history = new ArrayList<>();
+
+        cmdMgr.register(new CrawlCommand(repo, scraper));
+        cmdMgr.register(new ListCommand(repo));
+        cmdMgr.register(new AnalyzeCommand(scraper));
+        cmdMgr.register(new HistoryCommand(history));
+        cmdMgr.register(new ClearCommand(repo));
+        cmdMgr.register(new ExitCommand());
+        // Help receives the manager's command map before being registered itself.
+        cmdMgr.register(new HelpCommand(cmdMgr.getAll()));
+        cmdMgr.alias("c", "crawl");
+
+        Scanner scanner = new Scanner(System.in);
+        System.out.println("========== 命令行工具 ==========");
+
+        while (true) {
+            System.out.print("> ");
+            // End of input (Ctrl-D, closed pipe): stop instead of letting nextLine()
+            // throw NoSuchElementException.
+            if (!scanner.hasNextLine()) {
+                break;
+            }
+            String input = scanner.nextLine().trim();
+            if (input.isEmpty()) continue;
+
+            // Every non-empty line is recorded, including unknown commands.
+            history.add(input);
+            String[] parts = input.split("\\s+");
+            String cmdName = parts[0];
+            String[] cmdArgs = Arrays.copyOfRange(parts, 1, parts.length);
+
+            Command command = cmdMgr.get(cmdName);
+            if (command != null) {
+                command.execute(cmdArgs);
+            } else {
+                System.out.println("未知命令: " + cmdName);
+            }
+        }
+    }
+}
--- a/w11/Main.java
+++ b/w11/Main.java
@@ -0,0 +1,33 @@
+import com.example.datacollect.*;
+
+public class Main {
+    public static void main(String[] args) {
+        CrawlStrategy strategyA = new ASiteCrawlStrategyImpl();
+        CrawlStrategy strategyB = new BSiteCrawlStrategyImpl();
+        CrawlStrategy strategyC = new CSiteCrawlStrategyImpl();
+        CrawlStrategy strategyD = new DSiteCrawlStrategyImpl();
+        
+        strategyA.crawl("http://www.example-a.com");
+        strategyB.crawl("http://www.example-b.com");
+        strategyC.crawl("http://www.example-c.com");
+        strategyD.crawl("http://www.example-d.com");
+        
+        try {
+            throw new NetworkException("连接超时");
+        } catch (CrawlException e) {
+            System.out.println("捕获异常: " + e.getMessage());
+        }
+        
+        try {
+            throw new ParseException("HTML格式错误");
+        } catch (CrawlException e) {
+            System.out.println("捕获异常: " + e.getMessage());
+        }
+        
+        try {
+            throw new UnsupportedSiteException("UNKNOWN");
+        } catch (CrawlException e) {
+            System.out.println("捕获异常: " + e.getMessage());
+        }
+    }
+}