Browse Source

上传文件至 'w11'

main
JiangYouhan 3 weeks ago
parent
commit
6ba9fcf0c5
  1. 253
      w11/App.java
  2. 33
      w11/Main.java

253
w11/App.java

@ -0,0 +1,253 @@
import java.util.*;
/**
 * Base unchecked exception for crawler failures.
 *
 * <p>{@code NetworkException} and {@code ParseException} in this file extend it.
 */
class CrawlerException extends RuntimeException {

    /**
     * Creates an exception that carries only a detail message.
     *
     * @param message detail message, handed unchanged to {@link RuntimeException}
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * Creates an exception that carries a detail message and the underlying cause.
     *
     * @param message detail message, handed unchanged to {@link RuntimeException}
     * @param cause   the failure that triggered this exception
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}
/**
 * Signals that a network request failed.
 *
 * <p>Every message is prefixed with {@code "网络请求失败: "}.
 */
class NetworkException extends CrawlerException {

    /** Prefix prepended to every detail message of this exception type. */
    private static final String PREFIX = "网络请求失败: ";

    /**
     * Creates a network failure without an underlying cause.
     *
     * @param message description of the failure; the prefix is added automatically
     */
    public NetworkException(String message) {
        super(PREFIX + message);
    }

    /**
     * Creates a network failure that keeps the underlying cause, so the root
     * error is not lost when a lower-level exception is translated.
     *
     * @param message description of the failure; the prefix is added automatically
     * @param cause   the exception that triggered this failure
     */
    public NetworkException(String message, Throwable cause) {
        super(PREFIX + message, cause);
    }
}
/**
 * Signals that fetched data could not be parsed.
 *
 * <p>Every message is prefixed with {@code "数据解析失败: "}.
 */
class ParseException extends CrawlerException {

    /** Prefix prepended to every detail message of this exception type. */
    private static final String PREFIX = "数据解析失败: ";

    /**
     * Creates a parse failure without an underlying cause.
     *
     * @param message description of the failure; the prefix is added automatically
     */
    public ParseException(String message) {
        super(PREFIX + message);
    }

    /**
     * Creates a parse failure that keeps the underlying cause, so the root
     * error is not lost when a lower-level exception is translated.
     *
     * @param message description of the failure; the prefix is added automatically
     * @param cause   the exception that triggered this failure
     */
    public ParseException(String message, Throwable cause) {
        super(PREFIX + message, cause);
    }
}
class Article {
private String title, author, publishDate, content, url;
public Article(String t, String a, String pd, String c, String u) {
title = t; author = a; publishDate = pd; content = c; url = u;
}
public String getTitle() { return title; }
public String getAuthor() { return author; }
public String getPublishDate() { return publishDate; }
}
/**
 * In-memory store of {@link Article} objects backed by an {@link ArrayList}.
 *
 * <p>Null articles are rejected on every insertion path, so readers never
 * encounter a null element.
 */
class ArticleRepository {
    private final List<Article> articles = new ArrayList<>();

    /**
     * Appends a single article.
     *
     * @param a the article to store
     * @throws CrawlerException if {@code a} is null
     */
    public void add(Article a) {
        if (a == null) {
            throw new CrawlerException("Article cannot be null");
        }
        articles.add(a);
    }

    /**
     * Appends every article of the given list, in order.
     *
     * <p>The list is validated completely before anything is stored, so a
     * rejected call leaves the repository unchanged.
     *
     * @param list the articles to store
     * @throws CrawlerException if {@code list} is null or contains a null element
     */
    public void addAll(List<Article> list) {
        if (list == null) {
            throw new CrawlerException("List cannot be null");
        }
        // Explicit loop rather than list.contains(null): immutable lists such as
        // List.of(...) throw NullPointerException from contains(null).
        for (Article candidate : list) {
            if (candidate == null) {
                throw new CrawlerException("Article cannot be null");
            }
        }
        articles.addAll(list);
    }

    /** @return a new list holding the stored articles; changes to it do not affect the repository */
    public List<Article> getAll() {
        return new ArrayList<>(articles);
    }

    /** @return the number of stored articles */
    public int size() {
        return articles.size();
    }

    /** Removes every stored article. */
    public void clear() {
        articles.clear();
    }
}
/**
 * Site-specific way of turning fetched HTML into {@link Article} objects.
 *
 * <p>Callers in this file pick the first strategy whose {@link #supports(String)}
 * returns {@code true} for the requested URL.
 */
interface CrawlStrategy {

    /**
     * Tells whether this strategy applies to the given URL.
     *
     * @param url the URL being crawled
     * @return {@code true} if this strategy should be used for {@code url}
     */
    boolean supports(String url);

    /**
     * Produces the articles for the given HTML.
     *
     * @param html the fetched page content
     * @return the resulting articles
     * @throws ParseException if the content cannot be parsed
     */
    List<Article> parse(String html) throws ParseException;

    /**
     * @return a display name for this strategy
     */
    String getName();
}
class BlogStrategy implements CrawlStrategy {
public boolean supports(String url) { return url.contains("blog") || url.contains("csdn"); }
public List<Article> parse(String html) throws ParseException {
List<Article> list = new ArrayList<>();
for (int i = 1; i <= 3; i++) {
list.add(new Article("博客文章"+i, "博主"+i,
java.time.LocalDate.now().minusDays(i).toString(), "内容"+i, "http://blog.com/"+i));
}
return list;
}
public String getName() { return "BlogStrategy"; }
}
class NewsStrategy implements CrawlStrategy {
public boolean supports(String url) { return url.contains("news") || url.contains("sina"); }
public List<Article> parse(String html) throws ParseException {
List<Article> list = new ArrayList<>();
for (int i = 1; i <= 3; i++) {
list.add(new Article("新闻标题"+i, "记者"+i,
java.time.LocalDateTime.now().toString().substring(0,19), "内容"+i, "http://news.com/"+i));
}
return list;
}
public String getName() { return "NewsStrategy"; }
}
class DefaultStrategy implements CrawlStrategy {
public boolean supports(String url) { return true; }
public List<Article> parse(String html) throws ParseException {
List<Article> list = new ArrayList<>();
list.add(new Article("默认文章", "未知作者",
java.time.LocalDate.now().toString(), "默认内容", "http://example.com"));
return list;
}
public String getName() { return "DefaultStrategy"; }
}
/**
 * Simulated page fetcher with a fixed number of attempts.
 *
 * <p>No real network access happens: each attempt succeeds when
 * {@code Math.random() > 0.3} and otherwise fails with a simulated timeout.
 */
class ScraperService {

    /** Total number of attempts made before giving up. */
    private static final int MAX_ATTEMPTS = 3;

    /**
     * Fetches the content for {@code url}, retrying on failure.
     *
     * @param url the URL to fetch; only used for logging in this simulation
     * @return the fixed string {@code "<html>内容</html>"} on success
     * @throws NetworkException if every attempt fails; the last failure is
     *         attached as the cause
     */
    public String fetch(String url) throws NetworkException {
        RuntimeException lastFailure = null;
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                System.out.println("[Scraper] 获取: " + url);
                if (Math.random() > 0.3) {
                    return "<html>内容</html>";
                }
                throw new RuntimeException("超时");
            } catch (RuntimeException e) {
                // Remember the failure so it can be reported if no attempt succeeds.
                lastFailure = e;
            }
        }
        NetworkException failure = new NetworkException("重试" + MAX_ATTEMPTS + "次失败");
        // initCause is used because NetworkException may only offer a message-only constructor.
        failure.initCause(lastFailure);
        throw failure;
    }
}
/**
 * A named action that the command-line loop can run.
 */
interface Command {

    /**
     * Runs the command.
     *
     * @param args the whitespace-separated tokens that followed the command name;
     *             may be empty
     */
    void execute(String... args);

    /**
     * @return the name under which this command is registered
     */
    String getName();

    /**
     * @return a short description shown in the help listing
     */
    String getDescription();
}
class CrawlCommand implements Command {
private ArticleRepository repo;
private List<CrawlStrategy> strategies;
private ScraperService scraper;
public CrawlCommand(ArticleRepository r, ScraperService s) {
repo = r; scraper = s;
strategies = List.of(new BlogStrategy(), new NewsStrategy(), new DefaultStrategy());
}
public void execute(String... args) {
if (args.length == 0) { System.out.println("请输入URL"); return; }
String url = args[0];
if (!url.startsWith("http")) { System.out.println("无效URL"); return; }
try {
String html = scraper.fetch(url);
CrawlStrategy strategy = strategies.stream().filter(s -> s.supports(url)).findFirst().orElse(new DefaultStrategy());
List<Article> articles = strategy.parse(html);
repo.addAll(articles);
System.out.println("抓取完成: " + articles.size() + "篇");
} catch (NetworkException | ParseException e) { System.out.println(e.getMessage()); }
}
public String getName() { return "crawl"; }
public String getDescription() { return "抓取URL,别名c"; }
}
class ListCommand implements Command {
private ArticleRepository repo;
public ListCommand(ArticleRepository r) { repo = r; }
public void execute(String... args) {
List<Article> list = repo.getAll();
if (list.isEmpty()) { System.out.println("暂无文章"); return; }
for (int i = 0; i < list.size(); i++) {
Article a = list.get(i);
System.out.printf("[%d] %s - %s (%s)%n", i+1, a.getTitle(), a.getAuthor(), a.getPublishDate());
}
}
public String getName() { return "list"; }
public String getDescription() { return "列出文章"; }
}
class AnalyzeCommand implements Command {
private List<CrawlStrategy> strategies;
private ScraperService scraper;
public AnalyzeCommand(ScraperService s) {
scraper = s;
strategies = List.of(new BlogStrategy(), new NewsStrategy(), new DefaultStrategy());
}
public void execute(String... args) {
if (args.length == 0) { System.out.println("请输入URL"); return; }
String url = args[0];
if (!url.startsWith("http")) { System.out.println("无效URL"); return; }
try {
String html = scraper.fetch(url);
CrawlStrategy strategy = strategies.stream().filter(s -> s.supports(url)).findFirst().orElse(new DefaultStrategy());
List<Article> articles = strategy.parse(html);
System.out.println("=== 分析结果(不存储)===");
System.out.println("URL: " + url + " | 策略: " + strategy.getName() + " | 文章数: " + articles.size());
Map<String, Integer> authors = new HashMap<>();
articles.forEach(a -> authors.merge(a.getAuthor(), 1, Integer::sum));
System.out.println("作者分布: " + authors);
} catch (NetworkException | ParseException e) { System.out.println(e.getMessage()); }
}
public String getName() { return "analyze"; }
public String getDescription() { return "分析URL但不存储"; }
}
class HistoryCommand implements Command {
private List<String> history;
public HistoryCommand(List<String> h) { history = h; }
public void execute(String... args) {
if (history.isEmpty()) { System.out.println("暂无历史"); return; }
System.out.println("命令历史:");
history.forEach(h -> System.out.println(" " + h));
System.out.println("\n=== AI审计 ===");
System.out.println("类名: Article, ArticleRepository, CrawlStrategy, CrawlCommand, Controller");
System.out.println("请检查MVC三层划分是否越权");
}
public String getName() { return "history"; }
public String getDescription() { return "查看命令历史"; }
}
class ClearCommand implements Command {
private ArticleRepository repo;
public ClearCommand(ArticleRepository r) { repo = r; }
public void execute(String... args) { repo.clear(); System.out.println("已清空"); }
public String getName() { return "clear"; }
public String getDescription() { return "清空文章"; }
}
class ExitCommand implements Command {
public void execute(String... args) { System.out.println("退出"); System.exit(0); }
public String getName() { return "exit"; }
public String getDescription() { return "退出程序"; }
}
class HelpCommand implements Command {
private Map<String, Command> commands;
public HelpCommand(Map<String, Command> c) { commands = c; }
public void execute(String... args) {
System.out.println("可用命令:");
commands.forEach((k, v) -> System.out.printf(" %s - %s%n", k, v.getDescription()));
}
public String getName() { return "help"; }
public String getDescription() { return "显示帮助"; }
}
/**
 * Registry of commands by name, with optional aliases that resolve to a
 * registered name.
 */
class CommandManager {
    private final Map<String, Command> commands = new HashMap<>();
    private final Map<String, String> aliases = new HashMap<>();

    /**
     * Registers a command under {@link Command#getName()}, replacing any
     * command previously registered under the same name.
     *
     * @param c the command to register
     */
    public void register(Command c) {
        commands.put(c.getName(), c);
    }

    /**
     * Declares {@code a} as an alternative name for the command named {@code n}.
     *
     * @param a the alias
     * @param n the command name the alias resolves to
     */
    public void alias(String a, String n) {
        aliases.put(a, n);
    }

    /**
     * Looks up a command by name or alias.
     *
     * @param n a command name or alias
     * @return the matching command, or {@code null} if none is registered
     */
    public Command get(String n) {
        return commands.get(resolve(n));
    }

    /**
     * @param n a command name or alias
     * @return {@code true} if a command is registered under the resolved name
     */
    public boolean has(String n) {
        return commands.containsKey(resolve(n));
    }

    /**
     * Returns a read-only view of the registered commands keyed by name.
     *
     * <p>The view is live, so commands registered later show up in it, but
     * callers cannot modify the registry through it.
     *
     * @return an unmodifiable live view of the registry
     */
    public Map<String, Command> getAll() {
        return Collections.unmodifiableMap(commands);
    }

    /** Maps an alias to its command name; a non-alias is returned unchanged. */
    private String resolve(String nameOrAlias) {
        return aliases.getOrDefault(nameOrAlias, nameOrAlias);
    }
}
/**
 * Entry point of the interactive command-line tool.
 *
 * <p>Wires the repository, scraper and commands together, then reads command
 * lines from standard input until the {@code exit} command runs or the input
 * ends.
 */
public class App {

    /**
     * Runs the read-dispatch loop.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ArticleRepository repo = new ArticleRepository();
        ScraperService scraper = new ScraperService();
        CommandManager cmdMgr = new CommandManager();
        List<String> history = new ArrayList<>();

        cmdMgr.register(new CrawlCommand(repo, scraper));
        cmdMgr.register(new ListCommand(repo));
        cmdMgr.register(new AnalyzeCommand(scraper));
        cmdMgr.register(new HistoryCommand(history));
        cmdMgr.register(new ClearCommand(repo));
        cmdMgr.register(new ExitCommand());
        cmdMgr.register(new HelpCommand(cmdMgr.getAll()));
        cmdMgr.alias("c", "crawl");

        try (Scanner scanner = new Scanner(System.in)) {
            System.out.println("========== 命令行工具 ==========");
            while (true) {
                System.out.print("> ");
                // Stop cleanly at end of input (Ctrl-D or exhausted pipe);
                // nextLine() would throw NoSuchElementException here.
                if (!scanner.hasNextLine()) {
                    break;
                }
                String input = scanner.nextLine().trim();
                if (input.isEmpty()) {
                    continue;
                }
                history.add(input);

                String[] parts = input.split("\\s+");
                String cmdName = parts[0];
                // Yields an empty array when only the command name was typed.
                String[] cmdArgs = Arrays.copyOfRange(parts, 1, parts.length);

                Command command = cmdMgr.get(cmdName);
                if (command != null) {
                    command.execute(cmdArgs);
                } else {
                    System.out.println("未知命令: " + cmdName);
                }
            }
        }
    }
}

33
w11/Main.java

@ -0,0 +1,33 @@
import com.example.datacollect.*;
/**
 * Demo driver: runs four site-specific crawl strategies and then shows that
 * three exception types can be caught as {@code CrawlException}.
 *
 * <p>NOTE(review): all referenced types come from {@code com.example.datacollect},
 * which is not visible here — their behavior is assumed from usage only.
 */
public class Main {

    /**
     * Crawls one example URL per strategy, then throws and catches each demo
     * exception, printing its message.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // All strategies are constructed before any crawl call is made.
        CrawlStrategy[] strategies = {
            new ASiteCrawlStrategyImpl(),
            new BSiteCrawlStrategyImpl(),
            new CSiteCrawlStrategyImpl(),
            new DSiteCrawlStrategyImpl()
        };
        String[] targets = {
            "http://www.example-a.com",
            "http://www.example-b.com",
            "http://www.example-c.com",
            "http://www.example-d.com"
        };
        for (int i = 0; i < strategies.length; i++) {
            strategies[i].crawl(targets[i]);
        }

        try {
            throw new NetworkException("连接超时");
        } catch (CrawlException networkFailure) {
            System.out.println("捕获异常: " + networkFailure.getMessage());
        }

        try {
            throw new ParseException("HTML格式错误");
        } catch (CrawlException parseFailure) {
            System.out.println("捕获异常: " + parseFailure.getMessage());
        }

        try {
            throw new UnsupportedSiteException("UNKNOWN");
        } catch (CrawlException unsupportedSite) {
            System.out.println("捕获异常: " + unsupportedSite.getMessage());
        }
    }
}
Loading…
Cancel
Save