diff --git a/project/data/ExampleCrawler.json b/project/data/ExampleCrawler.json new file mode 100644 index 0000000..47c8dd1 --- /dev/null +++ b/project/data/ExampleCrawler.json @@ -0,0 +1,61 @@ +[ { + "title" : "跳至主要内容", + "content" : null, + "url" : "#MainContent", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "辅助功能帮助", + "content" : null, + "url" : "https://www.essentialaccessibility.com/the-weather-channel?utm_source=theweatherchannelhomepage&utm_medium=iconlarge&utm_term=eachannelpage&utm_content=header&utm_campaign=theweatherchannel", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "安提瓜和巴布达 | English", + "content" : null, + "url" : "/en-AG/weather/today/l/b03e06bc789c91465cb39e738b85c9e48d4ee723896c6bcec3a9ef6a4ca37e86", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "阿根廷 | Español", + "content" : null, + "url" : "/es-AR/tiempo/hoy/l/b03e06bc789c91465cb39e738b85c9e48d4ee723896c6bcec3a9ef6a4ca37e86", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "巴哈马 | English", + "content" : null, + "url" : "/en-BS/weather/today/l/b03e06bc789c91465cb39e738b85c9e48d4ee723896c6bcec3a9ef6a4ca37e86", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "巴巴多斯 | English", + "content" : null, + "url" : "/en-BB/weather/today/l/b03e06bc789c91465cb39e738b85c9e48d4ee723896c6bcec3a9ef6a4ca37e86", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "伯利兹 | English", + "content" : null, + "url" : "/en-BZ/weather/today/l/b03e06bc789c91465cb39e738b85c9e48d4ee723896c6bcec3a9ef6a4ca37e86", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "玻利维亚 | Español", + "content" : null, + "url" : "/es-BO/tiempo/hoy/l/b03e06bc789c91465cb39e738b85c9e48d4ee723896c6bcec3a9ef6a4ca37e86", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "巴西 | Português", + "content" : null, + "url" : "/pt-BR/clima/hoje/l/b03e06bc789c91465cb39e738b85c9e48d4ee723896c6bcec3a9ef6a4ca37e86", + "source" : "ExampleCrawler", + "publishDate" : null +}, { + "title" : "加拿大 | English", + "content" : null, + "url" : "/en-CA/weather/today/l/b03e06bc789c91465cb39e738b85c9e48d4ee723896c6bcec3a9ef6a4ca37e86", + "source" : "ExampleCrawler", + "publishDate" : null +} ] \ No newline at end of file diff --git a/project/data/HunanUniversityCrawler.json b/project/data/HunanUniversityCrawler.json new file mode 100644 index 0000000..0e96517 --- /dev/null +++ b/project/data/HunanUniversityCrawler.json @@ -0,0 +1,181 @@ +[ { + "title" : "English", + "content" : null, + "url" : "http://www-en.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "机关部处", + "content" : null, + "url" : "https://www.hnu.edu.cn/hdgk/gljg/jgbc.htm", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "群团组织", + "content" : null, + "url" : "https://www.hnu.edu.cn/hdgk/gljg/qtzz.htm", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "直附属机构", + "content" : null, + "url" : "https://www.hnu.edu.cn/hdgk/gljg/zfsjg.htm", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "国家卓越工程师学院", + "content" : null, + "url" : "http://ngcee.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "岳麓书院(历史与哲学学院)", + "content" : null, + "url" : "http://ylsy.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "经济与贸易学院", + "content" : null, + "url" : "http://cet.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "金融与统计学院", + "content" : null, + "url" : "http://jt.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "马克思主义学院", + "content" : null, + "url" : "http://marx.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "教育科学研究院", + "content" : null, + "url" : "http://edu.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "体育学院", + "content" : null, + "url" : "http://sports.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "中国语言文学学院", + "content" : null, + "url" : "http://wxy.hnu.edu.cn", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "外国语学院", + "content" : null, + "url" : "http://english.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "新闻与传播学院", + "content" : null, + "url" : "http://xinwen.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "数学学院", + "content" : null, + "url" : "http://math.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "物理与微电子科学学院", + "content" : null, + "url" : "http://spe.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "化学化工学院", + "content" : null, + "url" : "http://cc.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "生物学院", + "content" : null, + "url" : "http://bio.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "机械与运载工程学院", + "content" : null, + "url" : "http://mve.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "材料科学与工程学院", + "content" : null, + "url" : "http://clxy.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "电气与信息工程学院", + "content" : null, + "url" : "http://eeit.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "计算机学院(软件学院、国家保密学院)", + "content" : null, + "url" : "http://csee.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "网络空间安全学院", + "content" : null, + "url" : "http://cst.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "人工智能与机器人学院", + "content" : null, + "url" : "http://robotics.hnu.edu.cn/index.htm", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "建筑与规划学院", + "content" : null, + "url" : "http://arch.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "土木工程学院", + "content" : null, + "url" : "http://ce.hnu.edu.cn", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "环境科学与工程学院", + "content" : null, + "url" : "http://ee.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "隆平农学院", + "content" : null, + "url" : "https://agri.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "生命医学交叉研究院", + "content" : null, + "url" : "http://smyjy.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +}, { + "title" : "工商管理学院", + "content" : null, + "url" : "http://ibschool.hnu.edu.cn/", + "source" : "HunanUniversityCrawler", + "publishDate" : null +} ] \ No newline at end of file diff --git a/project/data/HunanUniversityNewsCrawler.json b/project/data/HunanUniversityNewsCrawler.json new file mode 100644 index 0000000..2a72d72 --- /dev/null +++ b/project/data/HunanUniversityNewsCrawler.json @@ -0,0 +1,121 @@ +[ { + "title" : "湖大主页", + "content" : null, + "url" : "https://www.hnu.edu.cn/", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "新闻", + "content" : null, + "url" : "https://news.hnu.edu.cn/xw/zhxw.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "综合新闻", + "content" : null, + "url" : "https://news.hnu.edu.cn/xw/zhxw.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "人才培养", + "content" : null, + "url" : "https://news.hnu.edu.cn/xw/rcpy.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "科学研究", + "content" : null, + "url" : "https://news.hnu.edu.cn/xw/kxyj.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "合作交流", + "content" : null, + "url" : "https://news.hnu.edu.cn/xw/hzjl.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "菁菁校园", + "content" : null, + "url" : "https://news.hnu.edu.cn/xw/jjxy.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "校友新闻", + "content" : null, + "url" : "https://news.hnu.edu.cn/xw/xyxw.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "人物", + "content" : null, + "url" : "https://news.hnu.edu.cn/rw.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "湖大人物", + "content" : null, + "url" : "https://news.hnu.edu.cn/rw/hdrw1.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "青春派", + "content" : null, + "url" : "https://news.hnu.edu.cn/rw/qcp1.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "师道与问道", + "content" : null, + "url" : "https://news.hnu.edu.cn/rw/sdywd.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "与湖大共成长", + "content" : null, + "url" : "https://news.hnu.edu.cn/rw/yhdgcz1.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "视觉", + "content" : null, + "url" : "https://news.hnu.edu.cn/sj/tshd.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "图说湖大", + "content" : null, + "url" : "https://news.hnu.edu.cn/sj/tshd.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "视频湖大", + "content" : null, + "url" : "https://news.hnu.edu.cn/sj/sphd.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "媒体", + "content" : null, + "url" : "https://news.hnu.edu.cn/mt.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "校报", + "content" : null, + "url" : "http://hndxb.hnu.edu.cn/web/INDEX.html", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "专题", + "content" : null, + "url" : "https://news.hnu.edu.cn/zt.htm", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +}, { + "title" : "投稿", + "content" : null, + "url" : "http://202.197.98.95:8080/system/caslogin.jsp", + "source" : "HunanUniversityNewsCrawler", + "publishDate" : null +} ] \ No newline at end of file diff --git a/project/data/MountBladeCrawler.json b/project/data/MountBladeCrawler.json new file mode 100644 index 0000000..ce1f80b --- /dev/null +++ b/project/data/MountBladeCrawler.json @@ -0,0 +1,181 @@ +[ { + "title" : "首页", + "content" : null, + "url" : "https://www.mountblade.com.cn/index.html", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "最新动态", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "骑马与砍杀2:霸主", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "骑马与砍杀1全系列", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/hangye/", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "媒体中心", + "content" : null, + "url" : "https://www.mountblade.com.cn/media/", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "精彩视频", + "content" : null, + "url" : "https://www.mountblade.com.cn/media/Screenshots/", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "原画漫画", + "content" : null, + "url" : "https://www.mountblade.com.cn/media/original/", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "下载中心", + "content" : null, + "url" : "https://bbs.mountblade.com.cn/download.html", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "MOD中心", + "content" : null, + "url" : "https://bbs.mountblade.com.cn/forum-53-1.html", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "骑砍百科", + "content" : null, + "url" : "http://wiki.mountbladecn.com/", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "火爆论坛", + "content" : null, + "url" : "http://bbs.mountblade.com.cn/", + "source" : "MountBladeCrawler", + "publishDate" : null +}, { + "title" : "【MOD精选】卡拉多格附体!每场战斗都是一次蝗虫过境!专心搜打撤的MOD《ISKL的战争掠夺》来啦!", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-29/3186.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-29" +}, { + "title" : "骑砍2《战帆》v1.2.5与本体v1.4.5更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-22/3183.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-22" +}, { + "title" : "【MOD推荐】啥?地牢里有宝箱怪?!这款骑砍MOD让你开宝箱开到手软!《瓦斯纳世界》上线!", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/hangye/2026-05-19/3179.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-19" +}, { + "title" : "骑砍2《战帆》v1.2.5与本体v1.4.5测试版更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-15/3176.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-15" +}, { + "title" : "骑砍2《战帆》v1.2.4与本体v1.4.4测试版更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-13/3175.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-13" +}, { + "title" : "【MOD精选】新阵营、新国度、新装备、新附魔!《中古战锤》喜迎大版本更新!", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-11/3173.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-11" +}, { + "title" : "骑砍2《战帆》v1.2.3与本体v1.4.3测试版更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-08/3171.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-08" +}, { + "title" : "骑砍2《战帆》v1.2.2与本体v1.4.2测试版更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-06/3170.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-06" +}, { + "title" : "【MOD精选】告别无聊的战斗!百种精彩招式谱写战场华尔兹!《电影级战斗》更新!", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-05/3167.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-05" +}, { + "title" : "【MOD精选】远程仓库,远程商店上限!家族分封玩法上线?骑砍2《家族加强》MOD发布", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-03/3164.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-03" +}, { + "title" : "【MOD精选】卡拉多格附体!每场战斗都是一次蝗虫过境!专心搜打撤的MOD《ISKL的战争掠夺》来啦!", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-29/3186.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-29" +}, { + "title" : "骑砍2《战帆》v1.2.5与本体v1.4.5更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-22/3183.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-22" +}, { + "title" : "骑砍2《战帆》v1.2.5与本体v1.4.5测试版更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-15/3176.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-15" +}, { + "title" : "骑砍2《战帆》v1.2.4与本体v1.4.4测试版更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-13/3175.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-13" +}, { + "title" : "【MOD精选】新阵营、新国度、新装备、新附魔!《中古战锤》喜迎大版本更新!", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-11/3173.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-11" +}, { + "title" : "骑砍2《战帆》v1.2.3与本体v1.4.3测试版更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-08/3171.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-08" +}, { + "title" : "骑砍2《战帆》v1.2.2与本体v1.4.2测试版更新日志", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-06/3170.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-06" +}, { + "title" : "【MOD精选】告别无聊的战斗!百种精彩招式谱写战场华尔兹!《电影级战斗》更新!", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-05/3167.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-05" +}, { + "title" : "【MOD精选】远程仓库,远程商店上限!家族分封玩法上线?骑砍2《家族加强》MOD发布", + "content" : null, + "url" : "https://www.mountblade.com.cn/news/Bannerlord/2026-05-03/3164.html", + "source" : "MountBladeCrawler", + "publishDate" : "2026-05-03" +} ] \ No newline at end of file