You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
104 lines
4.2 KiB
104 lines
4.2 KiB
package com.music.strategy;
|
|
|
|
import com.music.exception.NetworkException;
|
|
import com.music.exception.ParseException;
|
|
import com.music.model.Song;
|
|
import com.music.util.RetryUtils;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.select.Elements;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
|
|
public class QQStrategy implements CrawlStrategy {
|
|
private static final Logger logger = LoggerFactory.getLogger(QQStrategy.class);
|
|
|
|
@Override
|
|
public boolean supports(String platform) {
|
|
return "qq".equalsIgnoreCase(platform);
|
|
}
|
|
|
|
@Override
|
|
public List<Song> crawl(int limit) throws NetworkException, ParseException {
|
|
logger.info("开始爬取 QQ 音乐热歌榜,限制 {} 首", limit);
|
|
List<Song> songs = new ArrayList<>();
|
|
try {
|
|
// 使用重试工具包装网络请求
|
|
String url = "https://y.qq.com/n/ryqq/toplist/4"; // QQ音乐热歌榜
|
|
Document doc = RetryUtils.retry(() -> fetchDocument(url), 3, 1000);
|
|
|
|
// 解析歌曲列表:选择器基于 QQ 音乐网页结构
|
|
Elements songItems = doc.select(".songlist__list li");
|
|
if (songItems.isEmpty()) {
|
|
logger.warn("未找到歌曲列表,网页结构可能已变化");
|
|
return songs; // 返回空列表,不抛异常
|
|
}
|
|
|
|
int rank = 1;
|
|
for (Element item : songItems) {
|
|
if (rank > limit) break;
|
|
|
|
// 歌曲名
|
|
String name = item.select(".songlist__songname").text();
|
|
if (name.isEmpty()) {
|
|
// 备用选择器
|
|
name = item.select(".songlist__songname_txt").text();
|
|
}
|
|
// 歌手
|
|
String artist = item.select(".songlist__artist").text();
|
|
if (artist.isEmpty()) {
|
|
artist = item.select(".songlist__artist_name").text();
|
|
}
|
|
// 时长(格式如 03:45)
|
|
String durationStr = item.select(".songlist__time").text();
|
|
int durationSeconds = parseDuration(durationStr);
|
|
|
|
Song song = new Song();
|
|
song.setPlatform("qq");
|
|
song.setRank(rank);
|
|
song.setChartType("热歌榜");
|
|
song.setName(name.isEmpty() ? "未知歌曲" : name);
|
|
song.setArtist(artist.isEmpty() ? "未知歌手" : artist);
|
|
song.setAlbum("QQ音乐专辑"); // 网页上未直接展示专辑,可留空或后续补充
|
|
song.setDuration(durationSeconds);
|
|
songs.add(song);
|
|
|
|
logger.debug("QQ音乐: 排名{} {} - {}", rank, name, artist);
|
|
rank++;
|
|
}
|
|
logger.info("QQ音乐爬取完成,共 {} 首", songs.size());
|
|
return songs;
|
|
} catch (Exception e) {
|
|
logger.error("QQ音乐爬取失败", e);
|
|
if (e instanceof NetworkException) throw (NetworkException) e;
|
|
if (e instanceof ParseException) throw (ParseException) e;
|
|
throw new ParseException("解析QQ音乐数据失败: " + e.getMessage(), e);
|
|
}
|
|
}
|
|
|
|
private Document fetchDocument(String url) throws Exception {
|
|
return Jsoup.connect(url)
|
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
|
|
.header("Referer", "https://y.qq.com/")
|
|
.timeout(10000)
|
|
.get();
|
|
}
|
|
|
|
private int parseDuration(String durationStr) {
|
|
if (durationStr == null || durationStr.isEmpty()) return 0;
|
|
// 格式: "03:45" -> 225秒
|
|
try {
|
|
String[] parts = durationStr.split(":");
|
|
if (parts.length == 2) {
|
|
return Integer.parseInt(parts[0]) * 60 + Integer.parseInt(parts[1]);
|
|
}
|
|
} catch (NumberFormatException e) {
|
|
logger.warn("时长解析失败: {}", durationStr);
|
|
}
|
|
return 0;
|
|
}
|
|
}
|