Skip to content

Instantly share code, notes, and snippets.

@hanxi
Last active April 27, 2025 16:27
Show Gist options
  • Save hanxi/dda82d964a28f8110f8fba81c3ff8314 to your computer and use it in GitHub Desktop.
Save hanxi/dda82d964a28f8110f8fba81c3ff8314 to your computer and use it in GitHub Desktop.
xiaomusic歌单分享
[
{
"name": "测试电台",
"musics": [
{
"name": "测试电台1",
"url": "http://ngcdn001.cnr.cn/live/zgzs/index.m3u8",
"type": "radio"
},
{
"name": "测试电台2",
"url": "https://lhttp.qtfm.cn/live/4915/64k.mp3",
"type": "radio"
}
]
},
{
"name": "测试歌单",
"musics": [
{
"name": "测试歌名1",
"url": "http://music.163.com/song/media/outer/url?id=447925558.mp3"
},
{
"name": "测试歌名2",
"url": "https://filesamples.com/samples/audio/mp3/sample1.mp3"
}
]
},
{
"name": "中国广播网",
"musics": [
{
"name": "CNR中国之声",
"type": "radio",
"url": "http://ngcdn001.cnr.cn/live/zgzs/index.m3u8"
},
{
"name": "CNR经济之声",
"type": "radio",
"url": "http://ngcdn002.cnr.cn/live/jjzs/index.m3u8"
},
{
"name": "CNR音乐之声",
"type": "radio",
"url": "http://ngcdn003.cnr.cn/live/yyzs/index.m3u8"
},
{
"name": "CNR经典音乐",
"type": "radio",
"url": "http://ngcdn004.cnr.cn/live/dszs/index.m3u8"
},
{
"name": "CNR台海之声",
"type": "radio",
"url": "http://ngcdn005.cnr.cn/live/zhzs/index.m3u8"
},
{
"name": "CNR神州之声",
"type": "radio",
"url": "http://ngcdn006.cnr.cn/live/szzs/index.m3u8"
},
{
"name": "CNR湾区之声",
"type": "radio",
"url": "http://ngcdn007.cnr.cn/live/hxzs/index.m3u8"
},
{
"name": "CNR香港之声",
"type": "radio",
"url": "http://ngcdn008.cnr.cn/live/xgzs/index.m3u8"
},
{
"name": "CNR文艺之声",
"type": "radio",
"url": "http://ngcdn010.cnr.cn/live/wyzs/index.m3u8"
},
{
"name": "CNR老年之声",
"type": "radio",
"url": "http://ngcdn011.cnr.cn/live/lnzs/index.m3u8"
},
{
"name": "CNR阅读之声",
"type": "radio",
"url": "http://ngcdn014.cnr.cn/live/ylgb/index.m3u8"
},
{
"name": "CNR乡村之声",
"type": "radio",
"url": "http://ngcdn017.cnr.cn/live/xczs/index.m3u8"
},
{
"name": "CNR交通广播",
"type": "radio",
"url": "http://ngcdn016.cnr.cn/live/gsgljtgb/index.m3u8"
},
{
"name": "CRI环球资讯",
"type": "radio",
"url": "http://sk.cri.cn/905.m3u8"
},
{
"name": "CRI华语环球",
"type": "radio",
"url": "http://sk.cri.cn/hyhq.m3u8"
},
{
"name": "CRI南海之声",
"type": "radio",
"url": "https://sk.cri.cn/nhzs.m3u8"
},
{
"name": "CRI英语资讯",
"type": "radio",
"url": "http://sk.cri.cn/am846.m3u8"
},
{
"name": "CRI世界华声",
"type": "radio",
"url": "http://sk.cri.cn/hxfh.m3u8"
},
{
"name": "CRI轻松调频",
"type": "radio",
"url": "http://sk.cri.cn/915.m3u8"
},
{
"name": "CRI HitFM广播",
"type": "radio",
"url": "http://sk.cri.cn/887.m3u8"
}
]
}
]
import html
import json
import re
import sys

import requests
def normalize_url(url):
    """Return ``url`` with every ``#/`` sequence removed.

    Turns hash-routed links such as ``https://host/#/playlist?id=1`` into
    the plain form ``https://host/playlist?id=1``.

    :param url: Input URL string.
    :return: The URL without any ``#/`` sequences.
    """
    hash_route = re.compile(r"#/")
    return hash_route.sub("", url)
def fetch_html_from_url(url, timeout=10):
    """Download the page at ``url`` and return its body as text.

    A desktop-browser ``User-Agent`` header is sent with the request.

    :param url: Target URL.
    :param timeout: Seconds to wait for the server before giving up. Without
        a timeout ``requests`` may block indefinitely on an unresponsive host.
    :return: The response body as decoded by ``requests`` (``response.text``).
    :raises requests.HTTPError: If the server answers with an error status.
    :raises requests.Timeout: If the server does not respond within ``timeout``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # raises HTTPError on a 4xx/5xx response
    return response.text
def extract_title(html_content):
    """Extract the page title from an HTML document.

    HTML character references in the title (``&amp;``, ``&#39;``, ...) are
    decoded so the returned name is plain text rather than markup.

    :param html_content: HTML source as a string.
    :return: The decoded, whitespace-trimmed ``<title>`` text, or "未知歌单"
        when the document has no title or the title is blank.
    """
    match = re.search(r"<title>([^<]+)</title>", html_content)
    if match:
        title = html.unescape(match.group(1)).strip()
        # A whitespace-only title is as useless as a missing one.
        if title:
            return title
    return "未知歌单"
def extract_song_info(html_content):
    """Extract song ids and names from playlist HTML.

    Every ``<a href="/song?id=ID">NAME</a>`` link becomes one entry. HTML
    character references in NAME (``&amp;``, ``&#39;``, ...) are decoded so
    the playlist carries plain-text song names.

    :param html_content: HTML string containing the song links.
    :return: List of ``{"name", "url"}`` dicts in document order; empty when
        no song link is found.
    """
    pattern = r'<a href="/song\?id=(\d+)">([^<]+)</a>'
    return [
        {
            "name": html.unescape(name),
            "url": f"http://music.163.com/song/media/outer/url?id={song_id}.mp3",
        }
        for song_id, name in re.findall(pattern, html_content)
    ]
def process_urls(urls):
    """Build playlist entries for several playlist page URLs.

    Each URL is normalized, downloaded, and parsed for its title and songs.
    A URL that fails is reported on stderr and skipped, so one bad link does
    not abort the whole run.

    :param urls: Iterable of playlist page URLs.
    :return: List of ``{"name": title, "musics": songs}`` dicts, one per URL
        that was processed successfully.
    """
    result = []
    for url in urls:
        try:
            html_content = fetch_html_from_url(normalize_url(url))
            result.append({
                "name": extract_title(html_content),
                "musics": extract_song_info(html_content),
            })
        except Exception as e:  # deliberate best-effort: keep going on any failure
            # Diagnostics go to stderr so they never mix with JSON that the
            # caller writes to stdout.
            print(f"处理 URL {url} 时发生错误: {e}", file=sys.stderr)
    return result
def main():
    """Command-line entry point.

    Treats every command-line argument as a playlist URL and prints the
    collected playlists as indented JSON with non-ASCII text kept readable.
    Prints a usage line and exits with status 1 when no URL is given.
    """
    urls = sys.argv[1:]
    if not urls:
        print("使用方法: python script.py <URL1> <URL2> ...")
        sys.exit(1)

    playlists = process_urls(urls)
    # Emit the result as JSON.
    print(json.dumps(playlists, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment