B站手机缓存视频导出与整理

B站手机缓存视频导出与整理

B站手机缓存视频存储目录:

内部存储设备/Android/data/tv.danmaku.bili/download/

扫描已导出的缓存目录,按 entry.json 中的信息重命名视频:BLV 分段改名为 FLV,audio.m4s 与 video.m4s 用 ffmpeg 合并为 MP4。

import os

import json

import shutil

import subprocess

from pathlib import Path

def sanitize_filename(filename):
    """Replace characters that are illegal in file names with underscores.

    Args:
        filename (str): The raw file name.

    Returns:
        str: The name with every ``< > : " / \\ | ? *`` replaced by ``_``.
    """
    # One translation table covers all nine characters in a single pass.
    substitutions = str.maketrans({ch: '_' for ch in '<>:"/\\|?*'})
    return filename.translate(substitutions)

def find_entry_json_folders(directory):
    """Recursively collect every folder that directly contains ``entry.json``.

    Args:
        directory (str): Root directory to walk.

    Returns:
        list: Paths of the folders holding an ``entry.json`` file, in the
        order ``os.walk`` visits them.
    """
    return [
        folder
        for folder, _subdirs, filenames in os.walk(directory)
        if 'entry.json' in filenames
    ]

def recursive_search(data, key):
    """Depth-first search of a decoded JSON structure for ``key``.

    Args:
        data: A dict, a list, or any scalar value.
        key: The dictionary key to look for.

    Returns:
        The value stored under the first matching key, or ``None`` when no
        match exists. A match whose value is ``None`` is indistinguishable
        from "not found", so the search continues with the siblings.
    """
    if isinstance(data, dict):
        # A direct hit wins over anything nested deeper in this dict.
        if key in data:
            return data[key]
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # Scalars cannot contain keys.
        return None

    for child in children:
        found = recursive_search(child, key)
        if found is not None:
            return found
    return None

def read_entry_json(entry_json_path):
    """Load an ``entry.json`` file and extract the fields used for naming.

    Args:
        entry_json_path (str): Path of the ``entry.json`` file.

    Returns:
        dict: Any subset of the keys ``title``, ``xvid``, ``part``, ``cid``,
        ``page``, ``owner_id`` and ``owner_name``. A key is present only when
        a truthy value was found; every value is a string, and the textual
        ones are passed through ``sanitize_filename``.

    Raises:
        Exception: Wraps any error raised while reading or parsing the file.
    """
    # (output field, JSON keys tried in order, sanitize for use in a file name)
    field_specs = (
        ('title', ('title',), True),
        ('xvid', ('avid', 'bvid', 'av_id'), False),
        ('part', ('part', 'index_title'), True),
        ('cid', ('cid',), False),
        ('page', ('page', 'index'), False),
        ('owner_id', ('owner_id', 'mid'), False),
        ('owner_name', ('owner_name', 'name'), True),
    )

    try:
        with open(entry_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        entry_info = {}
        for field, candidate_keys, needs_sanitizing in field_specs:
            value = None
            # The first candidate key that yields a truthy value wins.
            for candidate in candidate_keys:
                value = recursive_search(data, candidate)
                if value:
                    break
            if not value:
                continue
            text = str(value)
            entry_info[field] = sanitize_filename(text) if needs_sanitizing else text
        return entry_info
    except Exception as e:
        raise Exception(f"读取entry.json失败 {entry_json_path}: {e}")

def generate_new_filename(entry_info, is_mp4=True):
    """Build the output file name from the fields read out of ``entry.json``.

    Args:
        entry_info (dict): Fields extracted from ``entry.json``; missing
            fields are simply left out of the name.
        is_mp4 (bool): When true the name ends in ``.mp4``; otherwise no
            extension is appended (used for BLV files).

    Returns:
        str: The present fields joined with ``_`` in a fixed order.
    """
    ordered_fields = ('title', 'xvid', 'part', 'page', 'cid', 'owner_id', 'owner_name')
    stem = '_'.join(entry_info[name] for name in ordered_fields if name in entry_info)
    return f"{stem}.mp4" if is_mp4 else stem

def find_media_files(folder):
    """Recursively locate BLV segments and the audio/video M4S pair.

    Args:
        folder (str): Directory to walk.

    Returns:
        tuple: ``(blv_files, m4s_files)`` where ``blv_files`` is a list of
        paths whose name ends in ``.blv`` (case-insensitive) and ``m4s_files``
        is ``{'audio': path_or_None, 'video': path_or_None}`` for files named
        exactly ``audio.m4s`` / ``video.m4s``. If several such files exist,
        the one visited last is kept.
    """
    blv_files = []
    m4s_files = {'audio': None, 'video': None}
    m4s_slots = {'audio.m4s': 'audio', 'video.m4s': 'video'}

    for current_dir, _subdirs, filenames in os.walk(folder):
        for filename in filenames:
            # The metadata file itself is never a media file.
            if filename == 'entry.json':
                continue

            full_path = os.path.join(current_dir, filename)
            if filename.lower().endswith('.blv'):
                blv_files.append(full_path)
                continue

            slot = m4s_slots.get(filename)
            if slot is not None:
                m4s_files[slot] = full_path

    return blv_files, m4s_files

def merge_m4s_files(audio_path, video_path, output_path):
    """Mux ``audio.m4s`` and ``video.m4s`` into a single MP4 file via ffmpeg.

    Args:
        audio_path (str): Path of the audio stream file.
        video_path (str): Path of the video stream file.
        output_path (str): Path of the MP4 file to write; an existing file
            is overwritten (``-y``).

    Returns:
        bool: ``True`` when ffmpeg exits with status 0.

    Raises:
        Exception: If ffmpeg cannot be started or exits with a non-zero
            status. The original error is attached as ``__cause__``.
    """
    cmd = [
        'ffmpeg',
        '-i', video_path,   # input video stream
        '-i', audio_path,   # input audio stream
        '-c', 'copy',       # copy the streams as-is, no re-encoding
        '-y',               # overwrite the output file
        output_path,
    ]

    try:
        result = subprocess.run(
            cmd,
            # ffmpeg reads interactive commands from stdin by default; keep it
            # away from the terminal input the calling script relies on.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            # Decode leniently: ffmpeg echoes file names and metadata, and a
            # byte that the codec cannot decode must not turn a successful
            # merge into a failure.
            encoding='utf-8',
            errors='replace',
        )
        if result.returncode != 0:
            raise Exception(f"FFmpeg执行失败: {result.stderr}")
        return True
    except Exception as e:
        raise Exception(f"合并M4S文件失败: {e}") from e

def simulate_rename_operations(entry_json_folders, output_directory):
    """Plan every rename/merge without touching the file system.

    Args:
        entry_json_folders (list): Folders that contain an ``entry.json``.
        output_directory (str): Directory the results will be written to.

    Returns:
        dict: Maps each new file name to an operation description. BLV
        entries have ``type='blv_rename'`` with ``source`` and ``target``;
        M4S entries have ``type='m4s_merge'`` with ``audio``, ``video`` and
        ``target``.

    Raises:
        Exception: If two operations would produce the same file name, or if
            an ``entry.json`` cannot be read.
    """
    operations = {}

    for folder in entry_json_folders:
        entry_info = read_entry_json(os.path.join(folder, 'entry.json'))
        blv_files, m4s_files = find_media_files(folder)

        # BLV segments: keep the segment's own name as a suffix so several
        # segments of one video stay distinct.
        blv_prefix = generate_new_filename(entry_info, is_mp4=False)
        for blv_file in blv_files:
            segment_name = os.path.basename(blv_file)
            # Swap only the trailing extension, case-insensitively. A blanket
            # str.replace would miss "0.BLV" and would also rewrite ".blv"
            # occurring inside the title.
            if segment_name.lower().endswith('.blv'):
                segment_name = segment_name[:-len('.blv')] + '.flv'
            new_filename = f"{blv_prefix}_{segment_name}"
            _register_operation(operations, new_filename, {
                'type': 'blv_rename',
                'source': blv_file,
                'target': os.path.join(output_directory, new_filename),
            })

        # M4S pair: only mergeable when both streams were found.
        if m4s_files['audio'] and m4s_files['video']:
            new_filename = generate_new_filename(entry_info, is_mp4=True)
            _register_operation(operations, new_filename, {
                'type': 'm4s_merge',
                'audio': m4s_files['audio'],
                'video': m4s_files['video'],
                'target': os.path.join(output_directory, new_filename),
            })

    return operations


def _register_operation(operations, name, operation):
    """Store ``operation`` under ``name``, refusing duplicate target names."""
    if name in operations:
        raise Exception(f"文件名冲突: {name}")
    operations[name] = operation

def execute_operations(operations):
    """Carry out the planned operations (move BLV files, merge M4S pairs).

    A failure in one operation is reported on stdout and does not stop the
    remaining ones.

    Args:
        operations (dict): Mapping of new file name to operation description.
            Each description has a ``type`` of ``'blv_rename'`` (with
            ``source`` and ``target``) or ``'m4s_merge'`` (with ``audio``,
            ``video`` and ``target``).
    """
    for new_filename, op_info in operations.items():
        try:
            # Derive the output directory from the operation itself instead
            # of relying on a module-level global that only exists when the
            # file is run as a script.
            target_dir = os.path.dirname(op_info['target'])
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            if op_info['type'] == 'blv_rename':
                # Moving the BLV file to its ".flv" target renames it.
                shutil.move(op_info['source'], op_info['target'])
                print(f"重命名成功: {op_info['source']} -> {op_info['target']}")
            elif op_info['type'] == 'm4s_merge':
                merge_m4s_files(op_info['audio'], op_info['video'], op_info['target'])
                print(f"合并成功: {op_info['target']}")
        except Exception as e:
            print(f"操作失败 {new_filename}: {e}")

def process_bilibili_cache(input_directory, output_directory):
    """Scan a Bilibili cache tree, show the planned work, and run it on confirmation.

    Args:
        input_directory (str): Root directory that holds the cached files.
        output_directory (str): Directory the renamed/merged files go to.
    """
    entry_json_folders = find_entry_json_folders(input_directory)

    # Nothing to do when no cache entry was discovered.
    if not entry_json_folders:
        print("未找到任何包含entry.json的文件夹")
        return

    print(f"找到 {len(entry_json_folders)} 个包含entry.json的文件夹")

    try:
        # Dry run first so naming conflicts surface before any file is touched.
        operations = simulate_rename_operations(entry_json_folders, output_directory)
        print(f"模拟操作完成,共 {len(operations)} 个操作需要执行")

        print("操作列表:")
        for new_name, op_info in operations.items():
            op_type = op_info['type']
            if op_type == 'm4s_merge':
                print(f" 合并M4S: {new_name}")
            elif op_type == 'blv_rename':
                print(f" 重命名BLV: {new_name} <- {op_info['source']}")

        # Only an explicit "y" (any case) proceeds; anything else cancels.
        confirm = input("\n是否继续执行实际操作?(y/N): ")
        if confirm.lower() != 'y':
            print("操作已取消")
            return

        execute_operations(operations)
        print("所有操作已完成")
    except Exception as e:
        print(f"操作失败: {e}")

# Usage example
if __name__ == "__main__":
    # Edit these two paths: the cache root to scan and the directory that
    # receives the renamed/merged videos.
    input_directory, output_directory = "./", "../renamed_videos2"
    process_bilibili_cache(input_directory, output_directory)