import argparse
import os
import pathlib
from typing import Dict, List, Set, Tuple
# File extensions (without the leading dot) that are treated as source code.
_CODE_EXTENSIONS: Set[str] = {
    'py', 'js', 'java', 'c', 'cpp', 'cc', 'cs', 'php', 'rb', 'go', 'rs',
    'swift', 'kt', 'scala', 'pl', 'sh', 'html', 'htm', 'yml', 'yaml', 'h',
    'proto', 'makefile',
}


def _contains_parts(parts: Tuple[str, ...], wanted: Tuple[str, ...]) -> bool:
    """Return True if *wanted* occurs as a contiguous run inside *parts*."""
    width = len(wanted)
    return any(
        parts[start:start + width] == wanted
        for start in range(len(parts) - width + 1)
    )


def is_code_file(file_path: str, ignore_dirs: List[str]) -> bool:
    """
    Check whether a file is a code file that is not inside an ignored directory.

    Ignore entries are compared against whole path components rather than as
    raw substrings, so ignoring ``test`` excludes ``src/test/a.py`` but not
    ``src/latest/a.py``. Multi-component entries such as ``a/b`` must match
    consecutive components. Empty entries are skipped instead of matching
    every path.

    Args:
        file_path (str): Path of the file to check.
        ignore_dirs (List[str]): Directory names or paths to ignore.

    Returns:
        bool: True if the file is a code file outside every ignored
            directory, otherwise False.
    """
    path = pathlib.PurePath(file_path)
    path_parts = path.parts
    for ignore_dir in ignore_dirs:
        wanted = pathlib.PurePath(ignore_dir).parts
        # An empty or "." entry has no components and must not match anything.
        if wanted and _contains_parts(path_parts, wanted):
            return False

    # Makefiles have no extension, so they are recognized by exact file name.
    if path.name == 'Makefile':
        return True
    return path.suffix[1:] in _CODE_EXTENSIONS
# Maps a file extension (without the leading dot) to the language tag written
# after the opening Markdown code fence. Extensions that are not listed here
# are used as the tag verbatim.
_FENCE_LANGUAGES: Dict[str, str] = {
    'py': 'python',
    'js': 'javascript',
    'java': 'java',
    'c': 'c',
    'cpp': 'cpp',
    'cc': 'cpp',
    'cs': 'csharp',
    'php': 'php',
    'rb': 'ruby',
    'go': 'go',
    'rs': 'rust',
    'swift': 'swift',
    'kt': 'kotlin',
    'scala': 'scala',
    'pl': 'perl',
    'sh': 'bash',
    'html': 'html',
    'htm': 'html',
    'yml': 'yaml',
    'yaml': 'yaml',
    'h': 'c',
    'proto': 'protobuf',
}


def _fence_language(file_path: str) -> str:
    """Return the code-fence language tag for *file_path*."""
    if file_path.endswith('Makefile'):
        return 'makefile'
    file_extension = pathlib.Path(file_path).suffix[1:]
    return _FENCE_LANGUAGES.get(file_extension, file_extension)


def _choose_fence(content: str) -> str:
    """Return a backtick fence that does not occur anywhere in *content*."""
    fence = "```"
    # Lengthen the fence until the content can no longer close it early.
    while fence in content:
        fence += "`"
    return fence


def read_files_to_markdown(dir_path: str, output_file: str, ignore_dirs: List[str]) -> None:
    """
    Read the code files under a directory and save their contents to a Markdown file.

    Each code file becomes a ``## <relative path>`` heading followed by a
    fenced code block. For every other file only its relative path is printed.
    Directories and files are visited in sorted order so the output is
    reproducible.

    Args:
        dir_path (str): Directory to read files from.
        output_file (str): Name of the Markdown file to write.
        ignore_dirs (List[str]): Directories to ignore.
    """
    output_abs_path = os.path.abspath(output_file)
    with open(output_file, 'w', encoding='utf-8') as out:
        for root, dirs, files in os.walk(dir_path):
            # Sorting in place also fixes the order in which os.walk descends.
            dirs.sort()
            for file_name in sorted(files):
                file_path = os.path.join(root, file_name)
                # Never feed the file being written back into itself.
                if os.path.abspath(file_path) == output_abs_path:
                    continue

                relative_file_path = os.path.relpath(file_path, dir_path)
                if not is_code_file(file_path, ignore_dirs):
                    print(f"Skipping non-code file: {relative_file_path}")
                    continue

                # Decode as UTF-8 regardless of the platform default, and
                # substitute undecodable bytes instead of aborting the run.
                with open(file_path, 'r', encoding='utf-8', errors='replace') as source:
                    file_content = source.read()

                language = _fence_language(file_path)
                fence = _choose_fence(file_content)
                out.write(
                    f"## {relative_file_path}\n"
                    f"{fence}{language}\n{file_content}\n{fence}\n\n"
                )
def main() -> None:
    """
    Parse the command-line arguments and generate the Markdown file.

    Exits with an argparse usage error if the given directory does not exist,
    instead of writing an empty file and reporting success.
    """
    parser = argparse.ArgumentParser(description="Read code files from a directory and save their content to a Markdown file.")
    parser.add_argument("-d", "--dir", required=True, help="The directory path to read files from.")
    parser.add_argument("-o", "--output", default="code.md", help="The output Markdown file name (default: code.md)")
    parser.add_argument("-i", "--ignore", nargs='+', default=[], help="Directories to ignore (space-separated)")
    args = parser.parse_args()

    # os.walk silently yields nothing for a missing directory, so check first.
    if not os.path.isdir(args.dir):
        parser.error(f"directory not found: {args.dir}")

    read_files_to_markdown(args.dir, args.output, args.ignore)
    print(f"Markdown file '{args.output}' generated successfully.")


if __name__ == "__main__":
    main()
# Merge code into Markdown
# 2024-03-28