import os
import pathlib
import argparse
from typing import List, Set, Tuple

def is_code_file(file_path: str, ignore_dirs: List[str]) -> bool:
    """
    Decide whether a file should be treated as a code file.

    A file is rejected when any entry of ``ignore_dirs`` matches one or more
    whole, consecutive components of ``file_path``. Matching on components
    (rather than on raw substrings) means that ignoring ``test`` excludes
    ``src/test/app.py`` but not ``src/latest/app.py``.

    Args:
        file_path (str): Path of the file to classify.
        ignore_dirs (List[str]): Directory names, or multi-component paths
            such as ``src/vendor``, whose contents must be skipped.

    Returns:
        bool: True if the file is not ignored and either has a recognised
            code extension or its path ends with ``Makefile``; False otherwise.
    """
    path = pathlib.Path(file_path)
    path_parts = path.parts

    for ignore_dir in ignore_dirs:
        ignore_parts = pathlib.Path(ignore_dir).parts
        if not ignore_parts:
            # An empty (or ".") entry names no directory; treating it as a
            # match would reject every single file.
            continue
        span = len(ignore_parts)
        # Slide a window over the path looking for the ignored component run.
        for start in range(len(path_parts) - span + 1):
            if path_parts[start:start + span] == ignore_parts:
                return False

    file_extension = path.suffix[1:]  # drop the leading dot
    code_extensions: Set[str] = {
        'py', 'js', 'java', 'c', 'cpp', 'cc', 'cs', 'php', 'rb', 'go', 'rs',
        'swift', 'kt', 'scala', 'pl', 'sh', 'html', 'htm', 'yml', 'yaml',
        'h', 'proto', 'makefile',
    }
    # Makefiles carry no extension, so they are recognised by name instead.
    return file_extension in code_extensions or file_path.endswith('Makefile')

# Maps a file extension (without the leading dot) to the language tag placed
# after the opening Markdown code fence. Extensions that are not listed are
# used verbatim as the tag.
_FENCE_LANGUAGES = {
    'py': 'python',
    'js': 'javascript',
    'java': 'java',
    'c': 'c',
    'cpp': 'cpp',
    'cc': 'cpp',
    'cs': 'csharp',
    'php': 'php',
    'rb': 'ruby',
    'go': 'go',
    'rs': 'rust',
    'swift': 'swift',
    'kt': 'kotlin',
    'scala': 'scala',
    'pl': 'perl',
    'sh': 'bash',
    'html': 'html',
    'htm': 'html',
    'yml': 'yaml',
    'yaml': 'yaml',
    'h': 'c',
    'proto': 'protobuf',
}


def _fence_language(file_path: str) -> str:
    """Return the Markdown fence language tag for ``file_path``."""
    if file_path.endswith('Makefile'):
        return 'makefile'
    extension = pathlib.Path(file_path).suffix[1:]
    return _FENCE_LANGUAGES.get(extension, extension)


def _code_fence(content: str) -> str:
    """Return a backtick fence that does not occur anywhere in ``content``."""
    fence = '```'
    # A fence that also appears inside the content would end the code block
    # early, so grow it until it is unique.
    while fence in content:
        fence += '`'
    return fence


def read_files_to_markdown(dir_path: str, output_file: str, ignore_dirs: List[str]) -> None:
    """
    Read the code files under a directory and save their contents to a
    Markdown file.

    Every file accepted by ``is_code_file`` is written as a ``##`` heading
    holding its path relative to ``dir_path``, followed by a fenced code block
    with the file's content. For any other file only its relative path is
    printed.

    Files are read and written as UTF-8 regardless of the platform locale;
    bytes that are not valid UTF-8 are replaced instead of aborting the run.

    Args:
        dir_path (str): Path of the directory to read.
        output_file (str): Name of the output file.
        ignore_dirs (List[str]): Directories to ignore.
    """
    with open(output_file, 'w', encoding='utf-8') as markdown:
        for root, _dirs, files in os.walk(dir_path):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                relative_file_path = os.path.relpath(file_path, dir_path)

                if not is_code_file(file_path, ignore_dirs):
                    print(f"Skipping non-code file: {relative_file_path}")
                    continue

                with open(file_path, 'r', encoding='utf-8', errors='replace') as source:
                    file_content = source.read()

                language = _fence_language(file_path)
                fence = _code_fence(file_content)
                markdown.write(
                    f"## {relative_file_path}\n"
                    f"{fence}{language}\n{file_content}\n{fence}\n\n"
                )

if __name__ == "__main__":
    # Command-line entry point: parse the options, validate the input
    # directory, then generate the Markdown file.
    parser = argparse.ArgumentParser(description="Read code files from a directory and save their content to a Markdown file.")
    parser.add_argument("-d", "--dir", required=True, help="The directory path to read files from.")
    parser.add_argument("-o", "--output", default="code.md", help="The output Markdown file name (default: code.md)")
    parser.add_argument("-i", "--ignore", nargs='+', default=[], help="Directories to ignore (space-separated)")
    args = parser.parse_args()

    # os.walk() silently yields nothing for a missing path, which would leave
    # an empty output file and still report success; fail early instead.
    if not os.path.isdir(args.dir):
        parser.error(f"'{args.dir}' is not an existing directory")

    read_files_to_markdown(args.dir, args.output, args.ignore)
    print(f"Markdown file '{args.output}' generated successfully.")