"""Rewrite absolute ``href`` paths inside HTML files as relative paths."""

from pathlib import Path
import argparse
import os
import re
import sys

# Matches href='...' or href="...".  The back-reference (\1) forces the closing
# quote to be the same character as the opening one, so a value such as
# href="it's.html" is captured whole instead of being cut at the apostrophe.
# DOTALL keeps the value allowed to span lines, as the original character
# class did.
_HREF_RE = re.compile(r"""href=(["'])((?:(?!\1).)+)\1""", re.DOTALL)

# File suffixes (lower-case) that scan_and_convert will process.
_ALLOWED_EXTENSIONS = frozenset({'.html'})


def convert_paths_in_file(file_path, root_folder, base_dir) -> bool:
    """
    Convert absolute href paths to relative paths in a single file.

    Every quoted ``href`` value that starts with ``root_folder`` is replaced
    by its path relative to ``base_dir``; the original quote character is
    kept. All other text, including line endings, is left untouched.

    NOTE: the relative path is computed against ``base_dir``, not against the
    directory that contains ``file_path``.

    Args:
        file_path (Path): Path to the file being processed
        root_folder (str): Prefix an href must start with to be converted
        base_dir (Path): Base directory for calculating relative paths

    Returns:
        bool: True if the file content changed and was written back,
        False otherwise (including when an error occurred and was reported).
    """
    base = str(base_dir)

    def _rewrite(match):
        """Return the replacement text for one href attribute."""
        quote, href = match.group(1), match.group(2)
        if not href.startswith(root_folder):
            return match.group(0)
        # relpath uses the platform separator; URLs always use '/'.
        rel_path = os.path.relpath(href, base).replace(os.sep, '/')
        return f'href={quote}{rel_path}{quote}'

    try:
        # newline='' disables newline translation so CRLF/LF line endings
        # survive the read/write round trip unchanged.
        with open(file_path, 'r', encoding='utf-8', newline='') as f:
            content = f.read()

        new_content = _HREF_RE.sub(_rewrite, content)
        modified = new_content != content

        # Only touch the file when something actually changed.
        if modified:
            with open(file_path, 'w', encoding='utf-8', newline='') as f:
                f.write(new_content)

        return modified

    except Exception as e:
        # Deliberate best-effort: one unreadable or undecodable file must not
        # abort a whole directory scan, so report it and carry on.
        print(f"Error processing {file_path}: {str(e)}")
        return False


def scan_and_convert(root_folder, base_dir=None) -> None:
    """
    Scan all files under the root folder and convert absolute href paths
    to relative paths. Only files with an ``.html`` suffix (case-insensitive)
    are processed.

    Progress and a final summary are printed to standard output.

    Args:
        root_folder (str): Root folder path to scan; also the prefix an href
            must start with to be converted.
        base_dir (str, optional): Base directory for calculating relative
            paths. Defaults to the root_folder if not specified.

    Raises:
        FileNotFoundError: If ``root_folder`` does not exist.
    """
    # Convert inputs to Path objects
    root_path = Path(root_folder)
    base_dir = Path(base_dir or root_folder)

    # Validate inputs
    if not root_path.exists():
        raise FileNotFoundError(f"Root folder '{root_folder}' does not exist")

    print(f"Scanning folder: {root_folder}")
    print(f"Base directory: {base_dir}")

    total_files = 0
    modified_files = 0

    # Scan all files recursively
    for file_path in root_path.rglob('*'):
        if not file_path.is_file():
            continue
        if file_path.suffix.lower() not in _ALLOWED_EXTENSIONS:
            continue
        total_files += 1
        if convert_paths_in_file(file_path, str(root_path), base_dir):
            modified_files += 1

    print("\nProcessing complete:")
    print(f"Total files scanned: {total_files}")
    print(f"Files modified: {modified_files}")


def main() -> int:
    """
    Command-line entry point.

    Parses ``root_folder`` and the optional ``--base-dir`` argument, runs the
    scan, and prints any error raised by it.

    Returns:
        int: 0 on success, 1 if the scan raised an exception.
    """
    parser = argparse.ArgumentParser(description='Convert absolute href paths to relative paths')
    parser.add_argument('root_folder', help='Root folder to scan')
    parser.add_argument('--base-dir', help='Base directory for calculating relative paths')

    args = parser.parse_args()

    try:
        scan_and_convert(args.root_folder, args.base_dir)
    except Exception as e:
        print(f"Error: {str(e)}")
        return 1
    return 0


if __name__ == "__main__":
    # Propagate failure to the shell instead of always exiting with status 0.
    sys.exit(main())