#!/usr/bin/env python3
"""Benchmark the replacement of GeoNames "about.rdf" URLs in a text file.

Every URL matched by ``_PATTERN`` is replaced by the value associated with
its numeric identifier in a mapping file. The whole read/replace/write cycle
is executed ``_NB_RUN`` times and the min/avg/max wall-clock times are
printed.
"""

import re
import sys
import time

# Constants
_NB_RUN = 10
# Dots are escaped so that only the literal host and file name match.
_PATTERN = re.compile(r"https://sws\.geonames\.org/(\d+)/about\.rdf")
# NOTE: a larger alternative input is "./data/input_big.xml".
_PATH_TXT = "./A"
_PATH_MAP = "corres"
_PATH_RES = "output.xml"


def build_map(data):
    """
    Build a mapping dictionary from consecutive pairs of lines.

    Lines are consumed two by two: within each pair, the second line is the
    dictionary key and the first line is the associated value. A trailing
    unpaired line is ignored. If a key appears several times, the last
    occurrence wins.

    Args:
        data (list | str): Either a list of lines (with or without their
            trailing line terminator) or the raw text of the mapping file.

    Returns:
        dict: A dictionary mapping each second line of a pair to the first.
    """
    if isinstance(data, str):
        data = data.splitlines()
    # Strip only line terminators; slicing with [:-1] would eat a real
    # character on a final line that has no trailing newline.
    lines = [line.rstrip("\r\n") for line in data]
    return dict(zip(lines[1::2], lines[0::2]))


def replace_match(match, mapping):
    """
    Return the replacement text for a regular-expression match.

    Args:
        match (re.Match): A match object whose first group is the lookup key.
        mapping (dict): A dictionary mapping keys to replacement values.

    Returns:
        str: The mapped value, or the whole matched text unchanged when the
        key is not present in the mapping.
    """
    key = match.group(1)
    return mapping.get(key, match.group(0))


def read_file(file_path):
    """
    Read the content of a UTF-8 encoded file.

    The file is read in binary mode and decoded afterwards, so line
    terminators are returned exactly as stored on disk.

    Args:
        file_path (str): The path to the file to be read.

    Returns:
        str: The content of the file as a string.

    Raises:
        SystemExit: If the file is missing, unreadable or not valid UTF-8.
    """
    try:
        with open(file_path, "rb") as file:
            return file.read().decode("utf-8")
    except FileNotFoundError:
        sys.exit(f"Error: File '{file_path}' not found.")
    except (OSError, UnicodeError) as e:
        sys.exit(f"Error reading file '{file_path}': {e}")


def write_file(file_path, content):
    """
    Write content to a file, encoded as UTF-8.

    Args:
        file_path (str): The path to the file to be written.
        content (str): The content to write to the file.

    Raises:
        SystemExit: If the file cannot be written or the content cannot be
            encoded.
    """
    try:
        with open(file_path, "wb") as file:
            file.write(content.encode("utf-8"))
    except (OSError, UnicodeError) as e:
        sys.exit(f"Error writing to file '{file_path}': {e}")


def process(input_file, map_file, pattern, output_file=_PATH_RES):
    """
    Replace every pattern match in the input file and write the result.

    Args:
        input_file (str): The path to the input file.
        map_file (str): The path to the map file (pairs of lines, see
            ``build_map``).
        pattern (re.Pattern): A regular expression whose first group is the
            key looked up in the mapping.
        output_file (str): The path of the file to write. Defaults to
            ``_PATH_RES``.
    """
    txt = read_file(input_file)
    # build_map works on lines: handing it the raw string unsplit would make
    # it index single characters and produce a useless mapping.
    mapping = build_map(read_file(map_file).splitlines())
    output = pattern.sub(lambda match: replace_match(match, mapping), txt)
    write_file(output_file, output)


def main():
    """Run the processing ``_NB_RUN`` times and print timing statistics."""
    times = []
    print(f"Starting batch of {_NB_RUN} runs...")
    for run in range(_NB_RUN):
        # perf_counter is monotonic and meant for measuring short durations.
        start_time = time.perf_counter()
        process(_PATH_TXT, _PATH_MAP, _PATTERN)
        times.append(time.perf_counter() - start_time)
        sys.stdout.write(f"\rRun {run + 1}/{_NB_RUN} completed")
        sys.stdout.flush()

    if not times:
        # Nothing was run, so there are no statistics to report.
        return

    print("\nExecution times:")
    print(f"  - min: {min(times):.5f} sec")
    print(f"  - avg: {sum(times) / len(times):.5f} sec")
    print(f"  - max: {max(times):.5f} sec")


if __name__ == "__main__":
    main()