import re

# An XML declaration / processing-instruction line such as <?xml ... ?>;
# it has no JSON counterpart and is dropped.
non_parsable_parentheses = re.compile(r'(\s*)<\?(.+)\?>\s*\n')
# A line holding only an opening tag: the start of a nested element.
start_nested_find = re.compile(r'(\s*)<(\w+)>\s*\n')
# A one-line leaf element "<tag>value</tag>".  The closing tag must be part of
# the pattern (and equal the opening tag), otherwise it leaks into the value.
value_find = re.compile(r'(\s*)<(\w+)>(.*)</\2>\s*\n')
# A line holding only a closing tag: the end of a nested element.  Without the
# explicit "</tag>" this pattern would match at every single newline.
close_nested_find = re.compile(r'(\s*)</\w+>\s*\n')
# A comma directly followed (on the next line) by a closing brace.
redundant_commas_find = re.compile(r',(\n\s*})')


def xml_to_json(xml: str) -> str:
    """Convert simple, line-oriented XML text into JSON text using regexes.

    The conversion is purely textual and expects one tag construct per line:
    an opening tag alone, a closing tag alone, or ``<tag>value</tag>``.
    Leading indentation of each line is kept and one tab is added after it.

    Limitations (not handled by this function): tag attributes, tag names
    outside ``[A-Za-z0-9_]``, XML entities, quotes or backslashes inside
    values, and repeated sibling tags (they become duplicate JSON keys).

    Args:
        xml: The XML document as a string.

    Returns:
        The JSON text, wrapped in a top-level object.
    """
    text = xml
    # Every pattern requires a terminating newline; make sure the final line
    # has one so the last closing tag is converted too.  Empty input is left
    # alone so it still yields an empty object.
    if text and not text.endswith('\n'):
        text += '\n'

    text = non_parsable_parentheses.sub('', text)
    text = start_nested_find.sub(r'\1\t"\2": {\n', text)
    text = value_find.sub(r'\1\t"\2": "\3",\n', text)
    text = close_nested_find.sub(r'\1\t},\n', text)
    text = '{\n' + text + '}'
    # Every member was emitted with a trailing comma; drop those that end up
    # immediately before a closing brace.
    text = redundant_commas_find.sub(r'\1', text)
    return text


def main() -> None:
    """Read schedule.xml, write its JSON form to schedule.json and print it."""
    with open('schedule.xml', encoding='utf8') as source:
        contents = source.read()

    result = xml_to_json(contents)

    with open('schedule.json', 'w', encoding='utf8') as target:
        target.write(result)
    print(result)


if __name__ == "__main__":
    main()