commit 973a0d55cba0bd28a5e65707c4df165ed4ed1eda Author: erius Date: Fri Jul 21 01:08:19 2023 +0300 First commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..06109db --- /dev/null +++ b/.gitignore @@ -0,0 +1,153 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/Lab4.iml b/.idea/Lab4.iml new file mode 100644 index 0000000..74d515a --- /dev/null +++ b/.idea/Lab4.iml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..fc76246 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,13 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..e6d264a --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..0c0d001 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/extra1.py b/extra1.py new file mode 100644 index 0000000..813b19a --- /dev/null +++ b/extra1.py @@ -0,0 +1,18 @@ +import xmltodict +import json + + +def xml_to_json(xml: str, pretty_formatting: bool) -> str: + xml_dict = xmltodict.parse(xml) + indent = 4 if pretty_formatting else None + json_str = json.dumps(xml_dict, ensure_ascii=False, indent=indent) + return json_str + + +if __name__ == "__main__": + file = open('schedule.xml', encoding='utf8') + content = file.read() + result = xml_to_json(content, True) + file = open('schedule.json', 'w', encoding='utf8') + file.writelines(result) + print(result) diff --git a/extra2.py b/extra2.py new file mode 100644 index 0000000..59d87ab --- /dev/null +++ b/extra2.py @@ -0,0 +1,27 @@ +import re + +non_parsable_parentheses = re.compile(r'(\s*)<\?(.+)\?>\s*\n') +start_nested_find = re.compile(r'(\s*)<(\w+)>\s*\n') +value_find = re.compile(r'(\s*)<(\w+)>(.*)\s*\n') +close_nested_find = re.compile(r'(\s*)\s*\n') +redundant_commas_find = re.compile(r',(\n\s*})') + + +def xml_to_json(xml: str) -> str: + json = xml + json = re.sub(non_parsable_parentheses, '', json) + json = re.sub(start_nested_find, r'\1\t"\2": {\n', json) + json = re.sub(value_find, r'\1\t"\2": "\3",\n', json) + json = re.sub(close_nested_find, r'\1\t},\n', json) + json = '{\n' + json + '}' + json = re.sub(redundant_commas_find, r'\1', json) + return json + + +if __name__ == "__main__": + file = open('schedule.xml', encoding='utf8') + contents = file.read() + result = xml_to_json(contents) + file = open('schedule.json', 'w', encoding='utf8') + file.writelines(result) + print(result) diff --git a/extra3.py b/extra3.py new file mode 100644 index 0000000..8c38b37 --- /dev/null +++ b/extra3.py @@ -0,0 +1,46 @@ +from timeit import default_timer as timer + +import extra1 +import extra2 +import main_task + +iterations = 10 + + +def time_main() -> float: + start = timer() + file = open('schedule.xml', encoding='utf8') + contents = file.read() + xml_parser = main_task.XmlParser() + for i in range(iterations): + xml_parser.parse_xml(contents) + xml_parser.load_json() + end = timer() + return end - start + + +def time_extra1() -> float: + start = timer() + file = open('schedule.xml', encoding='utf8') + content = file.read() + for i in range(iterations): + extra1.xml_to_json(content, True) + end = timer() + return end - start + + +def time_extra2() -> float: + start = timer() + file = open('schedule.xml', encoding='utf8') + contents = file.read() + for i in range(iterations): + extra2.xml_to_json(contents) + end = timer() + return end - start + + +if __name__ == "__main__": + print(f'Количество итераций: {iterations}') + print(f'Основное задание (без regex и библиотек): {time_main():.4f} секунд') + print(f'Доп. задание №1 (с помощью библиотек xmltodict и json): {time_extra1():.4f} секунд') + print(f'Доп. задание №2 (с помощью regex): {time_extra2():.4f} секунд') diff --git a/extra4.py b/extra4.py new file mode 100644 index 0000000..2a79871 --- /dev/null +++ b/extra4.py @@ -0,0 +1,37 @@ +from enum import Enum + +from main_task import XmlParser + + +class Markup(Enum): + CSV = 'csv' + TSV = 'tsv' + PROTO3 = 'proto3' + WML = 'wml' + + +def other_markups(parser: XmlParser, lang: Markup): + if lang == Markup.CSV: + parser.load_csv() + file = open('schedule.csv', 'w', encoding='utf8') + file.writelines(parser.get_csv()) + print(parser.get_csv()) + elif lang == Markup.TSV: + parser.load_tsv() + file = open('schedule.tsv', 'w', encoding='utf8') + file.writelines(parser.get_tsv()) + print(parser.get_tsv()) + elif lang == Markup.PROTO3: + pass + elif lang == Markup.WML: + pass + else: + raise ValueError('Unknown markup language ' + lang.value) + + +if __name__ == "__main__": + schedule = open('schedule.xml', encoding='utf8') + contents = schedule.read() + xml_parser = XmlParser() + xml_parser.parse_xml(contents) + other_markups(xml_parser, Markup.CSV) diff --git a/main_task.py b/main_task.py new file mode 100644 index 0000000..d38a8fe --- /dev/null +++ b/main_task.py @@ -0,0 +1,134 @@ +# Номер в ИСУ: 316304 +# Вариант: 8 + +class XmlParser: + class Node: + parent: 'Node' = None + children = None + attribute: str = '' + depth: int = 0 + + def __init__(self, parent: 'Node', children, attribute: str, depth: int): + self.parent = parent + self.children = children + self.attribute = attribute + self.depth = depth + + def __str__(self): + return f'' + + def get_full_path(self, sep: str) -> str: + attributes = [] + node = self + while node is not None: + attributes.append(node.attribute) + node = node.parent + path = sep.join(attributes[::-1]) + return path + + root: Node = None + xml = '' + json = '' + proto3 = '' + wml = '' + tsv = ['', ''] + csv = ['', ''] + + def parse_xml(self, xml: str): + self.xml = xml + node = None + depth = 0 + for line in xml.splitlines(): + line = line.strip() + if line[:2] == '') + attribute = line[1:value_start] + value = line[value_start + 1:value_end] + element = self.Node(node, value, attribute, depth) + node.children.append(element) + + def form_json(self, data: Node): + if isinstance(data.children, list): + self.json += '\t' * (data.depth + 1) + f'"{data.attribute}": ' + '{\n' + for node in data.children: + self.form_json(node) + self.json = self.json[:-2] + '\n' + '\t' * (data.depth + 1) + '},\n' + else: + self.json += '\t' * (data.depth + 1) + f'"{data.attribute}": "{data.children}",\n' + + def load_json(self): + self.json = '' + self.form_json(self.root) + self.json = '{\n' + self.json[:-2] + '\n}' + + def form_csv(self, data: Node): + if isinstance(data.children, list): + for node in data.children: + self.form_csv(node) + else: + self.csv[0] += data.get_full_path('/') + ',' + self.csv[1] += data.children + ',' + + def load_csv(self): + self.csv = ['', ''] + self.form_csv(self.root) + self.csv[0] = self.csv[0][:-1] + self.csv[1] = self.csv[1][:-1] + self.csv = '\n'.join(self.csv) + + def form_tsv(self, data: Node): + if isinstance(data.children, list): + for node in data.children: + self.form_tsv(node) + else: + self.tsv[0] += data.get_full_path('.') + '\t' + self.tsv[1] += data.children + '\t' + + def load_tsv(self): + self.tsv = ['', ''] + self.form_tsv(self.root) + self.tsv[0] = self.tsv[0][:-1] + self.tsv[1] = self.tsv[1][:-1] + self.tsv = '\n'.join(self.tsv) + + def get_json(self) -> str: + return self.json + + def get_xml(self) -> str: + return self.xml + + def get_proto3(self) -> str: + return self.proto3 + + def get_tsv(self) -> str: + return str(self.tsv) + + def get_csv(self) -> str: + return str(self.csv) + + +if __name__ == '__main__': + file = open('schedule.xml', encoding='utf8') + contents = file.read() + xml_parser = XmlParser() + xml_parser.parse_xml(contents) + xml_parser.load_json() + file = open('schedule.json', 'w', encoding='utf8') + file.writelines(xml_parser.get_json()) + print(xml_parser.get_json()) diff --git a/schedule.csv b/schedule.csv new file mode 100644 index 0000000..3d5e44a --- /dev/null +++ b/schedule.csv @@ -0,0 +1,2 @@ +schedule/day1/name,schedule/day1/lesson/name,schedule/day1/lesson/type,schedule/day1/lesson/time,schedule/day1/lesson/evenWeek,schedule/day1/lesson/room/address,schedule/day1/lesson/room/number,schedule/day1/lesson/teacher,schedule/day1/lesson/format +Вторник,Дискретная математика,Лекция,13:30-15:00,true,Кронверский пр., д.49, лит.А,285,Поляков Владимир Иванович,Очно-дистанционный \ No newline at end of file diff --git a/schedule.json b/schedule.json new file mode 100644 index 0000000..ebf330f --- /dev/null +++ b/schedule.json @@ -0,0 +1,19 @@ +{ + "schedule": { + "day1": { + "name": "Вторник", + "lesson": { + "name": "Дискретная математика", + "type": "Лекция", + "time": "13:30-15:00", + "evenWeek": "true", + "room": { + "address": "Кронверский пр., д.49, лит.А", + "number": "285" + }, + "teacher": "Поляков Владимир Иванович", + "format": "Очно-дистанционный" + } + } + } +} \ No newline at end of file diff --git a/schedule.tsv b/schedule.tsv new file mode 100644 index 0000000..8a06386 --- /dev/null +++ b/schedule.tsv @@ -0,0 +1,2 @@ +schedule.day1.name schedule.day1.lesson.name schedule.day1.lesson.type schedule.day1.lesson.time schedule.day1.lesson.evenWeek schedule.day1.lesson.room.address schedule.day1.lesson.room.number schedule.day1.lesson.teacher schedule.day1.lesson.format +Вторник Дискретная математика Лекция 13:30-15:00 true Кронверский пр., д.49, лит.А 285 Поляков Владимир Иванович Очно-дистанционный \ No newline at end of file diff --git a/schedule.xml b/schedule.xml new file mode 100644 index 0000000..887a495 --- /dev/null +++ b/schedule.xml @@ -0,0 +1,18 @@ + + + + Вторник + + Дискретная математика + Лекция + + true + +
Кронверский пр., д.49, лит.А
+ 285 +
+ Поляков Владимир Иванович + Очно-дистанционный +
+
+