First commit

2023-07-21 01:08:19 +03:00 · 2023-07-21 01:08:19 +03:00 · 973a0d55cb
commit 973a0d55cb
16 changed files with 505 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,153 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
--- a/.idea/Lab4.iml
+++ b/.idea/Lab4.iml
@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@ -0,0 +1,13 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredIdentifiers">
+        <list>
+          <option value="list.__getitem__" />
+          <option value="str.replace" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (Lab4)" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Lab4.iml" filepath="$PROJECT_DIR$/.idea/Lab4.iml" />
+    </modules>
+  </component>
+</project>
--- a/extra1.py
+++ b/extra1.py
@ -0,0 +1,18 @@
+import xmltodict
+import json
+
+
+def xml_to_json(xml: str, pretty_formatting: bool) -> str:
+    """Convert an XML document to JSON text via xmltodict and json.
+
+    Args:
+        xml: The XML document as text.
+        pretty_formatting: When true the JSON is indented by 4 spaces,
+            otherwise no indentation is requested from json.dumps.
+
+    Returns:
+        The JSON text; non-ASCII characters are written as-is rather than
+        as \\uXXXX escapes.
+    """
+    parsed = xmltodict.parse(xml)
+    if pretty_formatting:
+        return json.dumps(parsed, ensure_ascii=False, indent=4)
+    return json.dumps(parsed, ensure_ascii=False, indent=None)
+
+
+if __name__ == "__main__":
+    # Context managers close both files even if the conversion raises.
+    with open('schedule.xml', encoding='utf8') as source:
+        content = source.read()
+    result = xml_to_json(content, True)
+    # write() takes the whole string at once; writelines() would iterate
+    # over it character by character.
+    with open('schedule.json', 'w', encoding='utf8') as target:
+        target.write(result)
+    print(result)
--- a/extra2.py
+++ b/extra2.py
@ -0,0 +1,27 @@
+import re
+
+# Each of the first four patterns matches one whole line of the document,
+# including its terminating newline; group 1 is the whitespace before the tag.
+non_parsable_parentheses = re.compile(r'(\s*)<\?(.+)\?>\s*\n')  # <?xml ... ?>
+start_nested_find = re.compile(r'(\s*)<(\w+)>\s*\n')  # <tag> alone on a line
+value_find = re.compile(r'(\s*)<(\w+)>(.*)</\w+>\s*\n')  # <tag>value</tag>
+close_nested_find = re.compile(r'(\s*)</\w+>\s*\n')  # </tag> alone on a line
+# A comma that is followed only by whitespace and a closing brace.
+redundant_commas_find = re.compile(r',(\n\s*})')
+
+
+def xml_to_json(xml: str) -> str:
+    """Convert a simple line-oriented XML document to JSON using regexes.
+
+    Every line is expected to hold a declaration, an opening tag, a closing
+    tag or a ``<tag>value</tag>`` pair. Values are copied verbatim between
+    double quotes, so quotes or backslashes inside them are not escaped.
+
+    Args:
+        xml: The XML document as text.
+
+    Returns:
+        The JSON text. The original indentation is kept and one extra tab
+        is inserted in front of every converted line.
+    """
+    text = xml
+    # Every pattern requires a terminating newline; without this the last
+    # closing tag of a document lacking a final newline would stay as XML.
+    if text and not text.endswith('\n'):
+        text += '\n'
+    text = non_parsable_parentheses.sub('', text)
+    text = start_nested_find.sub(r'\1\t"\2": {\n', text)
+    text = value_find.sub(r'\1\t"\2": "\3",\n', text)
+    text = close_nested_find.sub(r'\1\t},\n', text)
+    text = '{\n' + text + '}'
+    # Each converted line ends with a comma; drop those before a '}'.
+    return redundant_commas_find.sub(r'\1', text)
+
+
+if __name__ == "__main__":
+    # Context managers close both files even if the conversion raises.
+    with open('schedule.xml', encoding='utf8') as source:
+        contents = source.read()
+    result = xml_to_json(contents)
+    # write() takes the whole string at once; writelines() would iterate
+    # over it character by character.
+    with open('schedule.json', 'w', encoding='utf8') as target:
+        target.write(result)
+    print(result)
--- a/extra3.py
+++ b/extra3.py
@ -0,0 +1,46 @@
+from timeit import default_timer as timer
+
+import extra1
+import extra2
+import main_task
+
+# Number of conversions each time_* function below performs per measurement.
+iterations = 10
+
+
+def time_main() -> float:
+    """Time `iterations` conversions done by main_task.XmlParser.
+
+    The measured interval also covers opening and reading schedule.xml.
+
+    Returns:
+        Elapsed time in seconds.
+    """
+    start = timer()
+    # The context manager closes the file instead of leaking the handle.
+    with open('schedule.xml', encoding='utf8') as file:
+        contents = file.read()
+    xml_parser = main_task.XmlParser()
+    for _ in range(iterations):
+        xml_parser.parse_xml(contents)
+        xml_parser.load_json()
+    end = timer()
+    return end - start
+
+
+def time_extra1() -> float:
+    """Time `iterations` conversions done by extra1.xml_to_json.
+
+    The measured interval also covers opening and reading schedule.xml.
+
+    Returns:
+        Elapsed time in seconds.
+    """
+    start = timer()
+    # The context manager closes the file instead of leaking the handle.
+    with open('schedule.xml', encoding='utf8') as file:
+        content = file.read()
+    for _ in range(iterations):
+        extra1.xml_to_json(content, True)
+    end = timer()
+    return end - start
+
+
+def time_extra2() -> float:
+    """Time `iterations` conversions done by extra2.xml_to_json.
+
+    The measured interval also covers opening and reading schedule.xml.
+
+    Returns:
+        Elapsed time in seconds.
+    """
+    start = timer()
+    # The context manager closes the file instead of leaking the handle.
+    with open('schedule.xml', encoding='utf8') as file:
+        contents = file.read()
+    for _ in range(iterations):
+        extra2.xml_to_json(contents)
+    end = timer()
+    return end - start
+
+
+if __name__ == "__main__":
+    # Print the iteration count, then run and report each benchmark in turn.
+    print(f'Количество итераций: {iterations}')
+    main_seconds = time_main()
+    print(f'Основное задание (без regex и библиотек):               {main_seconds:.4f} секунд')
+    extra1_seconds = time_extra1()
+    print(f'Доп. задание №1 (с помощью библиотек xmltodict и json): {extra1_seconds:.4f} секунд')
+    extra2_seconds = time_extra2()
+    print(f'Доп. задание №2 (с помощью regex):                      {extra2_seconds:.4f} секунд')
--- a/extra4.py
+++ b/extra4.py
@ -0,0 +1,37 @@
+from enum import Enum
+
+from main_task import XmlParser
+
+
+class Markup(Enum):
+    """Target markup formats accepted by other_markups()."""
+    CSV = 'csv'
+    TSV = 'tsv'
+    # other_markups() currently does nothing for the two formats below.
+    PROTO3 = 'proto3'
+    WML = 'wml'
+
+
+def _write_and_print(text: str, path: str) -> None:
+    """Write text to path (UTF-8) and echo it to standard output."""
+    with open(path, 'w', encoding='utf8') as file:
+        file.write(text)
+    print(text)
+
+
+def other_markups(parser: XmlParser, lang: Markup):
+    """Export the document held by parser in another markup format.
+
+    For CSV and TSV the result is written to schedule.csv / schedule.tsv
+    and printed. PROTO3 and WML are accepted but produce no output.
+
+    Args:
+        parser: A parser on which parse_xml() has already been called.
+        lang: The target format.
+
+    Raises:
+        ValueError: If lang is not a known Markup member.
+    """
+    if lang == Markup.CSV:
+        parser.load_csv()
+        _write_and_print(parser.get_csv(), 'schedule.csv')
+    elif lang == Markup.TSV:
+        parser.load_tsv()
+        _write_and_print(parser.get_tsv(), 'schedule.tsv')
+    elif lang in (Markup.PROTO3, Markup.WML):
+        # Not implemented: kept as a silent no-op.
+        pass
+    else:
+        # repr() works for any object; lang.value would itself fail with
+        # AttributeError when lang is not an Enum member.
+        raise ValueError(f'Unknown markup language {lang!r}')
+
+
+if __name__ == "__main__":
+    # The context manager closes the input file once it has been read.
+    with open('schedule.xml', encoding='utf8') as schedule:
+        contents = schedule.read()
+    xml_parser = XmlParser()
+    xml_parser.parse_xml(contents)
+    other_markups(xml_parser, Markup.CSV)
--- a/main_task.py
+++ b/main_task.py
@ -0,0 +1,134 @@
+# Номер в ИСУ: 316304
+# Вариант: 8
+
+class XmlParser:
+    """Hand-written converter from line-oriented XML to JSON, CSV and TSV.
+
+    No regular expressions or libraries are used. Every non-blank line of
+    the input is expected to hold exactly one of: a ``<?...?>`` declaration,
+    an opening tag, a closing tag, or a ``<tag>value</tag>`` pair.
+    """
+
+    class Node:
+        """One element of the parsed tree.
+
+        ``children`` is a list of Node for a container element and the text
+        (a str) for a ``<tag>value</tag>`` element. ``attribute`` is the tag
+        name and ``depth`` the nesting level (0 for the root).
+        """
+        parent: 'Node' = None
+        children = None
+        attribute: str = ''
+        depth: int = 0
+
+        def __init__(self, parent: 'Node', children, attribute: str, depth: int):
+            self.parent = parent
+            self.children = children
+            self.attribute = attribute
+            self.depth = depth
+
+        def __str__(self):
+            return f'<attribute="{self.attribute}", depth={self.depth}, parent={self.parent}, children={self.children}>'
+
+        def get_full_path(self, sep: str) -> str:
+            """Return the tag names from the root down to this node, joined by sep."""
+            attributes = []
+            node = self
+            while node is not None:
+                attributes.append(node.attribute)
+                node = node.parent
+            return sep.join(reversed(attributes))
+
+    # Class-level defaults, kept so the attributes exist on the class itself.
+    root: Node = None
+    xml = ''
+    json = ''
+    proto3 = ''
+    wml = ''
+    tsv = ['', '']
+    csv = ['', '']
+
+    def __init__(self):
+        # form_csv/form_tsv mutate these lists in place, so every instance
+        # needs its own copy instead of sharing the class-level ones.
+        self.tsv = ['', '']
+        self.csv = ['', '']
+
+    @staticmethod
+    def _escape_json(text: str) -> str:
+        """Escape text for use inside a double-quoted JSON string."""
+        escaped = []
+        for ch in text:
+            if ch == '"' or ch == '\\':
+                escaped.append('\\' + ch)
+            elif ch < ' ':
+                # Control characters must be written as \uXXXX in JSON.
+                escaped.append(f'\\u{ord(ch):04x}')
+            else:
+                escaped.append(ch)
+        return ''.join(escaped)
+
+    @staticmethod
+    def _escape_csv(field: str) -> str:
+        """Quote a CSV field that contains a comma, a quote or a line break."""
+        if any(ch in field for ch in ',"\r\n'):
+            return '"' + field.replace('"', '""') + '"'
+        return field
+
+    def parse_xml(self, xml: str):
+        """Parse xml into a Node tree stored in ``self.root``.
+
+        Args:
+            xml: The XML document as text.
+
+        Raises:
+            ValueError: If a closing tag appears while no element is open.
+        """
+        self.xml = xml
+        self.root = None  # forget the tree of a previously parsed document
+        node = None  # container element currently being filled
+        depth = 0
+        for line in xml.splitlines():
+            line = line.strip()
+            # Blank lines and the <?xml ...?> declaration carry no data.
+            if not line or line[:2] == '<?':
+                continue
+            value_end = line.rfind('</')
+            if value_end == -1:
+                # Opening tag: start a new container and descend into it.
+                element = self.Node(node, [], line[1:-1], depth)
+                if node is None:
+                    self.root = element
+                else:
+                    node.children.append(element)
+                node = element
+                depth += 1
+            elif value_end == 0:
+                # Closing tag: return to the parent container.
+                if node is None:
+                    raise ValueError(f'Unexpected closing tag: {line}')
+                node = node.parent
+                depth -= 1
+            else:
+                # <tag>value</tag> on one line: a leaf holding the text.
+                value_start = line.find('>')
+                element = self.Node(node, line[value_start + 1:value_end], line[1:value_start], depth)
+                if node is None:
+                    # The element is not nested in anything: it is the root.
+                    self.root = element
+                else:
+                    node.children.append(element)
+
+    def _render_json(self, data: Node) -> str:
+        """Return the JSON member for data, without a trailing comma."""
+        indent = '\t' * (data.depth + 1)
+        head = f'{indent}"{self._escape_json(data.attribute)}": '
+        if not isinstance(data.children, list):
+            return f'{head}"{self._escape_json(data.children)}"'
+        if not data.children:
+            # Empty container: nothing to join, but the braces must stay.
+            return head + '{\n' + indent + '}'
+        body = ',\n'.join(self._render_json(node) for node in data.children)
+        return head + '{\n' + body + '\n' + indent + '}'
+
+    def form_json(self, data: Node):
+        """Append the JSON member for data, followed by ',\\n', to ``self.json``."""
+        self.json += self._render_json(data) + ',\n'
+
+    def load_json(self):
+        """Rebuild ``self.json`` from the parsed tree."""
+        self.json = ''
+        if self.root is None:
+            # Nothing was parsed: emit an empty object instead of failing.
+            self.json = '{\n}'
+            return
+        self.form_json(self.root)
+        # Drop the ',\n' that form_json put after the root member.
+        self.json = '{\n' + self.json[:-2] + '\n}'
+
+    def form_csv(self, data: Node):
+        """Append the leaves below data to the CSV header and value rows.
+
+        ``self.csv`` must be the two-element list set up by load_csv().
+        """
+        if isinstance(data.children, list):
+            for node in data.children:
+                self.form_csv(node)
+        else:
+            self.csv[0] += self._escape_csv(data.get_full_path('/')) + ','
+            self.csv[1] += self._escape_csv(data.children) + ','
+
+    def load_csv(self):
+        """Rebuild ``self.csv`` as 'header row\\nvalue row' from the parsed tree."""
+        self.csv = ['', '']
+        if self.root is not None:
+            self.form_csv(self.root)
+        # Strip the separator left after the last field of each row.
+        self.csv[0] = self.csv[0][:-1]
+        self.csv[1] = self.csv[1][:-1]
+        self.csv = '\n'.join(self.csv)
+
+    def form_tsv(self, data: Node):
+        """Append the leaves below data to the TSV header and value rows.
+
+        ``self.tsv`` must be the two-element list set up by load_tsv().
+        Values are written verbatim.
+        """
+        if isinstance(data.children, list):
+            for node in data.children:
+                self.form_tsv(node)
+        else:
+            self.tsv[0] += data.get_full_path('.') + '\t'
+            self.tsv[1] += data.children + '\t'
+
+    def load_tsv(self):
+        """Rebuild ``self.tsv`` as 'header row\\nvalue row' from the parsed tree."""
+        self.tsv = ['', '']
+        if self.root is not None:
+            self.form_tsv(self.root)
+        # Strip the separator left after the last field of each row.
+        self.tsv[0] = self.tsv[0][:-1]
+        self.tsv[1] = self.tsv[1][:-1]
+        self.tsv = '\n'.join(self.tsv)
+
+    def get_json(self) -> str:
+        """Return the JSON text built by load_json()."""
+        return self.json
+
+    def get_xml(self) -> str:
+        """Return the text last passed to parse_xml()."""
+        return self.xml
+
+    def get_proto3(self) -> str:
+        """Return ``self.proto3``; nothing in this class fills it."""
+        return self.proto3
+
+    def get_tsv(self) -> str:
+        """Return str() of ``self.tsv``, i.e. the TSV text after load_tsv()."""
+        return str(self.tsv)
+
+    def get_csv(self) -> str:
+        """Return str() of ``self.csv``, i.e. the CSV text after load_csv()."""
+        return str(self.csv)
+
+
+if __name__ == '__main__':
+    # Context managers close both files even if parsing raises.
+    with open('schedule.xml', encoding='utf8') as source:
+        contents = source.read()
+    xml_parser = XmlParser()
+    xml_parser.parse_xml(contents)
+    xml_parser.load_json()
+    # write() takes the whole string at once; writelines() would iterate
+    # over it character by character.
+    with open('schedule.json', 'w', encoding='utf8') as target:
+        target.write(xml_parser.get_json())
+    print(xml_parser.get_json())
--- a/schedule.csv
+++ b/schedule.csv
@ -0,0 +1,2 @@
+schedule/day1/name,schedule/day1/lesson/name,schedule/day1/lesson/type,schedule/day1/lesson/time,schedule/day1/lesson/evenWeek,schedule/day1/lesson/room/address,schedule/day1/lesson/room/number,schedule/day1/lesson/teacher,schedule/day1/lesson/format
+Вторник,Дискретная математика,Лекция,13:30-15:00,true,"Кронверский пр., д.49, лит.А",285,Поляков Владимир Иванович,Очно-дистанционный
--- a/schedule.json
+++ b/schedule.json
@ -0,0 +1,19 @@
+{
+	"schedule": {
+		"day1": {
+			"name": "Вторник",
+			"lesson": {
+				"name": "Дискретная математика",
+				"type": "Лекция",
+				"time": "13:30-15:00",
+				"evenWeek": "true",
+				"room": {
+					"address": "Кронверский пр., д.49, лит.А",
+					"number": "285"
+				},
+				"teacher": "Поляков Владимир Иванович",
+				"format": "Очно-дистанционный"
+			}
+		}
+	}
+}
--- a/schedule.tsv
+++ b/schedule.tsv
@ -0,0 +1,2 @@
+schedule.day1.name	schedule.day1.lesson.name	schedule.day1.lesson.type	schedule.day1.lesson.time	schedule.day1.lesson.evenWeek	schedule.day1.lesson.room.address	schedule.day1.lesson.room.number	schedule.day1.lesson.teacher	schedule.day1.lesson.format
+Вторник	Дискретная математика	Лекция	13:30-15:00	true	Кронверский пр., д.49, лит.А	285	Поляков Владимир Иванович	Очно-дистанционный
--- a/schedule.xml
+++ b/schedule.xml
@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<schedule>
+    <day1>
+        <name>Вторник</name>
+        <lesson>
+            <name>Дискретная математика</name>
+            <type>Лекция</type>
+            <time>13:30-15:00</time>
+            <evenWeek>true</evenWeek>
+            <room>
+                <address>Кронверский пр., д.49, лит.А</address>
+                <number>285</number>
+            </room>
+            <teacher>Поляков Владимир Иванович</teacher>
+            <format>Очно-дистанционный</format>
+        </lesson>
+    </day1>
+</schedule>