First commit

This commit is contained in:
Egor 2023-07-21 01:08:19 +03:00
commit 973a0d55cb
16 changed files with 505 additions and 0 deletions

153
.gitignore vendored Normal file
View file

@ -0,0 +1,153 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/

8
.idea/.gitignore vendored Normal file
View file

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

10
.idea/Lab4.iml Normal file
View file

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View file

@ -0,0 +1,13 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="list.__getitem__" />
<option value="str.replace" />
</list>
</option>
</inspection_tool>
</profile>
</component>

View file

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml Normal file
View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (Lab4)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Lab4.iml" filepath="$PROJECT_DIR$/.idea/Lab4.iml" />
</modules>
</component>
</project>

18
extra1.py Normal file
View file

@ -0,0 +1,18 @@
import xmltodict
import json
def xml_to_json(xml: str, pretty_formatting: bool) -> str:
    """Convert an XML document to JSON text using ``xmltodict``.

    Args:
        xml: XML document text.
        pretty_formatting: When true the JSON is indented by 4 spaces;
            otherwise no indentation is applied.

    Returns:
        The JSON text, with non-ASCII characters kept unescaped.
    """
    parsed = xmltodict.parse(xml)
    if pretty_formatting:
        return json.dumps(parsed, ensure_ascii=False, indent=4)
    return json.dumps(parsed, ensure_ascii=False, indent=None)
if __name__ == "__main__":
    # Context managers guarantee both files are closed (and the output is
    # flushed) even if the conversion raises.
    with open('schedule.xml', encoding='utf8') as source:
        content = source.read()
    result = xml_to_json(content, True)
    # write() rather than writelines(): result is one string, not a list of lines.
    with open('schedule.json', 'w', encoding='utf8') as target:
        target.write(result)
    print(result)

27
extra2.py Normal file
View file

@ -0,0 +1,27 @@
import re
# XML declaration / processing instruction: <? ... ?>
non_parsable_parentheses = re.compile(r'(\s*)<\?(.+)\?>\s*\n')
# Opening tag alone on its line: <tag>
start_nested_find = re.compile(r'(\s*)<(\w+)>\s*\n')
# Complete leaf element on one line: <tag>value</tag>
value_find = re.compile(r'(\s*)<(\w+)>(.*)</\w+>\s*\n')
# Closing tag alone on its line: </tag>
close_nested_find = re.compile(r'(\s*)</\w+>\s*\n')
# Comma that is followed only by whitespace and a closing brace.
redundant_commas_find = re.compile(r',(\n\s*})')
# Characters that need a backslash escape inside a JSON string literal.
_JSON_ESCAPES = str.maketrans({'\\': '\\\\', '"': '\\"', '\t': '\\t', '\r': '\\r'})


def _value_to_json(match):
    """Render one ``<tag>value</tag>`` match as a ``"tag": "value",`` line."""
    indent, tag, value = match.groups()
    return f'{indent}\t"{tag}": "{value.translate(_JSON_ESCAPES)}",\n'


def xml_to_json(xml: str) -> str:
    """Convert a simple, line-oriented XML document to JSON with regexes.

    Each opening tag, closing tag and ``<tag>value</tag>`` leaf is expected
    on its own line. Leaf values are emitted as JSON strings with quotes,
    backslashes, tabs and carriage returns escaped.

    Args:
        xml: XML document text.

    Returns:
        The JSON text wrapped in one top-level object.
    """
    # Every pattern requires a terminating newline, so without this a final
    # closing tag that is not newline-terminated would be left unconverted.
    text = xml if not xml or xml.endswith('\n') else xml + '\n'
    text = non_parsable_parentheses.sub('', text)
    text = start_nested_find.sub(r'\1\t"\2": {\n', text)
    # A callable replacement lets the value be escaped before insertion.
    text = value_find.sub(_value_to_json, text)
    text = close_nested_find.sub(r'\1\t},\n', text)
    text = '{\n' + text + '}'
    # Every entry was written with a trailing comma; drop those before '}'.
    return redundant_commas_find.sub(r'\1', text)
if __name__ == "__main__":
    # Context managers guarantee both files are closed (and the output is
    # flushed) even if the conversion raises.
    with open('schedule.xml', encoding='utf8') as source:
        contents = source.read()
    result = xml_to_json(contents)
    # write() rather than writelines(): result is one string, not a list of lines.
    with open('schedule.json', 'w', encoding='utf8') as target:
        target.write(result)
    print(result)

46
extra3.py Normal file
View file

@ -0,0 +1,46 @@
from timeit import default_timer as timer
import extra1
import extra2
import main_task
# Number of conversion runs each timing function performs.
iterations = 10
def time_main() -> float:
    """Time the hand-written parser from ``main_task``.

    The measured interval covers reading ``schedule.xml`` plus ``iterations``
    rounds of ``parse_xml`` followed by ``load_json``.

    Returns:
        Elapsed time in seconds, as measured by ``timeit.default_timer``.
    """
    start = timer()
    # Context manager so the file handle is closed instead of leaked.
    with open('schedule.xml', encoding='utf8') as source:
        contents = source.read()
    xml_parser = main_task.XmlParser()
    for _ in range(iterations):
        xml_parser.parse_xml(contents)
        xml_parser.load_json()
    return timer() - start
def time_extra1() -> float:
    """Time the library-based converter from ``extra1``.

    The measured interval covers reading ``schedule.xml`` plus ``iterations``
    calls of ``extra1.xml_to_json`` with pretty formatting enabled.

    Returns:
        Elapsed time in seconds, as measured by ``timeit.default_timer``.
    """
    start = timer()
    # Context manager so the file handle is closed instead of leaked.
    with open('schedule.xml', encoding='utf8') as source:
        content = source.read()
    for _ in range(iterations):
        extra1.xml_to_json(content, True)
    return timer() - start
def time_extra2() -> float:
    """Time the regex-based converter from ``extra2``.

    The measured interval covers reading ``schedule.xml`` plus ``iterations``
    calls of ``extra2.xml_to_json``.

    Returns:
        Elapsed time in seconds, as measured by ``timeit.default_timer``.
    """
    start = timer()
    # Context manager so the file handle is closed instead of leaked.
    with open('schedule.xml', encoding='utf8') as source:
        contents = source.read()
    for _ in range(iterations):
        extra2.xml_to_json(contents)
    return timer() - start
if __name__ == "__main__":
    # Report the run count first, then time and print each variant in turn.
    print(f'Количество итераций: {iterations}')
    main_seconds = time_main()
    print(f'Основное задание (без regex и библиотек): {main_seconds:.4f} секунд')
    library_seconds = time_extra1()
    print(f'Доп. задание №1 (с помощью библиотек xmltodict и json): {library_seconds:.4f} секунд')
    regex_seconds = time_extra2()
    print(f'Доп. задание №2 (с помощью regex): {regex_seconds:.4f} секунд')

37
extra4.py Normal file
View file

@ -0,0 +1,37 @@
from enum import Enum
from main_task import XmlParser
class Markup(Enum):
    """Target markup formats selectable for export; values are lowercase names."""

    # Delimiter-separated formats.
    CSV = 'csv'
    TSV = 'tsv'
    # Remaining formats.
    PROTO3 = 'proto3'
    WML = 'wml'
def other_markups(parser: XmlParser, lang: Markup):
    """Export the parsed document to another markup, save it and print it.

    CSV and TSV are rendered through ``parser`` and written to
    ``schedule.csv`` / ``schedule.tsv``. PROTO3 and WML are accepted but not
    implemented, so nothing is produced for them.

    Args:
        parser: Parser that has already parsed an XML document.
        lang: Target markup.

    Raises:
        ValueError: If ``lang`` is not one of the ``Markup`` members.
    """
    if lang == Markup.CSV:
        parser.load_csv()
        rendered = parser.get_csv()
        target = 'schedule.csv'
    elif lang == Markup.TSV:
        parser.load_tsv()
        rendered = parser.get_tsv()
        target = 'schedule.tsv'
    elif lang in (Markup.PROTO3, Markup.WML):
        # Not implemented: deliberately a no-op.
        return
    else:
        # getattr keeps the message usable for values that are not enum
        # members and therefore have no ``.value``.
        raise ValueError(f'Unknown markup language {getattr(lang, "value", lang)}')
    # Context manager closes (and flushes) the output file.
    with open(target, 'w', encoding='utf8') as output:
        output.write(rendered)
    print(rendered)
if __name__ == "__main__":
    # Context manager closes the input file once it has been read.
    with open('schedule.xml', encoding='utf8') as schedule:
        contents = schedule.read()
    xml_parser = XmlParser()
    xml_parser.parse_xml(contents)
    other_markups(xml_parser, Markup.CSV)

134
main_task.py Normal file
View file

@ -0,0 +1,134 @@
# Номер в ИСУ: 316304
# Вариант: 8
class XmlParser:
    """Line-oriented XML reader that re-emits the document as JSON, CSV or TSV.

    The parser expects every opening tag, closing tag and
    ``<tag>value</tag>`` leaf to sit on its own line. It does not interpret
    XML attributes, comments or self-closing tags.
    """

    class Node:
        """One XML element of the parsed tree.

        ``children`` is a list of child nodes for a nested element and the
        text value (``str``) for a leaf element.
        """

        def __init__(self, parent: 'Node', children, attribute: str, depth: int):
            self.parent = parent
            self.children = children
            self.attribute = attribute  # tag name
            self.depth = depth  # nesting level; the root element is 0

        def __str__(self):
            return f'<attribute="{self.attribute}", depth={self.depth}, parent={self.parent}, children={self.children}>'

        def get_full_path(self, sep: str) -> str:
            """Return the tag names from the root down to this node, joined by ``sep``."""
            names = []
            node = self
            while node is not None:
                names.append(node.attribute)
                node = node.parent
            return sep.join(reversed(names))

    # Characters that need a backslash escape inside a JSON string literal.
    _JSON_ESCAPES = str.maketrans({'\\': '\\\\', '"': '\\"', '\t': '\\t', '\r': '\\r'})

    def __init__(self):
        # State lives on the instance: class-level lists would be shared
        # (and mutated) across every parser object.
        self.root = None
        self.xml = ''
        self.json = ''
        self.proto3 = ''
        self.wml = ''
        # [header row, value row] while building; one joined string after load_*.
        self.tsv = ['', '']
        self.csv = ['', '']

    def parse_xml(self, xml: str):
        """Build the node tree for ``xml`` and store its root in ``self.root``.

        Args:
            xml: XML document text, one tag or leaf element per line.
        """
        self.xml = xml
        self.root = None  # forget any previously parsed document
        node = None
        depth = 0
        for line in xml.splitlines():
            line = line.strip()
            # Blank lines and <? ... ?> declarations are not elements.
            if not line or line.startswith('<?'):
                continue
            value_end = line.rfind('</')
            if value_end == -1:
                # Opening tag: descend into a new nested element.
                element = self.Node(node, [], line[1:-1], depth)
                if node is None:
                    self.root = element
                else:
                    node.children.append(element)
                node = element
                depth += 1
            elif value_end == 0:
                # Closing tag: climb back to the parent element.
                node = node.parent
                depth -= 1
            else:
                # Leaf element: <tag>value</tag> on a single line.
                value_start = line.find('>')
                element = self.Node(
                    node, line[value_start + 1:value_end], line[1:value_start], depth
                )
                node.children.append(element)

    def _render_json(self, node) -> str:
        """Return the JSON member for ``node`` without a trailing comma."""
        pad = '\t' * (node.depth + 1)
        key = f'{pad}"{node.attribute.translate(self._JSON_ESCAPES)}": '
        if not isinstance(node.children, list):
            return f'{key}"{node.children.translate(self._JSON_ESCAPES)}"'
        if not node.children:
            # An element with no children is an empty object.
            return key + '{}'
        body = ',\n'.join(self._render_json(child) for child in node.children)
        return key + '{\n' + body + '\n' + pad + '}'

    def form_json(self, data: Node):
        """Append the JSON member for ``data``, followed by ``,\\n``, to ``self.json``."""
        self.json += self._render_json(data) + ',\n'

    def load_json(self):
        """Render the parsed tree into ``self.json`` as one JSON object."""
        self.json = ''
        self.form_json(self.root)
        # Drop the trailing ",\n" left by form_json before closing the object.
        self.json = '{\n' + self.json[:-2] + '\n}'

    def _leaves(self, node):
        """Yield the leaf nodes under ``node`` in document order."""
        if isinstance(node.children, list):
            for child in node.children:
                yield from self._leaves(child)
        else:
            yield node

    @staticmethod
    def _csv_field(text: str) -> str:
        """Quote ``text`` when it holds a comma, quote or line break."""
        if any(ch in text for ch in ',"\r\n'):
            return '"' + text.replace('"', '""') + '"'
        return text

    def form_csv(self, data: Node):
        """Append a header and a value cell to ``self.csv`` for every leaf under ``data``."""
        for leaf in self._leaves(data):
            self.csv[0] += self._csv_field(leaf.get_full_path('/')) + ','
            self.csv[1] += self._csv_field(leaf.children) + ','

    def load_csv(self):
        """Render the parsed tree into ``self.csv`` as a header row and a value row."""
        self.csv = ['', '']
        self.form_csv(self.root)
        # Each row ends with a separator; strip it before joining the rows.
        self.csv = '\n'.join(row[:-1] for row in self.csv)

    def form_tsv(self, data: Node):
        """Append a header and a value cell to ``self.tsv`` for every leaf under ``data``."""
        for leaf in self._leaves(data):
            self.tsv[0] += leaf.get_full_path('.') + '\t'
            self.tsv[1] += leaf.children + '\t'

    def load_tsv(self):
        """Render the parsed tree into ``self.tsv`` as a header row and a value row."""
        self.tsv = ['', '']
        self.form_tsv(self.root)
        # Each row ends with a separator; strip it before joining the rows.
        self.tsv = '\n'.join(row[:-1] for row in self.tsv)

    def get_json(self) -> str:
        """Return the JSON text produced by ``load_json``."""
        return self.json

    def get_xml(self) -> str:
        """Return the XML text last passed to ``parse_xml``."""
        return self.xml

    def get_proto3(self) -> str:
        """Return the ``proto3`` attribute."""
        return self.proto3

    def get_tsv(self) -> str:
        """Return the TSV text produced by ``load_tsv``."""
        return str(self.tsv)

    def get_csv(self) -> str:
        """Return the CSV text produced by ``load_csv``."""
        return str(self.csv)
if __name__ == '__main__':
    # Context managers guarantee both files are closed (and the output is
    # flushed) even if parsing raises.
    with open('schedule.xml', encoding='utf8') as source:
        contents = source.read()
    xml_parser = XmlParser()
    xml_parser.parse_xml(contents)
    xml_parser.load_json()
    result = xml_parser.get_json()
    # write() rather than writelines(): result is one string, not a list of lines.
    with open('schedule.json', 'w', encoding='utf8') as target:
        target.write(result)
    print(result)

2
schedule.csv Normal file
View file

@ -0,0 +1,2 @@
schedule/day1/name,schedule/day1/lesson/name,schedule/day1/lesson/type,schedule/day1/lesson/time,schedule/day1/lesson/evenWeek,schedule/day1/lesson/room/address,schedule/day1/lesson/room/number,schedule/day1/lesson/teacher,schedule/day1/lesson/format
Вторник,Дискретная математика,Лекция,13:30-15:00,true,Кронверский пр., д.49, лит.А,285,Поляков Владимир Иванович,Очно-дистанционный
1 schedule/day1/name,schedule/day1/lesson/name,schedule/day1/lesson/type,schedule/day1/lesson/time,schedule/day1/lesson/evenWeek,schedule/day1/lesson/room/address,schedule/day1/lesson/room/number,schedule/day1/lesson/teacher,schedule/day1/lesson/format
2 Вторник,Дискретная математика,Лекция,13:30-15:00,true,Кронверский пр., д.49, лит.А,285,Поляков Владимир Иванович,Очно-дистанционный

19
schedule.json Normal file
View file

@ -0,0 +1,19 @@
{
"schedule": {
"day1": {
"name": "Вторник",
"lesson": {
"name": "Дискретная математика",
"type": "Лекция",
"time": "13:30-15:00",
"evenWeek": "true",
"room": {
"address": "Кронверский пр., д.49, лит.А",
"number": "285"
},
"teacher": "Поляков Владимир Иванович",
"format": "Очно-дистанционный"
}
}
}
}

2
schedule.tsv Normal file
View file

@ -0,0 +1,2 @@
schedule.day1.name schedule.day1.lesson.name schedule.day1.lesson.type schedule.day1.lesson.time schedule.day1.lesson.evenWeek schedule.day1.lesson.room.address schedule.day1.lesson.room.number schedule.day1.lesson.teacher schedule.day1.lesson.format
Вторник Дискретная математика Лекция 13:30-15:00 true Кронверский пр., д.49, лит.А 285 Поляков Владимир Иванович Очно-дистанционный
1 schedule.day1.name schedule.day1.lesson.name schedule.day1.lesson.type schedule.day1.lesson.time schedule.day1.lesson.evenWeek schedule.day1.lesson.room.address schedule.day1.lesson.room.number schedule.day1.lesson.teacher schedule.day1.lesson.format
2 Вторник Дискретная математика Лекция 13:30-15:00 true Кронверский пр., д.49, лит.А 285 Поляков Владимир Иванович Очно-дистанционный

18
schedule.xml Normal file
View file

@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<schedule>
<day1>
<name>Вторник</name>
<lesson>
<name>Дискретная математика</name>
<type>Лекция</type>
<time>13:30-15:00</time>
<evenWeek>true</evenWeek>
<room>
<address>Кронверский пр., д.49, лит.А</address>
<number>285</number>
</room>
<teacher>Поляков Владимир Иванович</teacher>
<format>Очно-дистанционный</format>
</lesson>
</day1>
</schedule>