Removed clang-tidy check for do-while loops
Added 2 new constructors for FileTape which copy the size of existing tape into a new one Implemented external sort of a Tape using heap sort Rewritten FileTape constructors implementation to better take advantage of existing constructor calls Added const to some parameters to indicate their immutability Added unit tests for external_sort
This commit is contained in:
parent
c2f46935dc
commit
33fee958a5
11 changed files with 274 additions and 28 deletions
|
@ -1,4 +1,4 @@
|
|||
Checks: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,-modernize-use-trailing-return-type,readability-*'
|
||||
Checks: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,-modernize-use-trailing-return-type,readability-*,-cppcoreguidelines-avoid-do-while'
|
||||
CheckOptions:
|
||||
- { key: readability-identifier-naming.NamespaceCase, value: lower_case }
|
||||
- { key: readability-identifier-naming.ClassCase, value: CamelCase }
|
||||
|
|
|
@ -4,6 +4,7 @@ project(yadro-task VERSION 0.1 LANGUAGES CXX)
|
|||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_BUILD_TYPE Debug)
|
||||
|
||||
# clang-tidy
|
||||
find_program(CLANG_TIDY_EXE NAMES clang-tidy REQUIRED)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#include <iostream>
|
||||
#include <ostream>
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
std::cout << "Hello, World!" << std::endl;
|
||||
|
|
|
@ -41,7 +41,7 @@ const static int FT_SEEK_OFFSET = FT_CELL_SIZE + 1;
|
|||
* configuration file. Each cell is represented as a plain text 32-bit unsigned
|
||||
* int, padded to FT_CELL_SIZE with leading zeroes, separated by FT_DELIMETER.
|
||||
* Each cell MUST be FT_CELL_SIZE long to correctly implement write operation
|
||||
* without re-writing the entire file. FT_DELIMETER
|
||||
* without re-writing the entire file.
|
||||
*
|
||||
* Example of a valid file for a FileTape
|
||||
* - example.txt
|
||||
|
@ -95,6 +95,24 @@ class FileTape : public Tape {
|
|||
FileTape(size_t cells, std::string file_name,
|
||||
FileTapeSettings settings = FT_DEFAULT_SETTINGS);
|
||||
|
||||
/**
|
||||
* Initializes a new instance of FileTape that will create and open a file
|
||||
* with the specified name and fill it with zeroed cells - the amount of
|
||||
* cells will be the same as the specified FileTape.
|
||||
* File must not exist and directory must be write accessible.
|
||||
*
|
||||
* This is necessary in the situations when the FileTapse size is a bigger
|
||||
* than size_t max value. Seeking will not introduce delay during this
|
||||
* operation.
|
||||
*
|
||||
* @param same_size_as FileTape, the size of which will be carried over to
|
||||
* the new FileTape
|
||||
* @param file_name Name of a file to be created and opened
|
||||
* @param settings FileTape settings
|
||||
*/
|
||||
FileTape(FileTape &same_size_as, std::string file_name,
|
||||
FileTapeSettings = FT_DEFAULT_SETTINGS);
|
||||
|
||||
/**
|
||||
* Initializes a new instance of FileTape that will create and open a
|
||||
* temporary file and fill it with *cells* amount of zeroed cells.
|
||||
|
@ -107,6 +125,23 @@ class FileTape : public Tape {
|
|||
*/
|
||||
FileTape(size_t cells, FileTapeSettings settings = FT_DEFAULT_SETTINGS);
|
||||
|
||||
/**
|
||||
* Initializes a new instance of FileTape that will create and open a
|
||||
* temporary file and fill it with zeroed cells -
|
||||
* the amount of cells will be the same as the specified FileTape. File must
|
||||
* not exist and directory must be write accessible.
|
||||
*
|
||||
* This is necessary in the situations when the FileTapse size is a bigger
|
||||
* than size_t max value. Seeking will not introduce delay during this
|
||||
* operation.
|
||||
*
|
||||
* @param same_size_as FileTape, the size of which will be carried over to
|
||||
* the new FileTape
|
||||
* @param file_name Name of a file to be created and opened
|
||||
* @param settings FileTape settings
|
||||
*/
|
||||
FileTape(FileTape &same_size_as, FileTapeSettings = FT_DEFAULT_SETTINGS);
|
||||
|
||||
/**
|
||||
* Initializes a new instance of FileTape that will create and open a
|
||||
* temporary file, and fill it with the data from the provided vector.
|
||||
|
@ -169,4 +204,4 @@ class FileTape : public Tape {
|
|||
|
||||
} // namespace tape
|
||||
|
||||
#endif
|
||||
#endif // !FILE_TAPE_H
|
||||
|
|
|
@ -62,4 +62,4 @@ class Tape {
|
|||
|
||||
} // namespace tape
|
||||
|
||||
#endif
|
||||
#endif // !TAPE_H
|
||||
|
|
|
@ -1,12 +1,47 @@
|
|||
#ifndef TAPE_UTIL_H
|
||||
#define TAPE_UTIL_H
|
||||
|
||||
#include "filetape.h"
|
||||
#include "tape.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
using std::unique_ptr;
|
||||
|
||||
namespace tape {
|
||||
|
||||
void sort(Tape &input, Tape &output);
|
||||
/**
|
||||
* Lambda, which takes the amount of cells a tape should have and returns
|
||||
* a unique pointer to a temporary Tape with the specified amount of cells
|
||||
*/
|
||||
using TempTapeFactory = std::function<unique_ptr<Tape>(size_t)>;
|
||||
|
||||
/**
|
||||
* TempTapeFactory that creates a new temporary FileTape
|
||||
*/
|
||||
const static inline TempTapeFactory FILETAPE_FACTORY =
|
||||
[](size_t cells) -> unique_ptr<Tape> {
|
||||
return std::make_unique<FileTape>(FileTape(cells));
|
||||
};
|
||||
|
||||
/**
|
||||
* External sort for Tape. Reads *ram_limit* cells into memory, sorts them,
|
||||
* writes them to a temporary Tape, does this until all data from input Tape is
|
||||
* sorted and put into temporary Tapes, then merges temporary Tapes sorted data
|
||||
* into output Tape. Heap sort is preffered due to reduction in seek operations.
|
||||
*
|
||||
* @param input Tape, data of which should be sorted
|
||||
* @param output Tape, where the sorted data is going to be written
|
||||
* @param tmp_tape_factory Lambda expression that defines how new temporary
|
||||
* Tapes are created
|
||||
* @param sort_limit Max amount of cells that can be loaded into memory to use
|
||||
* while sorting. Set to SIZE_MAX by default
|
||||
*/
|
||||
void external_sort(Tape &input, Tape &output, TempTapeFactory tmp_tape_factory,
|
||||
size_t sort_limit = SIZE_MAX);
|
||||
|
||||
} // namespace tape
|
||||
|
||||
#endif
|
||||
#endif // !TAPE_UTIL_H
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
#include "filetape.h"
|
||||
#include <cstddef>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
|
@ -17,37 +19,50 @@ std::string generate_tmp_file_name() {
|
|||
return tmp_file_name.data();
|
||||
}
|
||||
|
||||
void fill_file_with_empty_cells(size_t cells, fstream &file) {
|
||||
file << std::setfill('0') << std::setw(tape::FT_CELL_SIZE) << '0';
|
||||
for (size_t i = 1; i < cells; i++) {
|
||||
file << tape::FT_DELIMETER << std::setfill('0')
|
||||
<< std::setw(tape::FT_CELL_SIZE) << '0';
|
||||
}
|
||||
file.seekg(0, fstream::beg);
|
||||
}
|
||||
|
||||
FileTape::FileTape(std::string file_name, FileTapeSettings settings)
|
||||
FileTape::FileTape(const std::string file_name, const FileTapeSettings settings)
|
||||
: settings(settings), file_name(file_name) {
|
||||
this->file = fstream(file_name);
|
||||
}
|
||||
|
||||
FileTape::FileTape(size_t cells, std::string file_name,
|
||||
FileTapeSettings settings)
|
||||
FileTape::FileTape(const size_t cells, const std::string file_name,
|
||||
const FileTapeSettings settings)
|
||||
: FileTape(file_name, settings) {
|
||||
this->file =
|
||||
fstream(this->file_name, fstream::in | fstream::out | fstream::trunc);
|
||||
fill_file_with_empty_cells(cells, this->file);
|
||||
this->file << std::setfill('0') << std::setw(tape::FT_CELL_SIZE) << '0';
|
||||
for (size_t i = 1; i < cells; i++) {
|
||||
this->file << FT_DELIMETER << std::setfill('0')
|
||||
<< std::setw(FT_CELL_SIZE) << '0';
|
||||
}
|
||||
this->file.seekg(0, fstream::beg);
|
||||
}
|
||||
|
||||
FileTape::FileTape(size_t cells, FileTapeSettings settings)
|
||||
: settings(settings), tmp(true) {
|
||||
this->file_name = generate_tmp_file_name();
|
||||
FileTape::FileTape(const size_t cells, const FileTapeSettings settings)
|
||||
: tape::FileTape(cells, generate_tmp_file_name(), settings) {
|
||||
this->tmp = true;
|
||||
}
|
||||
|
||||
FileTape::FileTape(FileTape &same_size_as, const std::string file_name,
|
||||
const FileTapeSettings settings)
|
||||
: settings(settings), file_name(file_name) {
|
||||
this->file =
|
||||
fstream(this->file_name, fstream::in | fstream::out | fstream::trunc);
|
||||
fill_file_with_empty_cells(cells, this->file);
|
||||
this->file << std::setfill('0') << std::setw(tape::FT_CELL_SIZE) << '0';
|
||||
while (same_size_as.seek_forward()) {
|
||||
file << tape::FT_DELIMETER << std::setfill('0')
|
||||
<< std::setw(tape::FT_CELL_SIZE) << '0';
|
||||
}
|
||||
this->file.seekg(0, fstream::beg);
|
||||
same_size_as.file.seekg(0, fstream::beg);
|
||||
}
|
||||
|
||||
FileTape::FileTape(const std::vector<uint32_t> &data, FileTapeSettings settings)
|
||||
FileTape::FileTape(FileTape &same_size_as, const FileTapeSettings settings)
|
||||
: tape::FileTape(same_size_as, generate_tmp_file_name(), settings) {
|
||||
this->tmp = true;
|
||||
}
|
||||
|
||||
FileTape::FileTape(const std::vector<uint32_t> &data,
|
||||
const FileTapeSettings settings)
|
||||
: settings(settings), tmp(true) {
|
||||
this->file_name = generate_tmp_file_name();
|
||||
this->file =
|
||||
|
@ -98,7 +113,7 @@ uint32_t FileTape::read() {
|
|||
return data;
|
||||
}
|
||||
|
||||
void FileTape::write(uint32_t data) {
|
||||
void FileTape::write(const uint32_t data) {
|
||||
std::this_thread::sleep_for(this->settings.write_delay);
|
||||
std::stringstream cell;
|
||||
cell << std::setfill('0') << std::setw(FT_CELL_SIZE)
|
||||
|
|
|
@ -1,3 +1,104 @@
|
|||
#include "tape_util.h"
|
||||
#include <algorithm>
|
||||
|
||||
void tape::sort(Tape &input, Tape &output) {}
|
||||
using std::unique_ptr;
|
||||
using std::vector;
|
||||
using tape::Tape;
|
||||
|
||||
struct TapeHeapNode {
|
||||
unique_ptr<Tape> tape;
|
||||
uint32_t value;
|
||||
};
|
||||
|
||||
class TapeHeapNodeComparator {
|
||||
public:
|
||||
bool operator()(const TapeHeapNode &obj1, const TapeHeapNode &obj2) {
|
||||
return obj1.value > obj2.value;
|
||||
}
|
||||
};
|
||||
|
||||
// min heap implementation for tape heap nodes
|
||||
// cant use priority queue since you cant pop the element and push it back
|
||||
// without doing some pointer magic
|
||||
class TapeHeap {
|
||||
// define comparator to use in heap
|
||||
constexpr static TapeHeapNodeComparator COMPARATOR =
|
||||
TapeHeapNodeComparator();
|
||||
|
||||
private:
|
||||
vector<TapeHeapNode> elements;
|
||||
|
||||
public:
|
||||
void push(TapeHeapNode element) {
|
||||
elements.push_back(std::move(element));
|
||||
std::ranges::push_heap(elements, COMPARATOR);
|
||||
}
|
||||
|
||||
TapeHeapNode pop() {
|
||||
std::ranges::pop_heap(elements, COMPARATOR);
|
||||
TapeHeapNode element = std::move(elements.back());
|
||||
elements.pop_back();
|
||||
return element;
|
||||
}
|
||||
|
||||
bool empty() { return this->elements.empty(); }
|
||||
};
|
||||
|
||||
void tape::external_sort(Tape &input, Tape &output,
|
||||
TempTapeFactory tmp_tape_factory,
|
||||
const size_t sort_limit) {
|
||||
vector<uint32_t> data_buf;
|
||||
TapeHeap heap;
|
||||
bool input_ended = false;
|
||||
bool all_data_in_memory = false;
|
||||
while (!input_ended) {
|
||||
for (size_t i = 0; i < sort_limit; i++) {
|
||||
data_buf.push_back(input.read());
|
||||
if (!input.seek_forward()) {
|
||||
input_ended = true;
|
||||
// if all input data was put into buffer in one go,
|
||||
// prevent creation of external Tapes by settings
|
||||
// all_data_in_memory flag to true
|
||||
if (heap.empty()) {
|
||||
all_data_in_memory = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (all_data_in_memory) {
|
||||
std::ranges::sort(data_buf);
|
||||
for (uint32_t num : data_buf) {
|
||||
output.write(num);
|
||||
output.seek_forward();
|
||||
}
|
||||
return;
|
||||
}
|
||||
// in order to avoid seeking twice as much in tmp tapes - sort the data
|
||||
// in descending order. this way - the data will be read as though it is
|
||||
// ascending, because we will be reading it from the end, while seeking
|
||||
// backwards
|
||||
std::ranges::sort(data_buf, std::greater());
|
||||
unique_ptr<Tape> tmp_tape = tmp_tape_factory(sort_limit);
|
||||
tmp_tape->write(data_buf[0]);
|
||||
for (size_t i = 1; i < data_buf.size(); i++) {
|
||||
tmp_tape->seek_forward();
|
||||
tmp_tape->write(data_buf[i]);
|
||||
}
|
||||
heap.push(TapeHeapNode{.tape = std::move(tmp_tape),
|
||||
.value = data_buf[data_buf.size() - 1]});
|
||||
data_buf.clear();
|
||||
}
|
||||
// take a top tape node from the heap, write its value into the output,
|
||||
// update the value in the node and push it back. if a node is out of values
|
||||
// - discard it. do this until heap is empty. all the elements will
|
||||
// eventually end up sorted in the output
|
||||
while (!heap.empty()) {
|
||||
TapeHeapNode node = heap.pop();
|
||||
output.write(node.value);
|
||||
output.seek_forward();
|
||||
if (node.tape->seek_backwards()) {
|
||||
node.value = node.tape->read();
|
||||
heap.push(std::move(node)); // move is required to avoid copying
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,3 +5,9 @@ target_link_libraries(filetape_tests PRIVATE tapelib)
|
|||
target_link_libraries(filetape_tests PRIVATE Catch2::Catch2WithMain)
|
||||
|
||||
add_test(filetape_tests filetape_tests)
|
||||
|
||||
add_executable(filetape_sort_tests ${PROJECT_SOURCE_DIR}/tests/filetape_sort_tests.cpp)
|
||||
target_link_libraries(filetape_sort_tests PRIVATE tapelib)
|
||||
target_link_libraries(filetape_sort_tests PRIVATE Catch2::Catch2WithMain)
|
||||
|
||||
add_test(filetape_sort_tests filetape_sort_tests)
|
||||
|
|
55
tests/filetape_sort_tests.cpp
Normal file
55
tests/filetape_sort_tests.cpp
Normal file
|
@ -0,0 +1,55 @@
|
|||
#include "tape_util.h"
|
||||
#include <algorithm>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
const static std::vector<uint32_t> TEST_DATA = {123, 26, 87, 266, 111, 234,
|
||||
6, 63, 28, 1, 90, 33};
|
||||
const static std::vector<uint32_t> TEST_DATA_SORTED = {
|
||||
1, 6, 26, 28, 33, 63, 87, 90, 111, 123, 234, 266};
|
||||
|
||||
std::vector<uint32_t> read_from_tape_backwards(tape::Tape &tape) {
|
||||
std::vector<uint32_t> data;
|
||||
do {
|
||||
data.push_back(tape.read());
|
||||
} while (tape.seek_backwards());
|
||||
std::ranges::reverse(data);
|
||||
return data;
|
||||
}
|
||||
|
||||
// NOLINTBEGIN(*-magic-numbers)
|
||||
TEST_CASE("Sorting FileTape with external sort", "[sort]") {
|
||||
tape::FileTape input(TEST_DATA);
|
||||
tape::FileTape output(input);
|
||||
|
||||
SECTION("Sorting with no limitations") {
|
||||
tape::external_sort(input, output, tape::FILETAPE_FACTORY);
|
||||
REQUIRE(read_from_tape_backwards(output) == TEST_DATA_SORTED);
|
||||
}
|
||||
|
||||
SECTION("Sorting with 4 elements in memory limit") {
|
||||
tape::external_sort(input, output, tape::FILETAPE_FACTORY, 4);
|
||||
REQUIRE(read_from_tape_backwards(output) == TEST_DATA_SORTED);
|
||||
}
|
||||
|
||||
SECTION("Sorting with 9 elements in memory limit") {
|
||||
tape::external_sort(input, output, tape::FILETAPE_FACTORY, 9);
|
||||
REQUIRE(read_from_tape_backwards(output) == TEST_DATA_SORTED);
|
||||
}
|
||||
|
||||
SECTION("Sorting with vector size elements in memory limit") {
|
||||
tape::external_sort(input, output, tape::FILETAPE_FACTORY,
|
||||
TEST_DATA.size());
|
||||
REQUIRE(read_from_tape_backwards(output) == TEST_DATA_SORTED);
|
||||
}
|
||||
|
||||
SECTION("Sorting with 1000 elements in memory limit") {
|
||||
tape::external_sort(input, output, tape::FILETAPE_FACTORY, 1000);
|
||||
REQUIRE(read_from_tape_backwards(output) == TEST_DATA_SORTED);
|
||||
}
|
||||
|
||||
SECTION("Sorting with 1 elements in memory limit") {
|
||||
tape::external_sort(input, output, tape::FILETAPE_FACTORY, 1);
|
||||
REQUIRE(read_from_tape_backwards(output) == TEST_DATA_SORTED);
|
||||
}
|
||||
}
|
||||
// NOLINTEND(*-magic-numbers)
|
|
@ -1,6 +1,5 @@
|
|||
#include "filetape.h"
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
const static std::vector<uint32_t> TEST_DATA = {1, 12345, 0, 2222222222,
|
||||
|
|
Loading…
Reference in a new issue