Merge branch '13-external-sort' into 'master'
Resolve "external sort" Closes #13 See merge request slon/shad-go-private!16
This commit is contained in:
commit
ae450e3267
35 changed files with 409 additions and 0 deletions
58
externalsort/README.md
Normal file
58
externalsort/README.md
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
## externalsort
|
||||||
|
|
||||||
|
В этой задаче нужно написать однопроходную внешнюю сортировку слиянием.
|
||||||
|
Моделируется ситуация, в которой данные расположены на внешних устройствах и суммарно не вмещаются в оперативную память,
|
||||||
|
но каждый кусочек по отдельности вмещается.
|
||||||
|
|
||||||
|
Задача разбита на 3 составные части.
|
||||||
|
|
||||||
|
#### Reader & writer
|
||||||
|
|
||||||
|
Реализовать интерфейсы для построчного чтения/записи строк:
|
||||||
|
```
|
||||||
|
type LineReader interface {
|
||||||
|
ReadLine() (string, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
type LineWriter interface {
|
||||||
|
Write(l string) error
|
||||||
|
}
|
||||||
|
```
|
||||||
|
и два конструктора:
|
||||||
|
```
|
||||||
|
func NewReader(r io.Reader) LineReader
|
||||||
|
func NewWriter(w io.Writer) LineWriter
|
||||||
|
```
|
||||||
|
|
||||||
|
`NewReader` оборачивает переданный `io.Reader` в `LineReader`.
|
||||||
|
|
||||||
|
Вызов `ReadLine` должен читать одну строку.
|
||||||
|
Строка имеет произвольную длину.
|
||||||
|
Конец строки определяется переводом строки ('\n').
|
||||||
|
Непустая последовательность символов после последнего перевода строки также считается строкой.
|
||||||
|
|
||||||
|
`ReadLine` должен возвращать `io.EOF` при достижении конца файла.
|
||||||
|
|
||||||
|
#### Merge
|
||||||
|
|
||||||
|
Функция слияния произвольного количества отсортированных групп строк:
|
||||||
|
```
|
||||||
|
func Merge(w LineWriter, readers ...LineReader) error
|
||||||
|
```
|
||||||
|
|
||||||
|
`Merge` по необходимости читает из reader'ов и пишет во writer.
|
||||||
|
|
||||||
|
#### Sort
|
||||||
|
|
||||||
|
```
|
||||||
|
func Sort(w io.Writer, in ...string) error
|
||||||
|
```
|
||||||
|
|
||||||
|
Функция принимает на вход произвольное количество файлов, каждый из которых помещается в оперативную память,
|
||||||
|
а также writer для записи результата.
|
||||||
|
|
||||||
|
Результаты сортировки отдельных файлов можно записывать поверх входных данных.
|
||||||
|
|
||||||
|
### Ссылки
|
||||||
|
|
||||||
|
* container/heap: https://golang.org/pkg/container/heap/
|
11
externalsort/io.go
Normal file
11
externalsort/io.go
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
// +build !change
|
||||||
|
|
||||||
|
package externalsort
|
||||||
|
|
||||||
|
// LineReader reads text input one line at a time.
//
// Per the task README and the package tests, ReadLine is expected to return
// the next line without its trailing '\n'; a non-empty tail after the last
// '\n' also counts as a line, and io.EOF is returned once the input is
// exhausted.
type LineReader interface {
|
||||||
|
ReadLine() (string, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// LineWriter writes text output one line at a time.
//
// The package tests expect every line passed to Write to be emitted followed
// by a single '\n'.
type LineWriter interface {
|
||||||
|
Write(l string) error
|
||||||
|
}
|
167
externalsort/io_test.go
Normal file
167
externalsort/io_test.go
Normal file
|
@ -0,0 +1,167 @@
|
||||||
|
package externalsort
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"testing/iotest"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newStringReader(s string) LineReader {
|
||||||
|
return NewReader(strings.NewReader(s))
|
||||||
|
}
|
||||||
|
|
||||||
|
func readAll(r LineReader) (lines []string, err error) {
|
||||||
|
for {
|
||||||
|
l, err := r.ReadLine()
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
return lines, nil
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
lines = append(lines, l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLineReader(t *testing.T) {
|
||||||
|
type Wrapper func(r io.Reader) io.Reader
|
||||||
|
|
||||||
|
for _, tc := range []struct {
|
||||||
|
name string
|
||||||
|
in string
|
||||||
|
wrappers []Wrapper
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty",
|
||||||
|
in: "",
|
||||||
|
expected: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "one-row",
|
||||||
|
in: "abc",
|
||||||
|
expected: []string{"abc"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "linebreak",
|
||||||
|
in: `abc
|
||||||
|
|
||||||
|
`,
|
||||||
|
expected: []string{"abc", ""},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple-rows",
|
||||||
|
in: `a
|
||||||
|
|
||||||
|
b
|
||||||
|
b
|
||||||
|
`,
|
||||||
|
expected: []string{"a", "", "b", "b"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "large-row",
|
||||||
|
in: strings.Repeat("a", 4097),
|
||||||
|
expected: []string{strings.Repeat("a", 4097)},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "huge-row",
|
||||||
|
in: strings.Repeat("a", 65537),
|
||||||
|
expected: []string{strings.Repeat("a", 65537)},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "half-reader",
|
||||||
|
in: strings.Repeat("a", 1025),
|
||||||
|
wrappers: []Wrapper{iotest.HalfReader},
|
||||||
|
expected: []string{strings.Repeat("a", 1025)},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "eof",
|
||||||
|
in: strings.Repeat("a", 1025),
|
||||||
|
wrappers: []Wrapper{iotest.DataErrReader},
|
||||||
|
expected: []string{strings.Repeat("a", 1025)},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
var r io.Reader
|
||||||
|
r = strings.NewReader(tc.in)
|
||||||
|
for _, w := range tc.wrappers {
|
||||||
|
r = w(r)
|
||||||
|
}
|
||||||
|
|
||||||
|
lineReader := NewReader(r)
|
||||||
|
|
||||||
|
lines, err := readAll(lineReader)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Len(t, lines, len(tc.expected),
|
||||||
|
"expected: %+v, got: %+v", tc.expected, lines)
|
||||||
|
require.Equal(t, tc.expected, lines)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// brokenReader is an io.Reader test double whose Read always fails.
type brokenReader int

// Read never touches the buffer: it reports zero bytes read together with a
// fixed, non-EOF error.
func (brokenReader) Read([]byte) (int, error) {
	failure := errors.New("read is broken")
	return 0, failure
}
|
||||||
|
|
||||||
|
// eofReader is an io.Reader test double that is exhausted from the start.
type eofReader int

// Read reports zero bytes read and io.EOF on every call.
func (eofReader) Read(_ []byte) (read int, err error) {
	err = io.EOF
	return read, err
}
|
||||||
|
|
||||||
|
func TestLineReader_error(t *testing.T) {
|
||||||
|
_, err := NewReader(new(brokenReader)).ReadLine()
|
||||||
|
require.Error(t, err)
|
||||||
|
require.False(t, errors.Is(err, io.EOF))
|
||||||
|
|
||||||
|
r := NewReader(new(eofReader))
|
||||||
|
_, err = r.ReadLine()
|
||||||
|
require.True(t, errors.Is(err, io.EOF))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLineWriter(t *testing.T) {
|
||||||
|
for _, tc := range []struct {
|
||||||
|
name string
|
||||||
|
lines []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty",
|
||||||
|
lines: []string{""},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "simple",
|
||||||
|
lines: []string{"a", "b", "c"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "large-line",
|
||||||
|
lines: []string{strings.Repeat("xx", 2049), "x", "y"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "huge-line",
|
||||||
|
lines: []string{strings.Repeat("?", 65537), "?", "!"},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
w := bufio.NewWriter(&buf)
|
||||||
|
lw := NewWriter(w)
|
||||||
|
|
||||||
|
for _, l := range tc.lines {
|
||||||
|
require.NoError(t, lw.Write(l))
|
||||||
|
}
|
||||||
|
|
||||||
|
require.NoError(t, w.Flush())
|
||||||
|
expected := strings.Join(tc.lines, "\n") + "\n"
|
||||||
|
require.Equal(t, expected, buf.String())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
23
externalsort/sort.go
Normal file
23
externalsort/sort.go
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
// +build !solution
|
||||||
|
|
||||||
|
package externalsort
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewReader wraps r in a LineReader (see the task README).
//
// This is the assignment placeholder: until a solution is provided it
// panics with "implement me".
func NewReader(r io.Reader) LineReader {
|
||||||
|
panic("implement me")
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewWriter wraps w in a LineWriter (see the task README).
//
// This is the assignment placeholder: until a solution is provided it
// panics with "implement me".
func NewWriter(w io.Writer) LineWriter {
|
||||||
|
panic("implement me")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge merges an arbitrary number of already-sorted line groups, reading
// from readers as needed and writing the result to w (see the task README).
// The package tests show that the input order is trusted, not verified.
//
// This is the assignment placeholder: until a solution is provided it
// panics with "implement me".
func Merge(w LineWriter, readers ...LineReader) error {
|
||||||
|
panic("implement me")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort takes the names of any number of input files, each of which fits in
// memory on its own, and writes the sorted result to w (see the task README).
// Per the README, the sorted contents of an individual file may be written
// over that input file.
//
// This is the assignment placeholder: until a solution is provided it
// panics with "implement me".
func Sort(w io.Writer, in ...string) error {
|
||||||
|
panic("implement me")
|
||||||
|
}
|
150
externalsort/sort_test.go
Normal file
150
externalsort/sort_test.go
Normal file
|
@ -0,0 +1,150 @@
|
||||||
|
package externalsort
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"gitlab.com/slon/shad-go/tools/testtool"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMerge(t *testing.T) {
|
||||||
|
for _, tc := range []struct {
|
||||||
|
name string
|
||||||
|
in []string
|
||||||
|
out string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "simple",
|
||||||
|
in: []string{`0`, `1
|
||||||
|
1
|
||||||
|
1`},
|
||||||
|
out: `0
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Merge believes lines are read in sorted order.
|
||||||
|
name: "single-unsorted-file",
|
||||||
|
in: []string{`1
|
||||||
|
0`},
|
||||||
|
out: `1
|
||||||
|
0
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
out := &bytes.Buffer{}
|
||||||
|
w := bufio.NewWriter(out)
|
||||||
|
lw := NewWriter(w)
|
||||||
|
|
||||||
|
var readers []LineReader
|
||||||
|
for _, s := range tc.in {
|
||||||
|
readers = append(readers, newStringReader(s))
|
||||||
|
}
|
||||||
|
|
||||||
|
err := Merge(lw, readers...)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.NoError(t, w.Flush())
|
||||||
|
require.Equal(t, tc.out, out.String())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSort_fileNotFound(t *testing.T) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
err := Sort(&buf, testtool.RandomName())
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSort(t *testing.T) {
|
||||||
|
testDir := path.Join("./testdata", "sort")
|
||||||
|
|
||||||
|
readTestCase := func(dir string) (in []string, out string) {
|
||||||
|
files, err := ioutil.ReadDir(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
for _, f := range files {
|
||||||
|
if strings.HasPrefix(f.Name(), "in") {
|
||||||
|
in = append(in, path.Join(dir, f.Name()))
|
||||||
|
}
|
||||||
|
if f.Name() == "out.txt" {
|
||||||
|
out = path.Join(dir, f.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, d := range listDirs(t, testDir) {
|
||||||
|
testCaseDir := path.Join(testDir, d)
|
||||||
|
|
||||||
|
t.Run(d, func(t *testing.T) {
|
||||||
|
tmpDir, err := ioutil.TempDir("", fmt.Sprintf("sort%s-", d))
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer func() { _ = os.RemoveAll(tmpDir) }()
|
||||||
|
|
||||||
|
in, out := readTestCase(testCaseDir)
|
||||||
|
in = copyFiles(t, in, tmpDir)
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
w := bufio.NewWriter(&buf)
|
||||||
|
require.NoError(t, Sort(w, in...))
|
||||||
|
|
||||||
|
expected, err := ioutil.ReadFile(out)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.NoError(t, w.Flush())
|
||||||
|
require.Equal(t, string(expected), buf.String())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func listDirs(t *testing.T, dir string) []string {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
files, err := ioutil.ReadDir(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var dirs []string
|
||||||
|
for _, f := range files {
|
||||||
|
if f.IsDir() {
|
||||||
|
dirs = append(dirs, f.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dirs
|
||||||
|
}
|
||||||
|
|
||||||
|
func copyFiles(t *testing.T, in []string, dir string) []string {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
var ret []string
|
||||||
|
for _, f := range in {
|
||||||
|
ret = append(ret, copyFile(t, f, dir))
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func copyFile(t *testing.T, f, dir string) string {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
data, err := ioutil.ReadFile(f)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
dst := path.Join(dir, path.Base(f))
|
||||||
|
err = ioutil.WriteFile(dst, data, 0644)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
return dst
|
||||||
|
}
|
BIN
externalsort/testdata/sort/1/in1.txt
vendored
Normal file
BIN
externalsort/testdata/sort/1/in1.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/1/out.txt
vendored
Normal file
BIN
externalsort/testdata/sort/1/out.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/2/in1.txt
vendored
Normal file
BIN
externalsort/testdata/sort/2/in1.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/2/out.txt
vendored
Normal file
BIN
externalsort/testdata/sort/2/out.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/3/in1.txt
vendored
Normal file
BIN
externalsort/testdata/sort/3/in1.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/3/in2.txt
vendored
Normal file
BIN
externalsort/testdata/sort/3/in2.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/3/in3.txt
vendored
Normal file
BIN
externalsort/testdata/sort/3/in3.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/3/out.txt
vendored
Normal file
BIN
externalsort/testdata/sort/3/out.txt
vendored
Normal file
Binary file not shown.
0
externalsort/testdata/sort/4/in1.txt
vendored
Normal file
0
externalsort/testdata/sort/4/in1.txt
vendored
Normal file
0
externalsort/testdata/sort/4/in2.txt
vendored
Normal file
0
externalsort/testdata/sort/4/in2.txt
vendored
Normal file
0
externalsort/testdata/sort/4/out.txt
vendored
Normal file
0
externalsort/testdata/sort/4/out.txt
vendored
Normal file
BIN
externalsort/testdata/sort/5/in1.txt
vendored
Normal file
BIN
externalsort/testdata/sort/5/in1.txt
vendored
Normal file
Binary file not shown.
0
externalsort/testdata/sort/5/in2.txt
vendored
Normal file
0
externalsort/testdata/sort/5/in2.txt
vendored
Normal file
BIN
externalsort/testdata/sort/5/in3.txt
vendored
Normal file
BIN
externalsort/testdata/sort/5/in3.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/5/out.txt
vendored
Normal file
BIN
externalsort/testdata/sort/5/out.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/6/in1.txt
vendored
Normal file
BIN
externalsort/testdata/sort/6/in1.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/6/in2.txt
vendored
Normal file
BIN
externalsort/testdata/sort/6/in2.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/6/in3.txt
vendored
Normal file
BIN
externalsort/testdata/sort/6/in3.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/6/out.txt
vendored
Normal file
BIN
externalsort/testdata/sort/6/out.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in1.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in1.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in10.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in10.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in2.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in2.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in3.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in3.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in4.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in4.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in5.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in5.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in6.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in6.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in7.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in7.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in8.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in8.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/in9.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/in9.txt
vendored
Normal file
Binary file not shown.
BIN
externalsort/testdata/sort/7/out.txt
vendored
Normal file
BIN
externalsort/testdata/sort/7/out.txt
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue