2022-02-10 22:06:57 +00:00
|
|
|
//go:build !solution
|
2020-03-12 17:52:18 +00:00
|
|
|
|
|
|
|
package externalsort
|
|
|
|
|
|
|
|
import (
|
2024-06-06 23:21:38 +00:00
|
|
|
"bufio"
|
|
|
|
"container/heap"
|
2020-03-12 17:52:18 +00:00
|
|
|
"io"
|
2024-06-06 23:21:38 +00:00
|
|
|
"os"
|
|
|
|
"sort"
|
|
|
|
"strings"
|
2020-03-12 17:52:18 +00:00
|
|
|
)
|
|
|
|
|
2024-06-06 23:21:38 +00:00
|
|
|
// lineReader adapts a buffered reader to a line-at-a-time ReadLine call.
type lineReader struct {
	br bufio.Reader
}

// ReadLine returns the next line from the underlying reader with its
// terminating '\n' removed.
//
// If the underlying read fails, the text read before the failure is returned
// unchanged together with that error. In particular, a final line that is not
// newline-terminated is returned alongside io.EOF.
func (lr *lineReader) ReadLine() (string, error) {
	line, err := lr.br.ReadString('\n')
	if err == nil {
		// ReadString succeeds only when the result ends with the delimiter,
		// so exactly one trailing '\n' is removed here.
		line = strings.TrimSuffix(line, "\n")
	}
	return line, err
}
|
|
|
|
|
|
|
|
// lineWriter emits strings to an io.Writer, one per line.
type lineWriter struct {
	w io.Writer
}

// Write sends l followed by a single '\n' to the underlying writer in one
// Write call and returns the error from that call, if any.
func (lw *lineWriter) Write(l string) error {
	buf := make([]byte, 0, len(l)+1)
	buf = append(buf, l...)
	buf = append(buf, '\n')
	if _, err := lw.w.Write(buf); err != nil {
		return err
	}
	return nil
}
|
|
|
|
|
2020-03-12 17:52:18 +00:00
|
|
|
func NewReader(r io.Reader) LineReader {
|
2024-06-06 23:21:38 +00:00
|
|
|
return &lineReader{*bufio.NewReader(r)}
|
2020-03-12 17:52:18 +00:00
|
|
|
}
|
|
|
|
|
2020-03-12 20:12:36 +00:00
|
|
|
func NewWriter(w io.Writer) LineWriter {
|
2024-06-06 23:21:38 +00:00
|
|
|
return &lineWriter{w}
|
|
|
|
}
|
|
|
|
|
|
|
|
// item for the reader heap
|
|
|
|
// stores reader to get the next line from,
|
|
|
|
// the line from reader which is used for sorting the heap
|
|
|
|
// lastLine bool which determines whether the item has no more lines to read
|
|
|
|
// and it can be safely removed from the heap
|
|
|
|
type readerHeapItem struct {
|
|
|
|
reader *LineReader
|
|
|
|
line string
|
|
|
|
lastLine bool
|
|
|
|
}
|
|
|
|
|
|
|
|
type readerHeap []readerHeapItem
|
|
|
|
|
|
|
|
func (h readerHeap) Len() int {
|
|
|
|
return len(h)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h readerHeap) Less(i, j int) bool {
|
|
|
|
return h[i].line < h[j].line
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h readerHeap) Swap(i, j int) {
|
|
|
|
h[i], h[j] = h[j], h[i]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *readerHeap) Push(x any) {
|
|
|
|
*h = append(*h, x.(readerHeapItem))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *readerHeap) Pop() any {
|
|
|
|
old := *h
|
|
|
|
n := len(old)
|
|
|
|
x := old[n-1]
|
|
|
|
*h = old[0 : n-1]
|
|
|
|
return x
|
|
|
|
}
|
|
|
|
|
|
|
|
// push reader back to the heap if it still has lines
|
|
|
|
// and update its lastLine value
|
|
|
|
// otherwise do nothing
|
|
|
|
// returns any errors that occured while reading (expect io.EOF)
|
|
|
|
func pushIfNotLast(h *readerHeap, reader *LineReader) error {
|
|
|
|
line, err := (*reader).ReadLine()
|
|
|
|
if err != nil {
|
|
|
|
if err != io.EOF {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
heap.Push(h, readerHeapItem{reader, line, true})
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
heap.Push(h, readerHeapItem{reader, line, false})
|
|
|
|
return nil
|
2020-03-12 17:52:18 +00:00
|
|
|
}
|
|
|
|
|
2020-03-12 20:12:36 +00:00
|
|
|
func Merge(w LineWriter, readers ...LineReader) error {
|
2024-06-06 23:21:38 +00:00
|
|
|
h := make(readerHeap, 0, len(readers))
|
|
|
|
heap.Init(&h)
|
|
|
|
// push initial values to heap
|
|
|
|
for _, reader := range readers {
|
|
|
|
err := pushIfNotLast(&h, &reader)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// write lines from the top of the heap while
|
|
|
|
// the heap is not empty
|
|
|
|
for len(h) > 0 {
|
|
|
|
it := heap.Pop(&h).(readerHeapItem)
|
|
|
|
w.Write(it.line)
|
|
|
|
if it.lastLine {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
err := pushIfNotLast(&h, it.reader)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
2020-03-12 17:52:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func Sort(w io.Writer, in ...string) error {
|
2024-06-06 23:21:38 +00:00
|
|
|
readers := make([]LineReader, 0, len(in))
|
|
|
|
for _, fn := range in {
|
|
|
|
b, err := os.ReadFile(fn)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
s := strings.TrimSuffix(string(b), "\n")
|
|
|
|
// do not sort empty files
|
|
|
|
if len(s) == 0 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
lines := strings.Split(s, "\n")
|
|
|
|
sort.Strings(lines)
|
|
|
|
s = strings.Join(lines, "\n")
|
|
|
|
readers = append(readers, NewReader(strings.NewReader(s)))
|
|
|
|
}
|
|
|
|
return Merge(NewWriter(w), readers...)
|
2020-03-12 17:52:18 +00:00
|
|
|
}
|