Implemented externalsort
This commit is contained in:
parent
6fe051eea5
commit
ffd4745b1a
1 changed files with 124 additions and 4 deletions
|
@ -3,21 +3,141 @@
|
|||
package externalsort
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"container/heap"
|
||||
"io"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// lineReader adapts a buffered reader so that input can be consumed one
// newline-terminated line at a time.
type lineReader struct {
	br bufio.Reader // buffered source the lines are read from
}

// ReadLine returns the next line from the underlying reader with its trailing
// '\n' removed.
//
// When the underlying read reports an error (io.EOF included), the text read
// so far is returned unmodified together with that error, so a final line
// that lacks a terminating newline arrives alongside the error.
func (lr *lineReader) ReadLine() (string, error) {
	line, err := lr.br.ReadString('\n')
	if err == nil {
		// A nil error guarantees the delimiter is present; drop it.
		line = line[:len(line)-1]
	}
	return line, err
}
|
||||
|
||||
// lineWriter emits strings to an io.Writer, one per line.
type lineWriter struct {
	w io.Writer // destination for the written lines
}

// Write sends l followed by a single '\n' to the underlying writer in one
// Write call and returns the error reported by that call.
func (lw *lineWriter) Write(l string) error {
	buf := make([]byte, 0, len(l)+1)
	buf = append(buf, l...)
	buf = append(buf, '\n')
	_, err := lw.w.Write(buf)
	return err
}
|
||||
|
||||
func NewReader(r io.Reader) LineReader {
|
||||
panic("implement me")
|
||||
return &lineReader{*bufio.NewReader(r)}
|
||||
}
|
||||
|
||||
func NewWriter(w io.Writer) LineWriter {
|
||||
panic("implement me")
|
||||
return &lineWriter{w}
|
||||
}
|
||||
|
||||
// item for the reader heap
|
||||
// stores reader to get the next line from,
|
||||
// the line from reader which is used for sorting the heap
|
||||
// lastLine bool which determines whether the item has no more lines to read
|
||||
// and it can be safely removed from the heap
|
||||
type readerHeapItem struct {
|
||||
reader *LineReader
|
||||
line string
|
||||
lastLine bool
|
||||
}
|
||||
|
||||
type readerHeap []readerHeapItem
|
||||
|
||||
func (h readerHeap) Len() int {
|
||||
return len(h)
|
||||
}
|
||||
|
||||
func (h readerHeap) Less(i, j int) bool {
|
||||
return h[i].line < h[j].line
|
||||
}
|
||||
|
||||
func (h readerHeap) Swap(i, j int) {
|
||||
h[i], h[j] = h[j], h[i]
|
||||
}
|
||||
|
||||
func (h *readerHeap) Push(x any) {
|
||||
*h = append(*h, x.(readerHeapItem))
|
||||
}
|
||||
|
||||
func (h *readerHeap) Pop() any {
|
||||
old := *h
|
||||
n := len(old)
|
||||
x := old[n-1]
|
||||
*h = old[0 : n-1]
|
||||
return x
|
||||
}
|
||||
|
||||
// push reader back to the heap if it still has lines
|
||||
// and update its lastLine value
|
||||
// otherwise do nothing
|
||||
// returns any errors that occured while reading (expect io.EOF)
|
||||
func pushIfNotLast(h *readerHeap, reader *LineReader) error {
|
||||
line, err := (*reader).ReadLine()
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
return err
|
||||
}
|
||||
heap.Push(h, readerHeapItem{reader, line, true})
|
||||
return nil
|
||||
}
|
||||
heap.Push(h, readerHeapItem{reader, line, false})
|
||||
return nil
|
||||
}
|
||||
|
||||
func Merge(w LineWriter, readers ...LineReader) error {
|
||||
panic("implement me")
|
||||
h := make(readerHeap, 0, len(readers))
|
||||
heap.Init(&h)
|
||||
// push initial values to heap
|
||||
for _, reader := range readers {
|
||||
err := pushIfNotLast(&h, &reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// write lines from the top of the heap while
|
||||
// the heap is not empty
|
||||
for len(h) > 0 {
|
||||
it := heap.Pop(&h).(readerHeapItem)
|
||||
w.Write(it.line)
|
||||
if it.lastLine {
|
||||
continue
|
||||
}
|
||||
err := pushIfNotLast(&h, it.reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func Sort(w io.Writer, in ...string) error {
|
||||
panic("implement me")
|
||||
readers := make([]LineReader, 0, len(in))
|
||||
for _, fn := range in {
|
||||
b, err := os.ReadFile(fn)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s := strings.TrimSuffix(string(b), "\n")
|
||||
// do not sort empty files
|
||||
if len(s) == 0 {
|
||||
continue
|
||||
}
|
||||
lines := strings.Split(s, "\n")
|
||||
sort.Strings(lines)
|
||||
s = strings.Join(lines, "\n")
|
||||
readers = append(readers, NewReader(strings.NewReader(s)))
|
||||
}
|
||||
return Merge(NewWriter(w), readers...)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue