From ffd4745b1a129f2bdd5a8737ec79ec3137c50758 Mon Sep 17 00:00:00 2001 From: erius Date: Fri, 7 Jun 2024 02:21:38 +0300 Subject: [PATCH] Implemented externalsort --- externalsort/sort.go | 128 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 4 deletions(-) diff --git a/externalsort/sort.go b/externalsort/sort.go index b9c05b3..b2a5cce 100644 --- a/externalsort/sort.go +++ b/externalsort/sort.go @@ -3,21 +3,141 @@ package externalsort import ( + "bufio" + "container/heap" "io" + "os" + "sort" + "strings" ) +// lineReader is simply a wrapper around bufffered reader +type lineReader struct { + br bufio.Reader +} + +func (lr *lineReader) ReadLine() (string, error) { + s, err := lr.br.ReadString('\n') + if err != nil { + return s, err + } + s = s[:len(s)-1] + return s, err +} + +type lineWriter struct { + w io.Writer +} + +func (lw *lineWriter) Write(l string) error { + _, err := lw.w.Write(append([]byte(l), '\n')) + return err +} + func NewReader(r io.Reader) LineReader { - panic("implement me") + return &lineReader{*bufio.NewReader(r)} } func NewWriter(w io.Writer) LineWriter { - panic("implement me") + return &lineWriter{w} +} + +// item for the reader heap +// stores reader to get the next line from, +// the line from reader which is used for sorting the heap +// lastLine bool which determines whether the item has no more lines to read +// and it can be safely removed from the heap +type readerHeapItem struct { + reader *LineReader + line string + lastLine bool +} + +type readerHeap []readerHeapItem + +func (h readerHeap) Len() int { + return len(h) +} + +func (h readerHeap) Less(i, j int) bool { + return h[i].line < h[j].line +} + +func (h readerHeap) Swap(i, j int) { + h[i], h[j] = h[j], h[i] +} + +func (h *readerHeap) Push(x any) { + *h = append(*h, x.(readerHeapItem)) +} + +func (h *readerHeap) Pop() any { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} + +// push reader back to the heap if it still has lines +// and update its lastLine value +// otherwise do nothing +// returns any errors that occured while reading (expect io.EOF) +func pushIfNotLast(h *readerHeap, reader *LineReader) error { + line, err := (*reader).ReadLine() + if err != nil { + if err != io.EOF { + return err + } + heap.Push(h, readerHeapItem{reader, line, true}) + return nil + } + heap.Push(h, readerHeapItem{reader, line, false}) + return nil } func Merge(w LineWriter, readers ...LineReader) error { - panic("implement me") + h := make(readerHeap, 0, len(readers)) + heap.Init(&h) + // push initial values to heap + for _, reader := range readers { + err := pushIfNotLast(&h, &reader) + if err != nil { + return err + } + } + // write lines from the top of the heap while + // the heap is not empty + for len(h) > 0 { + it := heap.Pop(&h).(readerHeapItem) + w.Write(it.line) + if it.lastLine { + continue + } + err := pushIfNotLast(&h, it.reader) + if err != nil { + return err + } + } + return nil } func Sort(w io.Writer, in ...string) error { - panic("implement me") + readers := make([]LineReader, 0, len(in)) + for _, fn := range in { + b, err := os.ReadFile(fn) + if err != nil { + return err + } + s := strings.TrimSuffix(string(b), "\n") + // do not sort empty files + if len(s) == 0 { + continue + } + lines := strings.Split(s, "\n") + sort.Strings(lines) + s = strings.Join(lines, "\n") + readers = append(readers, NewReader(strings.NewReader(s))) + } + return Merge(NewWriter(w), readers...) }