// Reconstructed from the whitespace-collapsed patch 300704d ("Implemented
// spacecollapse, optimized reverse"). Reverse comes from
// utf8/reverse/reverse.go and CollapseSpaces from
// utf8/spacecollapse/collapse.go.

// Reverse returns input with its UTF-8 encoded runes in reverse order.
//
// The string is decoded from the end, one rune at a time, and the bytes of
// each rune are copied unchanged from input into a pre-sized builder. Because
// the bytes are copied rather than re-encoded, the result is always exactly
// len(input) bytes long, so the single Grow reservation is never exceeded.
// A byte that is not part of a valid UTF-8 sequence is treated as a one-byte
// unit and kept as-is instead of being expanded to U+FFFD.
//
// An alternative that swaps elements of a []rune in place needs a second
// buffer (the rune slice plus the output string), which is why the builder
// approach is used here.
func Reverse(input string) string {
	var out strings.Builder
	out.Grow(len(input))

	for end := len(input); end > 0; {
		// size is at least 1 for a non-empty prefix, so the loop always
		// makes progress, even on malformed input.
		_, size := utf8.DecodeLastRuneInString(input[:end])
		out.WriteString(input[end-size : end])
		end -= size
	}
	return out.String()
}

// CollapseSpaces returns input with every run of consecutive white space
// replaced by a single ASCII space.
//
// White space is whatever unicode.IsSpace reports, so tabs, newlines and
// multi-byte spaces such as U+00A0 are all turned into ' '. Leading and
// trailing runs are collapsed to one space, not trimmed. All other bytes,
// including bytes that are not valid UTF-8, are copied through unchanged.
// The result is therefore never longer than input and fits in the single
// Grow reservation.
func CollapseSpaces(input string) string {
	var out strings.Builder
	out.Grow(len(input))

	inRun := false // true while the previous rune was white space
	for i := 0; i < len(input); {
		r, size := utf8.DecodeRuneInString(input[i:])
		switch {
		case !unicode.IsSpace(r):
			out.WriteString(input[i : i+size])
			inRun = false
		case !inRun:
			// First white-space rune of a run: emit the one replacement.
			out.WriteByte(' ')
			inRun = true
		}
		i += size
	}
	return out.String()
}