Utils: Reimplement util.GetRandomString to avoid modulo bias (#64481)

* reimplement GetRandomString, add tests that results are unbiased
This commit is contained in:
Dan Cech 2023-04-21 00:24:02 +10:00 committed by GitHub
parent c903d1546f
commit 7e765c870a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 117 additions and 13 deletions

View File

@ -13,22 +13,39 @@ import (
"golang.org/x/crypto/pbkdf2"
)
// GetRandomString generate random string by specify chars.
// source: https://github.com/gogits/gogs/blob/9ee80e3e5426821f03a4e99fad34418f5c736413/modules/base/tool.go#L58
func GetRandomString(n int, alphabets ...byte) (string, error) {
const alphanum = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
var bytes = make([]byte, n)
if _, err := rand.Read(bytes); err != nil {
return "", err
}
const alphanum = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
for i, b := range bytes {
if len(alphabets) == 0 {
bytes[i] = alphanum[b%byte(len(alphanum))]
} else {
bytes[i] = alphabets[b%byte(len(alphabets))]
// GetRandomString generates a random string of length n. Characters are drawn
// from alphabets when at least one byte is given, and from the package-level
// alphanum set otherwise.
//
// Random bytes are obtained from rand.Read. A byte is only used when it is
// below the largest multiple of the alphabet size that fits into the 256
// possible byte values; all other bytes are discarded. Every alphabet entry is
// therefore hit by the same number of byte values, and reducing the byte modulo
// the alphabet size does not favor any character.
//
// An error is returned when n is negative, when more than 256 alphabet bytes
// are supplied (a single random byte cannot select among them evenly), or when
// rand.Read fails. n == 0 yields the empty string.
func GetRandomString(n int, alphabets ...byte) (string, error) {
	if n < 0 {
		return "", randomStringError("random string length must not be negative")
	}

	chars := alphanum
	if len(alphabets) > 0 {
		chars = string(alphabets)
	}

	cnt := len(chars)
	if cnt > 256 {
		// With more entries than byte values the acceptance limit below would
		// be zero and the sampling loop would never terminate.
		return "", randomStringError("random string alphabet must not exceed 256 bytes")
	}

	// Bytes in [0, limit) are accepted. The base is 256 (the number of distinct
	// byte values), not 255, so a full 256-entry alphabet is usable and sizes
	// that divide 256 reject nothing.
	limit := 256 / cnt * cnt

	out := make([]byte, n)

	// Ask for 25% more random bytes than characters needed as headroom for
	// rejected bytes; the outer loop simply reads again if that is not enough.
	randread := n * 5 / 4
	randbytes := make([]byte, randread)

	for i := 0; i < n; {
		if _, err := rand.Read(randbytes); err != nil {
			return "", err
		}

		for j := 0; i < n && j < randread; j++ {
			b := int(randbytes[j])
			if b >= limit {
				continue
			}

			out[i] = chars[b%cnt]
			i++
		}
	}

	return string(out), nil
}

// randomStringError is the error type GetRandomString returns for invalid
// arguments.
type randomStringError string

// Error implements the error interface.
func (e randomStringError) Error() string { return string(e) }

View File

@ -1,6 +1,7 @@
package util
import (
"math"
"strings"
"testing"
@ -130,3 +131,89 @@ func TestDecodeQuotedPrintable(t *testing.T) {
assert.Equal(t, str_out, val)
})
}
// TestGetRandomString checks that GetRandomString, called without an explicit
// alphabet, produces only alphanumeric characters and samples them without
// measurable bias.
func TestGetRandomString(t *testing.T) {
	charset := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

	// 99.99% probability for 61 degrees of freedom
	assertUnbiasedRandomString(t, charset, 110.8397)
}

// TestGetRandomDigits checks that GetRandomString, restricted to the decimal
// digits, produces only digits and samples them without measurable bias.
func TestGetRandomDigits(t *testing.T) {
	charset := "0123456789"

	// 99.99% probability for 9 degrees of freedom
	assertUnbiasedRandomString(t, charset, 33.7199, []byte(charset)...)
}

// assertUnbiasedRandomString generates many random strings and verifies that
// their characters are spread evenly over charset.
//
// charset lists every character the strings may contain. alphabets is
// forwarded to GetRandomString unchanged; pass nothing to exercise the default
// alphabet. criticalValue is the chi-squared bound for len(charset)-1 degrees
// of freedom; callers pass the 99.99% bound, so a correct generator is still
// expected to exceed it in roughly one run out of 10,000.
func assertUnbiasedRandomString(t *testing.T, charset string, criticalValue float64, alphabets ...byte) {
	t.Helper()

	const (
		length = 20
		rounds = 50_000
	)

	// Seed the table with every allowed character: one that is never produced
	// then counts as zero, and one that is not allowed can be told apart.
	counts := make(map[rune]int, len(charset))
	for _, c := range charset {
		counts[c] = 0
	}

	// Generate random strings and count the frequency of each character
	for i := 0; i < rounds; i++ {
		r, err := GetRandomString(length, alphabets...)
		require.NoError(t, err)
		require.Len(t, r, length)

		for _, c := range r {
			if _, ok := counts[c]; !ok {
				t.Fatalf("GetRandomString returned %q, which is not part of charset %q", c, charset)
			}
			counts[c]++
		}
	}

	// Find lowest and highest frequencies
	lowest := rounds * length
	highest := 0

	// Calculate chi-squared statistic
	expected := float64(rounds) * float64(length) / float64(len(charset))
	chiSquared := 0.0

	for _, c := range charset {
		seen := counts[c]
		if seen < lowest {
			lowest = seen
		}
		if seen > highest {
			highest = seen
		}
		chiSquared += math.Pow(float64(seen)-expected, 2) / expected
	}

	// Ensure there is no more than 10% variance between lowest and highest frequency characters
	assert.LessOrEqual(t, float64(highest-lowest)/float64(lowest), 0.1, "Variance between lowest and highest frequency characters must be no more than 10%")

	// Ensure chi-squared value is lower than the critical bound
	assert.Less(t, chiSquared, criticalValue, "Chi squared value must be less than the 99.99% critical bound")
}