Browse Source
* update go-structr library -> v0.6.0, add necessary wrapping types + code changes to support these changes
* update readme with go-structr package changes
* improved wrapping of the SliceCache type
* add code comments for the cache wrapper types
* remove test.out 😇
---------
Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
pull/2799/head
62 changed files with 2318 additions and 5763 deletions
@ -1,52 +0,0 @@
|
||||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package cache |
||||
|
||||
import ( |
||||
"slices" |
||||
|
||||
"codeberg.org/gruf/go-cache/v3/simple" |
||||
) |
||||
|
||||
// SliceCache wraps a simple.Cache to provide simple loader-callback
|
||||
// functions for fetching + caching slices of objects (e.g. IDs).
|
||||
type SliceCache[T any] struct { |
||||
*simple.Cache[string, []T] |
||||
} |
||||
|
||||
// Load will attempt to load an existing slice from the cache for the given key, else calling the provided load function and caching the result.
|
||||
func (c *SliceCache[T]) Load(key string, load func() ([]T, error)) ([]T, error) { |
||||
// Look for follow IDs list in cache under this key.
|
||||
data, ok := c.Get(key) |
||||
|
||||
if !ok { |
||||
var err error |
||||
|
||||
// Not cached, load!
|
||||
data, err = load() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
// Store the data.
|
||||
c.Set(key, data) |
||||
} |
||||
|
||||
// Return data clone for safety.
|
||||
return slices.Clone(data), nil |
||||
} |
||||
@ -0,0 +1,214 @@
|
||||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package cache |
||||
|
||||
import ( |
||||
"slices" |
||||
|
||||
"codeberg.org/gruf/go-cache/v3/simple" |
||||
"codeberg.org/gruf/go-structr" |
||||
) |
||||
|
||||
// SliceCache wraps a simple.Cache to provide simple loader-callback
|
||||
// functions for fetching + caching slices of objects (e.g. IDs).
|
||||
type SliceCache[T any] struct { |
||||
cache simple.Cache[string, []T] |
||||
} |
||||
|
||||
// Init initializes the cache with given length + capacity.
|
||||
func (c *SliceCache[T]) Init(len, cap int) { |
||||
c.cache = simple.Cache[string, []T]{} |
||||
c.cache.Init(len, cap) |
||||
} |
||||
|
||||
// Load will attempt to load an existing slice from cache for key, else calling load function and caching the result.
|
||||
func (c *SliceCache[T]) Load(key string, load func() ([]T, error)) ([]T, error) { |
||||
// Look for cached values.
|
||||
data, ok := c.cache.Get(key) |
||||
|
||||
if !ok { |
||||
var err error |
||||
|
||||
// Not cached, load!
|
||||
data, err = load() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
// Store the data.
|
||||
c.cache.Set(key, data) |
||||
} |
||||
|
||||
// Return data clone for safety.
|
||||
return slices.Clone(data), nil |
||||
} |
||||
|
||||
// Invalidate: see simple.Cache{}.InvalidateAll().
|
||||
func (c *SliceCache[T]) Invalidate(keys ...string) { |
||||
_ = c.cache.InvalidateAll(keys...) |
||||
} |
||||
|
||||
// Trim: see simple.Cache{}.Trim().
|
||||
func (c *SliceCache[T]) Trim(perc float64) { |
||||
c.cache.Trim(perc) |
||||
} |
||||
|
||||
// Clear: see simple.Cache{}.Clear().
|
||||
func (c *SliceCache[T]) Clear() { |
||||
c.cache.Clear() |
||||
} |
||||
|
||||
// Len: see simple.Cache{}.Len().
|
||||
func (c *SliceCache[T]) Len() int { |
||||
return c.cache.Len() |
||||
} |
||||
|
||||
// Cap: see simple.Cache{}.Cap().
|
||||
func (c *SliceCache[T]) Cap() int { |
||||
return c.cache.Cap() |
||||
} |
||||
|
||||
// StructCache wraps a structr.Cache{} to simple index caching
|
||||
// by name (also to ease update to library version that introduced
|
||||
// this). (in the future it may be worth embedding these indexes by
|
||||
// name under the main database caches struct which would reduce
|
||||
// time required to access cached values).
|
||||
type StructCache[StructType any] struct { |
||||
cache structr.Cache[StructType] |
||||
index map[string]*structr.Index |
||||
} |
||||
|
||||
// Init initializes the cache with given structr.CacheConfig{}.
|
||||
func (c *StructCache[T]) Init(config structr.CacheConfig[T]) { |
||||
c.index = make(map[string]*structr.Index, len(config.Indices)) |
||||
c.cache = structr.Cache[T]{} |
||||
c.cache.Init(config) |
||||
for _, cfg := range config.Indices { |
||||
c.index[cfg.Fields] = c.cache.Index(cfg.Fields) |
||||
} |
||||
} |
||||
|
||||
// GetOne calls structr.Cache{}.GetOne(), using a cached structr.Index{} by 'index' name.
|
||||
// Note: this also handles conversion of the untyped (any) keys to structr.Key{} via structr.Index{}.
|
||||
func (c *StructCache[T]) GetOne(index string, key ...any) (T, bool) { |
||||
i := c.index[index] |
||||
return c.cache.GetOne(i, i.Key(key...)) |
||||
} |
||||
|
||||
// Get calls structr.Cache{}.Get(), using a cached structr.Index{} by 'index' name.
|
||||
// Note: this also handles conversion of the untyped (any) keys to structr.Key{} via structr.Index{}.
|
||||
func (c *StructCache[T]) Get(index string, keys ...[]any) []T { |
||||
i := c.index[index] |
||||
return c.cache.Get(i, i.Keys(keys...)...) |
||||
} |
||||
|
||||
// Put: see structr.Cache{}.Put().
|
||||
func (c *StructCache[T]) Put(values ...T) { |
||||
c.cache.Put(values...) |
||||
} |
||||
|
||||
// LoadOne calls structr.Cache{}.LoadOne(), using a cached structr.Index{} by 'index' name.
|
||||
// Note: this also handles conversion of the untyped (any) keys to structr.Key{} via structr.Index{}.
|
||||
func (c *StructCache[T]) LoadOne(index string, load func() (T, error), key ...any) (T, error) { |
||||
i := c.index[index] |
||||
return c.cache.LoadOne(i, i.Key(key...), load) |
||||
} |
||||
|
||||
// LoadIDs calls structr.Cache{}.Load(), using a cached structr.Index{} by 'index' name. Note: this also handles
|
||||
// conversion of the ID strings to structr.Key{} via structr.Index{}. Strong typing is used for caller convenience.
|
||||
//
|
||||
// If you need to load multiple cache keys other than by ID strings, please create another convenience wrapper.
|
||||
func (c *StructCache[T]) LoadIDs(index string, ids []string, load func([]string) ([]T, error)) ([]T, error) { |
||||
i := c.index[index] |
||||
if i == nil { |
||||
// we only perform this check here as
|
||||
// we're going to use the index before
|
||||
// passing it to cache in main .Load().
|
||||
panic("missing index for cache type") |
||||
} |
||||
|
||||
// Generate cache keys for ID types.
|
||||
keys := make([]structr.Key, len(ids)) |
||||
for x, id := range ids { |
||||
keys[x] = i.Key(id) |
||||
} |
||||
|
||||
// Pass loader callback with wrapper onto main cache load function.
|
||||
return c.cache.Load(i, keys, func(uncached []structr.Key) ([]T, error) { |
||||
uncachedIDs := make([]string, len(uncached)) |
||||
for i := range uncached { |
||||
uncachedIDs[i] = uncached[i].Values()[0].(string) |
||||
} |
||||
return load(uncachedIDs) |
||||
}) |
||||
} |
||||
|
||||
// Store: see structr.Cache{}.Store().
|
||||
func (c *StructCache[T]) Store(value T, store func() error) error { |
||||
return c.cache.Store(value, store) |
||||
} |
||||
|
||||
// Invalidate calls structr.Cache{}.Invalidate(), using a cached structr.Index{} by 'index' name.
|
||||
// Note: this also handles conversion of the untyped (any) keys to structr.Key{} via structr.Index{}.
|
||||
func (c *StructCache[T]) Invalidate(index string, key ...any) { |
||||
i := c.index[index] |
||||
c.cache.Invalidate(i, i.Key(key...)) |
||||
} |
||||
|
||||
// InvalidateIDs calls structr.Cache{}.Invalidate(), using a cached structr.Index{} by 'index' name. Note: this also
|
||||
// handles conversion of the ID strings to structr.Key{} via structr.Index{}. Strong typing is used for caller convenience.
|
||||
//
|
||||
// If you need to invalidate multiple cache keys other than by ID strings, please create another convenience wrapper.
|
||||
func (c *StructCache[T]) InvalidateIDs(index string, ids []string) { |
||||
i := c.index[index] |
||||
if i == nil { |
||||
// we only perform this check here as
|
||||
// we're going to use the index before
|
||||
// passing it to cache in main .Load().
|
||||
panic("missing index for cache type") |
||||
} |
||||
|
||||
// Generate cache keys for ID types.
|
||||
keys := make([]structr.Key, len(ids)) |
||||
for x, id := range ids { |
||||
keys[x] = i.Key(id) |
||||
} |
||||
|
||||
// Pass to main invalidate func.
|
||||
c.cache.Invalidate(i, keys...) |
||||
} |
||||
|
||||
// Trim: see structr.Cache{}.Trim().
|
||||
func (c *StructCache[T]) Trim(perc float64) { |
||||
c.cache.Trim(perc) |
||||
} |
||||
|
||||
// Clear: see structr.Cache{}.Clear().
|
||||
func (c *StructCache[T]) Clear() { |
||||
c.cache.Clear() |
||||
} |
||||
|
||||
// Len: see structr.Cache{}.Len().
|
||||
func (c *StructCache[T]) Len() int { |
||||
return c.cache.Len() |
||||
} |
||||
|
||||
// Cap: see structr.Cache{}.Cap().
|
||||
func (c *StructCache[T]) Cap() int { |
||||
return c.cache.Cap() |
||||
} |
||||
@ -0,0 +1,9 @@
|
||||
MIT License |
||||
|
||||
Copyright (c) 2023 gruf |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
@ -0,0 +1,41 @@
|
||||
# go-mangler |
||||
|
||||
[Documentation](https://pkg.go.dev/codeberg.org/gruf/go-mangler). |
||||
|
||||
To put it simply, this is a bit of an odd library. It aims to provide incredibly fast, unique string outputs for all default supported input data types during a given runtime instance. |
||||
|
||||
It is useful, for example, for use as part of larger abstractions involving hashmaps. That was my particular usecase anyways... |
||||
|
||||
This package does make liberal use of the "unsafe" package. |
||||
|
||||
Benchmarks are below. Those with missing values panicked during our set of benchmarks, usually a case of not handling nil values elegantly. Please note the more important thing to notice here is the relative difference in benchmark scores, the actual `ns/op`,`B/op`,`allocs/op` accounts for running through over 80 possible test cases, including some not-ideal situations. |
||||
|
||||
The choice of libraries in the benchmark are just a selection of libraries that could be used in a similar manner to this one, i.e. serializing in some manner. |
||||
|
||||
``` |
||||
go test -run=none -benchmem -gcflags=all='-l=4' -bench=.* |
||||
goos: linux |
||||
goarch: amd64 |
||||
pkg: codeberg.org/gruf/go-mangler |
||||
cpu: 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz |
||||
BenchmarkMangle |
||||
BenchmarkMangle-8 877761 1323 ns/op 0 B/op 0 allocs/op |
||||
BenchmarkMangleKnown |
||||
BenchmarkMangleKnown-8 1462954 814.5 ns/op 0 B/op 0 allocs/op |
||||
BenchmarkJSON |
||||
BenchmarkJSON-8 199930 5910 ns/op 2698 B/op 119 allocs/op |
||||
BenchmarkLoosy |
||||
BenchmarkLoosy-8 307575 3718 ns/op 664 B/op 53 allocs/op |
||||
BenchmarkBinary |
||||
BenchmarkBinary-8 413216 2640 ns/op 3824 B/op 116 allocs/op |
||||
BenchmarkFmt |
||||
BenchmarkFmt-8 133429 8568 ns/op 3010 B/op 207 allocs/op |
||||
BenchmarkFxmackerCbor |
||||
BenchmarkFxmackerCbor-8 258562 4268 ns/op 2118 B/op 134 allocs/op |
||||
BenchmarkMitchellhHashStructure |
||||
BenchmarkMitchellhHashStructure-8 88941 13049 ns/op 10269 B/op 1096 allocs/op |
||||
BenchmarkCnfStructhash |
||||
BenchmarkCnfStructhash-8 5586 179537 ns/op 290373 B/op 5863 allocs/op |
||||
PASS |
||||
ok codeberg.org/gruf/go-mangler 12.469s |
||||
``` |
||||
@ -0,0 +1,250 @@
|
||||
package mangler |
||||
|
||||
import ( |
||||
"reflect" |
||||
"unsafe" |
||||
|
||||
"github.com/modern-go/reflect2" |
||||
) |
||||
|
||||
// The interfaces below describe the method sets that
// loadIface checks for when picking a fallback Mangler.

// byteser is any type with a Bytes() method.
type byteser interface {
	Bytes() []byte
}

// stringer is any type with a String() method.
type stringer interface {
	String() string
}

// binarymarshaler is any type with a MarshalBinary() method.
type binarymarshaler interface {
	MarshalBinary() ([]byte, error)
}

// textmarshaler is any type with a MarshalText() method.
type textmarshaler interface {
	MarshalText() ([]byte, error)
}

// jsonmarshaler is any type with a MarshalJSON() method.
type jsonmarshaler interface {
	MarshalJSON() ([]byte, error)
}
||||
|
||||
// append_uint16 appends u to b as two bytes in
// little-endian order (least significant byte first).
func append_uint16(b []byte, u uint16) []byte {
	lo := byte(u)
	hi := byte(u >> 8)
	return append(b, lo, hi)
}
||||
|
||||
// append_uint32 appends u to b as four bytes in
// little-endian order (least significant byte first).
func append_uint32(b []byte, u uint32) []byte {
	le := [4]byte{
		byte(u),
		byte(u >> 8),
		byte(u >> 16),
		byte(u >> 24),
	}
	return append(b, le[:]...)
}
||||
|
||||
// append_uint64 appends u to b as eight bytes in
// little-endian order (least significant byte first).
func append_uint64(b []byte, u uint64) []byte {
	var le [8]byte
	for n := range le {
		// Byte n holds bits [8n, 8n+8).
		le[n] = byte(u >> (8 * n))
	}
	return append(b, le[:]...)
}
||||
|
||||
func deref_ptr_mangler(rtype reflect.Type, mangle Mangler, count int) Mangler { |
||||
if rtype == nil || mangle == nil || count == 0 { |
||||
panic("bad input") |
||||
} |
||||
|
||||
// Get reflect2's type for later
|
||||
// unsafe interface data repacking,
|
||||
type2 := reflect2.Type2(rtype) |
||||
|
||||
return func(buf []byte, value any) []byte { |
||||
// Get raw value data.
|
||||
ptr := eface_data(value) |
||||
|
||||
// Deref n - 1 number times.
|
||||
for i := 0; i < count-1; i++ { |
||||
|
||||
if ptr == nil { |
||||
// Check for nil values
|
||||
buf = append(buf, '0') |
||||
return buf |
||||
} |
||||
|
||||
// Further deref ptr
|
||||
buf = append(buf, '1') |
||||
ptr = *(*unsafe.Pointer)(ptr) |
||||
} |
||||
|
||||
if ptr == nil { |
||||
// Final nil value check.
|
||||
buf = append(buf, '0') |
||||
return buf |
||||
} |
||||
|
||||
// Repack and mangle fully deref'd
|
||||
value = type2.UnsafeIndirect(ptr) |
||||
buf = append(buf, '1') |
||||
return mangle(buf, value) |
||||
} |
||||
} |
||||
|
||||
func iter_slice_mangler(rtype reflect.Type, mangle Mangler) Mangler { |
||||
if rtype == nil || mangle == nil { |
||||
panic("bad input") |
||||
} |
||||
|
||||
// Get reflect2's type for later
|
||||
// unsafe slice data manipulation.
|
||||
slice2 := reflect2.Type2(rtype).(*reflect2.UnsafeSliceType) |
||||
|
||||
return func(buf []byte, value any) []byte { |
||||
// Get raw value data.
|
||||
ptr := eface_data(value) |
||||
|
||||
// Get length of slice value.
|
||||
n := slice2.UnsafeLengthOf(ptr) |
||||
|
||||
for i := 0; i < n; i++ { |
||||
// Mangle data at each slice index.
|
||||
e := slice2.UnsafeGetIndex(ptr, i) |
||||
buf = mangle(buf, e) |
||||
buf = append(buf, ',') |
||||
} |
||||
|
||||
if n > 0 { |
||||
// Drop final comma.
|
||||
buf = buf[:len(buf)-1] |
||||
} |
||||
|
||||
return buf |
||||
} |
||||
} |
||||
|
||||
func iter_array_mangler(rtype reflect.Type, mangle Mangler) Mangler { |
||||
if rtype == nil || mangle == nil { |
||||
panic("bad input") |
||||
} |
||||
|
||||
// Get reflect2's type for later
|
||||
// unsafe slice data manipulation.
|
||||
array2 := reflect2.Type2(rtype).(*reflect2.UnsafeArrayType) |
||||
n := array2.Len() |
||||
|
||||
return func(buf []byte, value any) []byte { |
||||
// Get raw value data.
|
||||
ptr := eface_data(value) |
||||
|
||||
for i := 0; i < n; i++ { |
||||
// Mangle data at each slice index.
|
||||
e := array2.UnsafeGetIndex(ptr, i) |
||||
buf = mangle(buf, e) |
||||
buf = append(buf, ',') |
||||
} |
||||
|
||||
if n > 0 { |
||||
// Drop final comma.
|
||||
buf = buf[:len(buf)-1] |
||||
} |
||||
|
||||
return buf |
||||
} |
||||
} |
||||
|
||||
func iter_map_mangler(rtype reflect.Type, kmangle, emangle Mangler) Mangler { |
||||
if rtype == nil || kmangle == nil || emangle == nil { |
||||
panic("bad input") |
||||
} |
||||
|
||||
// Get reflect2's type for later
|
||||
// unsafe map data manipulation.
|
||||
map2 := reflect2.Type2(rtype).(*reflect2.UnsafeMapType) |
||||
key2, elem2 := map2.Key(), map2.Elem() |
||||
|
||||
return func(buf []byte, value any) []byte { |
||||
// Get raw value data.
|
||||
ptr := eface_data(value) |
||||
ptr = indirect_ptr(ptr) |
||||
|
||||
// Create iterator for map value.
|
||||
iter := map2.UnsafeIterate(ptr) |
||||
|
||||
// Check if empty map.
|
||||
empty := !iter.HasNext() |
||||
|
||||
for iter.HasNext() { |
||||
// Get key + elem data as ifaces.
|
||||
kptr, eptr := iter.UnsafeNext() |
||||
key := key2.UnsafeIndirect(kptr) |
||||
elem := elem2.UnsafeIndirect(eptr) |
||||
|
||||
// Mangle data for key + elem.
|
||||
buf = kmangle(buf, key) |
||||
buf = append(buf, ':') |
||||
buf = emangle(buf, elem) |
||||
buf = append(buf, ',') |
||||
} |
||||
|
||||
if !empty { |
||||
// Drop final comma.
|
||||
buf = buf[:len(buf)-1] |
||||
} |
||||
|
||||
return buf |
||||
} |
||||
} |
||||
|
||||
func iter_struct_mangler(rtype reflect.Type, manglers []Mangler) Mangler { |
||||
if rtype == nil || len(manglers) != rtype.NumField() { |
||||
panic("bad input") |
||||
} |
||||
|
||||
type field struct { |
||||
type2 reflect2.Type |
||||
field *reflect2.UnsafeStructField |
||||
mangle Mangler |
||||
} |
||||
|
||||
// Get reflect2's type for later
|
||||
// unsafe struct field data access.
|
||||
struct2 := reflect2.Type2(rtype).(*reflect2.UnsafeStructType) |
||||
|
||||
// Bundle together the fields and manglers.
|
||||
fields := make([]field, rtype.NumField()) |
||||
for i := range fields { |
||||
fields[i].field = struct2.Field(i).(*reflect2.UnsafeStructField) |
||||
fields[i].type2 = fields[i].field.Type() |
||||
fields[i].mangle = manglers[i] |
||||
if fields[i].type2 == nil || |
||||
fields[i].field == nil || |
||||
fields[i].mangle == nil { |
||||
panic("bad input") |
||||
} |
||||
} |
||||
|
||||
return func(buf []byte, value any) []byte { |
||||
// Get raw value data.
|
||||
ptr := eface_data(value) |
||||
|
||||
for i := range fields { |
||||
// Get struct field as iface via offset.
|
||||
fptr := fields[i].field.UnsafeGet(ptr) |
||||
field := fields[i].type2.UnsafeIndirect(fptr) |
||||
|
||||
// Mangle the struct field data.
|
||||
buf = fields[i].mangle(buf, field) |
||||
buf = append(buf, ',') |
||||
} |
||||
|
||||
if len(fields) > 0 { |
||||
// Drop final comma.
|
||||
buf = buf[:len(buf)-1] |
||||
} |
||||
|
||||
return buf |
||||
} |
||||
} |
||||
|
||||
// indirect_ptr adds one level of indirection: it returns a
// pointer to a copy of 'p', i.e. a pointer-to-pointer.
func indirect_ptr(p unsafe.Pointer) unsafe.Pointer {
	slot := &p
	return unsafe.Pointer(slot)
}
||||
|
||||
// eface_data returns the raw data word of interface value 'a', by
// reinterpreting it as a pair of pointers and reading the second one.
func eface_data(a any) unsafe.Pointer {
	type eface struct {
		_    unsafe.Pointer
		data unsafe.Pointer
	}
	hdr := (*eface)(unsafe.Pointer(&a))
	return hdr.data
}
||||
@ -0,0 +1,267 @@
|
||||
package mangler |
||||
|
||||
import ( |
||||
"reflect" |
||||
) |
||||
|
||||
// loadMangler is the top-most Mangler load function. It guarantees that a Mangler
|
||||
// function will be returned for given value interface{} and reflected type. Else panics.
|
||||
func loadMangler(a any, t reflect.Type) Mangler { |
||||
// Load mangler fn
|
||||
mng := load(a, t) |
||||
if mng != nil { |
||||
return mng |
||||
} |
||||
|
||||
// No mangler function could be determined
|
||||
panic("cannot mangle type: " + t.String()) |
||||
} |
||||
|
||||
// load will load a Mangler or reflect Mangler for given type and iface 'a'.
|
||||
// Note: allocates new interface value if nil provided, i.e. if coming via reflection.
|
||||
func load(a any, t reflect.Type) Mangler { |
||||
if t == nil { |
||||
// There is no reflect type to search by
|
||||
panic("cannot mangle nil interface{} type") |
||||
} |
||||
|
||||
if a == nil { |
||||
// Alloc new iface instance
|
||||
v := reflect.New(t).Elem() |
||||
a = v.Interface() |
||||
} |
||||
|
||||
// Check for Mangled implementation.
|
||||
if _, ok := a.(Mangled); ok { |
||||
return mangle_mangled |
||||
} |
||||
|
||||
// Search mangler by reflection.
|
||||
mng := loadReflect(t) |
||||
if mng != nil { |
||||
return mng |
||||
} |
||||
|
||||
// Prefer iface mangler.
|
||||
mng = loadIface(a) |
||||
if mng != nil { |
||||
return mng |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// loadIface is used as a near-last-resort interface{} type switch
|
||||
// loader for types implementating other known (slower) functions.
|
||||
func loadIface(a any) Mangler { |
||||
switch a.(type) { |
||||
case binarymarshaler: |
||||
return mangle_binary |
||||
case byteser: |
||||
return mangle_byteser |
||||
case stringer: |
||||
return mangle_stringer |
||||
case textmarshaler: |
||||
return mangle_text |
||||
case jsonmarshaler: |
||||
return mangle_json |
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
|
||||
// loadReflect will load a Mangler (or rMangler) function for the given reflected type info.
|
||||
// NOTE: this is used as the top level load function for nested reflective searches.
|
||||
func loadReflect(t reflect.Type) Mangler { |
||||
switch t.Kind() { |
||||
case reflect.Pointer: |
||||
return loadReflectPtr(t) |
||||
|
||||
case reflect.String: |
||||
return mangle_string |
||||
|
||||
case reflect.Struct: |
||||
return loadReflectStruct(t) |
||||
|
||||
case reflect.Array: |
||||
return loadReflectArray(t) |
||||
|
||||
case reflect.Slice: |
||||
return loadReflectSlice(t) |
||||
|
||||
case reflect.Map: |
||||
return loadReflectMap(t) |
||||
|
||||
case reflect.Bool: |
||||
return mangle_bool |
||||
|
||||
case reflect.Int, |
||||
reflect.Uint, |
||||
reflect.Uintptr: |
||||
return mangle_platform_int() |
||||
|
||||
case reflect.Int8, reflect.Uint8: |
||||
return mangle_8bit |
||||
|
||||
case reflect.Int16, reflect.Uint16: |
||||
return mangle_16bit |
||||
|
||||
case reflect.Int32, reflect.Uint32: |
||||
return mangle_32bit |
||||
|
||||
case reflect.Int64, reflect.Uint64: |
||||
return mangle_64bit |
||||
|
||||
case reflect.Float32: |
||||
return mangle_32bit |
||||
|
||||
case reflect.Float64: |
||||
return mangle_64bit |
||||
|
||||
case reflect.Complex64: |
||||
return mangle_64bit |
||||
|
||||
case reflect.Complex128: |
||||
return mangle_128bit |
||||
|
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
|
||||
// loadReflectPtr loads a Mangler (or rMangler) function for a ptr's element type.
|
||||
// This also handles further dereferencing of any further ptr indrections (e.g. ***int).
|
||||
func loadReflectPtr(t reflect.Type) Mangler { |
||||
var count int |
||||
|
||||
// Elem
|
||||
et := t |
||||
|
||||
// Iteratively dereference ptrs
|
||||
for et.Kind() == reflect.Pointer { |
||||
et = et.Elem() |
||||
count++ |
||||
} |
||||
|
||||
// Search for ptr elemn type mangler.
|
||||
if mng := load(nil, et); mng != nil { |
||||
return deref_ptr_mangler(et, mng, count) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// loadReflectKnownSlice loads a Mangler function for a
|
||||
// known slice-of-element type (in this case, primtives).
|
||||
func loadReflectKnownSlice(et reflect.Type) Mangler { |
||||
switch et.Kind() { |
||||
case reflect.String: |
||||
return mangle_string_slice |
||||
|
||||
case reflect.Bool: |
||||
return mangle_bool_slice |
||||
|
||||
case reflect.Int, |
||||
reflect.Uint, |
||||
reflect.Uintptr: |
||||
return mangle_platform_int_slice() |
||||
|
||||
case reflect.Int8, reflect.Uint8: |
||||
return mangle_8bit_slice |
||||
|
||||
case reflect.Int16, reflect.Uint16: |
||||
return mangle_16bit_slice |
||||
|
||||
case reflect.Int32, reflect.Uint32: |
||||
return mangle_32bit_slice |
||||
|
||||
case reflect.Int64, reflect.Uint64: |
||||
return mangle_64bit_slice |
||||
|
||||
case reflect.Float32: |
||||
return mangle_32bit_slice |
||||
|
||||
case reflect.Float64: |
||||
return mangle_64bit_slice |
||||
|
||||
case reflect.Complex64: |
||||
return mangle_64bit_slice |
||||
|
||||
case reflect.Complex128: |
||||
return mangle_128bit_slice |
||||
|
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
|
||||
// loadReflectSlice ...
|
||||
func loadReflectSlice(t reflect.Type) Mangler { |
||||
// Element type
|
||||
et := t.Elem() |
||||
|
||||
// Preferably look for known slice mangler func
|
||||
if mng := loadReflectKnownSlice(et); mng != nil { |
||||
return mng |
||||
} |
||||
|
||||
// Fallback to nested mangler iteration.
|
||||
if mng := load(nil, et); mng != nil { |
||||
return iter_slice_mangler(t, mng) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// loadReflectArray ...
|
||||
func loadReflectArray(t reflect.Type) Mangler { |
||||
// Element type.
|
||||
et := t.Elem() |
||||
|
||||
// Use manglers for nested iteration.
|
||||
if mng := load(nil, et); mng != nil { |
||||
return iter_array_mangler(t, mng) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// loadReflectMap ...
|
||||
func loadReflectMap(t reflect.Type) Mangler { |
||||
// Map types.
|
||||
kt := t.Key() |
||||
et := t.Elem() |
||||
|
||||
// Load manglers.
|
||||
kmng := load(nil, kt) |
||||
emng := load(nil, et) |
||||
|
||||
// Use manglers for nested iteration.
|
||||
if kmng != nil && emng != nil { |
||||
return iter_map_mangler(t, kmng, emng) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// loadReflectStruct ...
|
||||
func loadReflectStruct(t reflect.Type) Mangler { |
||||
var mngs []Mangler |
||||
|
||||
// Gather manglers for all fields.
|
||||
for i := 0; i < t.NumField(); i++ { |
||||
field := t.Field(i) |
||||
|
||||
// Load mangler for field type.
|
||||
mng := load(nil, field.Type) |
||||
if mng == nil { |
||||
return nil |
||||
} |
||||
|
||||
// Append next to map.
|
||||
mngs = append(mngs, mng) |
||||
} |
||||
|
||||
// Use manglers for nested iteration.
|
||||
return iter_struct_mangler(t, mngs) |
||||
} |
||||
@ -0,0 +1,154 @@
|
||||
package mangler |
||||
|
||||
import ( |
||||
"reflect" |
||||
"sync" |
||||
"unsafe" |
||||
) |
||||
|
||||
// manglers caches Mangler functions, keyed by the
|
||||
// uintptr value of each runtime type pointer.
|
||||
var manglers sync.Map |
||||
|
||||
// Mangled may be implemented by any type to supply its own
// mangling logic, improving performance when mangling that type.
type Mangled interface {
	// Mangle takes the byte buffer 'buf'
	// and returns a byte buffer.
	Mangle(buf []byte) []byte
}
||||
|
||||
// Mangler is a function taking an input interface value of known type,
// appending it in mangled serialized form to the given byte buffer and
// returning the result. Although the value is passed as an interface,
// Mangler functions are looked up by the value's runtime type pointer,
// which is what allows the input value's type to be known.
type Mangler func(
	buf []byte,
	value any,
) []byte
||||
|
||||
// Get will fetch the Mangler function for given runtime type.
|
||||
// Note that the returned mangler will be a no-op in the case
|
||||
// that an incorrect type is passed as the value argument.
|
||||
func Get(t reflect.Type) Mangler { |
||||
var mng Mangler |
||||
|
||||
// Get raw runtime type ptr
|
||||
uptr := uintptr(eface_data(t)) |
||||
|
||||
// Look for a cached mangler
|
||||
v, ok := manglers.Load(uptr) |
||||
|
||||
if !ok { |
||||
// Load mangler function
|
||||
mng = loadMangler(nil, t) |
||||
} else { |
||||
// cast cached value
|
||||
mng = v.(Mangler) |
||||
} |
||||
|
||||
// Get platform int mangler func.
|
||||
mangle_int := mangle_platform_int() |
||||
|
||||
return func(buf []byte, value any) []byte { |
||||
// Type check passed against original type.
|
||||
if vt := reflect.TypeOf(value); vt != t { |
||||
return buf |
||||
} |
||||
|
||||
// First write the type ptr (this adds
|
||||
// a unique prefix for each runtime type).
|
||||
buf = mangle_int(buf, uptr) |
||||
|
||||
// Finally, mangle value
|
||||
return mng(buf, value) |
||||
} |
||||
} |
||||
|
||||
// Register will register the given Mangler function for use with vars of given runtime type. This allows
|
||||
// registering performant manglers for existing types not implementing Mangled (e.g. std library types).
|
||||
// NOTE: panics if there already exists a Mangler function for given type. Register on init().
|
||||
func Register(t reflect.Type, m Mangler) { |
||||
if t == nil { |
||||
// Nil interface{} types cannot be searched by, do not accept
|
||||
panic("cannot register mangler for nil interface{} type") |
||||
} |
||||
|
||||
// Get raw runtime type ptr
|
||||
uptr := uintptr(eface_data(t)) |
||||
|
||||
// Ensure this is a unique encoder
|
||||
if _, ok := manglers.Load(uptr); ok { |
||||
panic("already registered mangler for type: " + t.String()) |
||||
} |
||||
|
||||
// Cache this encoder func
|
||||
manglers.Store(uptr, m) |
||||
} |
||||
|
||||
// Append will append the mangled form of input value 'a' to buffer 'b'.
|
||||
// See mangler.String() for more information on mangled output.
|
||||
func Append(b []byte, a any) []byte { |
||||
var mng Mangler |
||||
|
||||
// Get reflect type of 'a'
|
||||
t := reflect.TypeOf(a) |
||||
|
||||
// Get raw runtime type ptr
|
||||
uptr := uintptr(eface_data(t)) |
||||
|
||||
// Look for a cached mangler
|
||||
v, ok := manglers.Load(uptr) |
||||
|
||||
if !ok { |
||||
// Load mangler into cache
|
||||
mng = loadMangler(nil, t) |
||||
manglers.Store(uptr, mng) |
||||
} else { |
||||
// cast cached value
|
||||
mng = v.(Mangler) |
||||
} |
||||
|
||||
// Get platform int mangler func.
|
||||
mangle_int := mangle_platform_int() |
||||
|
||||
// First write the type ptr (this adds
|
||||
// a unique prefix for each runtime type).
|
||||
b = mangle_int(b, uptr) |
||||
|
||||
// Finally, mangle value
|
||||
return mng(b, a) |
||||
} |
||||
|
||||
// String will return the mangled format of input value 'a'. This
|
||||
// mangled output will be unique for all default supported input types
|
||||
// during a single runtime instance. Uniqueness cannot be guaranteed
|
||||
// between separate runtime instances (whether running concurrently, or
|
||||
// the same application running at different times).
|
||||
//
|
||||
// The exact formatting of the output data should not be relied upon,
|
||||
// only that it is unique given the above constraints. Generally though,
|
||||
// the mangled output is the binary formatted text of given input data.
|
||||
//
|
||||
// Uniqueness is guaranteed for similar input data of differing types
|
||||
// (e.g. string("hello world") vs. []byte("hello world")) by prefixing
|
||||
// mangled output with the input data's runtime type pointer.
|
||||
//
|
||||
// Default supported types include:
|
||||
// - string
|
||||
// - bool
|
||||
// - int,int8,int16,int32,int64
|
||||
// - uint,uint8,uint16,uint32,uint64,uintptr
|
||||
// - float32,float64
|
||||
// - complex64,complex128
|
||||
// - arbitrary structs
|
||||
// - all type aliases of above
|
||||
// - time.Time{}
|
||||
// - url.URL{}
|
||||
// - net.IPAddr{}
|
||||
// - netip.Addr{}, netip.AddrPort{}
|
||||
// - mangler.Mangled{}
|
||||
// - fmt.Stringer{}
|
||||
// - json.Marshaler{}
|
||||
// - encoding.BinaryMarshaler{}
|
||||
// - encoding.TextMarshaler{}
|
||||
// - all pointers to the above
|
||||
// - all slices / arrays of the above
|
||||
// - all map keys / values of the above
|
||||
func String(a any) string { |
||||
b := Append(make([]byte, 0, 32), a) |
||||
return *(*string)(unsafe.Pointer(&b)) |
||||
} |
||||
@ -0,0 +1,190 @@
|
||||
package mangler |
||||
|
||||
import ( |
||||
"math/bits" |
||||
_ "unsafe" |
||||
) |
||||
|
||||
// Notes:
|
||||
// the use of unsafe conversion from the direct interface values to
|
||||
// the chosen types in each of the below functions allows us to convert
|
||||
// not only those types directly, but anything type-aliased to those
|
||||
// types. e.g. `time.Duration` directly as int64.
|
||||
|
||||
func mangle_string(buf []byte, a any) []byte { |
||||
return append(buf, *(*string)(eface_data(a))...) |
||||
} |
||||
|
||||
func mangle_string_slice(buf []byte, a any) []byte { |
||||
s := *(*[]string)(eface_data(a)) |
||||
for _, s := range s { |
||||
buf = append(buf, s...) |
||||
buf = append(buf, ',') |
||||
} |
||||
if len(s) > 0 { |
||||
buf = buf[:len(buf)-1] |
||||
} |
||||
return buf |
||||
} |
||||
|
||||
func mangle_bool(buf []byte, a any) []byte { |
||||
if *(*bool)(eface_data(a)) { |
||||
return append(buf, '1') |
||||
} |
||||
return append(buf, '0') |
||||
} |
||||
|
||||
func mangle_bool_slice(buf []byte, a any) []byte { |
||||
for _, b := range *(*[]bool)(eface_data(a)) { |
||||
if b { |
||||
buf = append(buf, '1') |
||||
} else { |
||||
buf = append(buf, '0') |
||||
} |
||||
} |
||||
return buf |
||||
} |
||||
|
||||
func mangle_8bit(buf []byte, a any) []byte { |
||||
return append(buf, *(*uint8)(eface_data(a))) |
||||
} |
||||
|
||||
func mangle_8bit_slice(buf []byte, a any) []byte { |
||||
return append(buf, *(*[]uint8)(eface_data(a))...) |
||||
} |
||||
|
||||
func mangle_16bit(buf []byte, a any) []byte { |
||||
return append_uint16(buf, *(*uint16)(eface_data(a))) |
||||
} |
||||
|
||||
func mangle_16bit_slice(buf []byte, a any) []byte { |
||||
for _, u := range *(*[]uint16)(eface_data(a)) { |
||||
buf = append_uint16(buf, u) |
||||
} |
||||
return buf |
||||
} |
||||
|
||||
func mangle_32bit(buf []byte, a any) []byte { |
||||
return append_uint32(buf, *(*uint32)(eface_data(a))) |
||||
} |
||||
|
||||
func mangle_32bit_slice(buf []byte, a any) []byte { |
||||
for _, u := range *(*[]uint32)(eface_data(a)) { |
||||
buf = append_uint32(buf, u) |
||||
} |
||||
return buf |
||||
} |
||||
|
||||
func mangle_64bit(buf []byte, a any) []byte { |
||||
return append_uint64(buf, *(*uint64)(eface_data(a))) |
||||
} |
||||
|
||||
func mangle_64bit_slice(buf []byte, a any) []byte { |
||||
for _, u := range *(*[]uint64)(eface_data(a)) { |
||||
buf = append_uint64(buf, u) |
||||
} |
||||
return buf |
||||
} |
||||
|
||||
func mangle_platform_int() Mangler { |
||||
switch bits.UintSize { |
||||
case 32: |
||||
return mangle_32bit |
||||
case 64: |
||||
return mangle_64bit |
||||
default: |
||||
panic("unexpected platform int size") |
||||
} |
||||
} |
||||
|
||||
func mangle_platform_int_slice() Mangler { |
||||
switch bits.UintSize { |
||||
case 32: |
||||
return mangle_32bit_slice |
||||
case 64: |
||||
return mangle_64bit_slice |
||||
default: |
||||
panic("unexpected platform int size") |
||||
} |
||||
} |
||||
|
||||
func mangle_128bit(buf []byte, a any) []byte { |
||||
u2 := *(*[2]uint64)(eface_data(a)) |
||||
buf = append_uint64(buf, u2[0]) |
||||
buf = append_uint64(buf, u2[1]) |
||||
return buf |
||||
} |
||||
|
||||
func mangle_128bit_slice(buf []byte, a any) []byte { |
||||
for _, u2 := range *(*[][2]uint64)(eface_data(a)) { |
||||
buf = append_uint64(buf, u2[0]) |
||||
buf = append_uint64(buf, u2[1]) |
||||
} |
||||
return buf |
||||
} |
||||
|
||||
func mangle_mangled(buf []byte, a any) []byte { |
||||
if v := a.(Mangled); v != nil { |
||||
buf = append(buf, '1') |
||||
return v.Mangle(buf) |
||||
} |
||||
buf = append(buf, '0') |
||||
return buf |
||||
} |
||||
|
||||
func mangle_binary(buf []byte, a any) []byte { |
||||
if v := a.(binarymarshaler); v != nil { |
||||
b, err := v.MarshalBinary() |
||||
if err != nil { |
||||
panic("mangle_binary: " + err.Error()) |
||||
} |
||||
buf = append(buf, '1') |
||||
return append(buf, b...) |
||||
} |
||||
buf = append(buf, '0') |
||||
return buf |
||||
} |
||||
|
||||
func mangle_byteser(buf []byte, a any) []byte { |
||||
if v := a.(byteser); v != nil { |
||||
buf = append(buf, '1') |
||||
return append(buf, v.Bytes()...) |
||||
} |
||||
buf = append(buf, '0') |
||||
return buf |
||||
} |
||||
|
||||
func mangle_stringer(buf []byte, a any) []byte { |
||||
if v := a.(stringer); v != nil { |
||||
buf = append(buf, '1') |
||||
return append(buf, v.String()...) |
||||
} |
||||
buf = append(buf, '0') |
||||
return buf |
||||
} |
||||
|
||||
func mangle_text(buf []byte, a any) []byte { |
||||
if v := a.(textmarshaler); v != nil { |
||||
b, err := v.MarshalText() |
||||
if err != nil { |
||||
panic("mangle_text: " + err.Error()) |
||||
} |
||||
buf = append(buf, '1') |
||||
return append(buf, b...) |
||||
} |
||||
buf = append(buf, '0') |
||||
return buf |
||||
} |
||||
|
||||
func mangle_json(buf []byte, a any) []byte { |
||||
if v := a.(jsonmarshaler); v != nil { |
||||
b, err := v.MarshalJSON() |
||||
if err != nil { |
||||
panic("mangle_json: " + err.Error()) |
||||
} |
||||
buf = append(buf, '1') |
||||
return append(buf, b...) |
||||
} |
||||
buf = append(buf, '0') |
||||
return buf |
||||
} |
||||
@ -1,404 +0,0 @@
|
||||
package structr |
||||
|
||||
import ( |
||||
"reflect" |
||||
"sync" |
||||
"unsafe" |
||||
|
||||
"github.com/zeebo/xxh3" |
||||
) |
||||
|
||||
var hash_pool sync.Pool |
||||
|
||||
func get_hasher() *xxh3.Hasher { |
||||
v := hash_pool.Get() |
||||
if v == nil { |
||||
v = new(xxh3.Hasher) |
||||
} |
||||
return v.(*xxh3.Hasher) |
||||
} |
||||
|
||||
func hash_sum(fields []structfield, h *xxh3.Hasher, key []any) (Hash, bool) { |
||||
if len(key) != len(fields) { |
||||
panicf("incorrect number key parts: want=%d received=%d", |
||||
len(key), |
||||
len(fields), |
||||
) |
||||
} |
||||
var zero bool |
||||
h.Reset() |
||||
for i, part := range key { |
||||
zero = fields[i].hasher(h, part) || zero |
||||
} |
||||
// See: https://github.com/Cyan4973/xxHash/issues/453#issuecomment-696838445
|
||||
//
|
||||
// In order to extract 32-bit from a good 64-bit hash result,
|
||||
// there are many possible choices, which are all valid.
|
||||
// I would typically grab the lower 32-bit and call it a day.
|
||||
//
|
||||
// Grabbing any other 32-bit (the upper part for example) is fine too.
|
||||
//
|
||||
// xoring higher and lower bits makes more sense whenever the produced hash offers dubious quality.
|
||||
// FNV, for example, has poor mixing in its lower bits, so it's better to mix with the higher bits.
|
||||
//
|
||||
// XXH3 already performs significant output mixing before returning the data,
|
||||
// so it's not beneficial to add another xorfold stage.
|
||||
return uint64ToHash(h.Sum64()), zero |
||||
} |
||||
|
||||
func hasher(t reflect.Type) func(*xxh3.Hasher, any) bool { |
||||
switch t.Kind() { |
||||
case reflect.Int, |
||||
reflect.Uint, |
||||
reflect.Uintptr: |
||||
switch unsafe.Sizeof(int(0)) { |
||||
case 4: |
||||
return hash32bit |
||||
case 8: |
||||
return hash64bit |
||||
default: |
||||
panic("unexpected platform int size") |
||||
} |
||||
|
||||
case reflect.Int8, |
||||
reflect.Uint8: |
||||
return hash8bit |
||||
|
||||
case reflect.Int16, |
||||
reflect.Uint16: |
||||
return hash16bit |
||||
|
||||
case reflect.Int32, |
||||
reflect.Uint32, |
||||
reflect.Float32: |
||||
return hash32bit |
||||
|
||||
case reflect.Int64, |
||||
reflect.Uint64, |
||||
reflect.Float64, |
||||
reflect.Complex64: |
||||
return hash64bit |
||||
|
||||
case reflect.String: |
||||
return hashstring |
||||
|
||||
case reflect.Pointer: |
||||
switch t.Elem().Kind() { |
||||
case reflect.Int, |
||||
reflect.Uint, |
||||
reflect.Uintptr: |
||||
switch unsafe.Sizeof(int(0)) { |
||||
case 4: |
||||
return hash32bitptr |
||||
case 8: |
||||
return hash64bitptr |
||||
default: |
||||
panic("unexpected platform int size") |
||||
} |
||||
|
||||
case reflect.Int8, |
||||
reflect.Uint8: |
||||
return hash8bitptr |
||||
|
||||
case reflect.Int16, |
||||
reflect.Uint16: |
||||
return hash16bitptr |
||||
|
||||
case reflect.Int32, |
||||
reflect.Uint32, |
||||
reflect.Float32: |
||||
return hash32bitptr |
||||
|
||||
case reflect.Int64, |
||||
reflect.Uint64, |
||||
reflect.Float64, |
||||
reflect.Complex64: |
||||
return hash64bitptr |
||||
|
||||
case reflect.String: |
||||
return hashstringptr |
||||
} |
||||
|
||||
case reflect.Slice: |
||||
switch t.Elem().Kind() { |
||||
case reflect.Int, |
||||
reflect.Uint, |
||||
reflect.Uintptr: |
||||
switch unsafe.Sizeof(int(0)) { |
||||
case 4: |
||||
return hash32bitslice |
||||
case 8: |
||||
return hash64bitslice |
||||
default: |
||||
panic("unexpected platform int size") |
||||
} |
||||
|
||||
case reflect.Int8, |
||||
reflect.Uint8: |
||||
return hash8bitslice |
||||
|
||||
case reflect.Int16, |
||||
reflect.Uint16: |
||||
return hash16bitslice |
||||
|
||||
case reflect.Int32, |
||||
reflect.Uint32, |
||||
reflect.Float32: |
||||
return hash32bitslice |
||||
|
||||
case reflect.Int64, |
||||
reflect.Uint64, |
||||
reflect.Float64, |
||||
reflect.Complex64: |
||||
return hash64bitslice |
||||
|
||||
case reflect.String: |
||||
return hashstringslice |
||||
} |
||||
} |
||||
switch { |
||||
case t.Implements(reflect.TypeOf((*interface{ MarshalBinary() ([]byte, error) })(nil)).Elem()): |
||||
return hashbinarymarshaler |
||||
|
||||
case t.Implements(reflect.TypeOf((*interface{ Bytes() []byte })(nil)).Elem()): |
||||
return hashbytesmethod |
||||
|
||||
case t.Implements(reflect.TypeOf((*interface{ String() string })(nil)).Elem()): |
||||
return hashstringmethod |
||||
|
||||
case t.Implements(reflect.TypeOf((*interface{ MarshalText() ([]byte, error) })(nil)).Elem()): |
||||
return hashtextmarshaler |
||||
|
||||
case t.Implements(reflect.TypeOf((*interface{ MarshalJSON() ([]byte, error) })(nil)).Elem()): |
||||
return hashjsonmarshaler |
||||
} |
||||
panic("unhashable type") |
||||
} |
||||
|
||||
func hash8bit(h *xxh3.Hasher, a any) bool { |
||||
u := *(*uint8)(data_ptr(a)) |
||||
_, _ = h.Write([]byte{u}) |
||||
return u == 0 |
||||
} |
||||
|
||||
func hash8bitptr(h *xxh3.Hasher, a any) bool { |
||||
u := (*uint8)(data_ptr(a)) |
||||
if u == nil { |
||||
_, _ = h.Write([]byte{ |
||||
0, |
||||
}) |
||||
return true |
||||
} else { |
||||
_, _ = h.Write([]byte{ |
||||
1, |
||||
byte(*u), |
||||
}) |
||||
return false |
||||
} |
||||
} |
||||
|
||||
func hash8bitslice(h *xxh3.Hasher, a any) bool { |
||||
b := *(*[]byte)(data_ptr(a)) |
||||
_, _ = h.Write(b) |
||||
return b == nil |
||||
} |
||||
|
||||
func hash16bit(h *xxh3.Hasher, a any) bool { |
||||
u := *(*uint16)(data_ptr(a)) |
||||
_, _ = h.Write([]byte{ |
||||
byte(u), |
||||
byte(u >> 8), |
||||
}) |
||||
return u == 0 |
||||
} |
||||
|
||||
func hash16bitptr(h *xxh3.Hasher, a any) bool { |
||||
u := (*uint16)(data_ptr(a)) |
||||
if u == nil { |
||||
_, _ = h.Write([]byte{ |
||||
0, |
||||
}) |
||||
return true |
||||
} else { |
||||
_, _ = h.Write([]byte{ |
||||
1, |
||||
byte(*u), |
||||
byte(*u >> 8), |
||||
}) |
||||
return false |
||||
} |
||||
} |
||||
|
||||
func hash16bitslice(h *xxh3.Hasher, a any) bool { |
||||
u := *(*[]uint16)(data_ptr(a)) |
||||
for i := range u { |
||||
_, _ = h.Write([]byte{ |
||||
byte(u[i]), |
||||
byte(u[i] >> 8), |
||||
}) |
||||
} |
||||
return u == nil |
||||
} |
||||
|
||||
func hash32bit(h *xxh3.Hasher, a any) bool { |
||||
u := *(*uint32)(data_ptr(a)) |
||||
_, _ = h.Write([]byte{ |
||||
byte(u), |
||||
byte(u >> 8), |
||||
byte(u >> 16), |
||||
byte(u >> 24), |
||||
}) |
||||
return u == 0 |
||||
} |
||||
|
||||
func hash32bitptr(h *xxh3.Hasher, a any) bool { |
||||
u := (*uint32)(data_ptr(a)) |
||||
if u == nil { |
||||
_, _ = h.Write([]byte{ |
||||
0, |
||||
}) |
||||
return true |
||||
} else { |
||||
_, _ = h.Write([]byte{ |
||||
1, |
||||
byte(*u), |
||||
byte(*u >> 8), |
||||
byte(*u >> 16), |
||||
byte(*u >> 24), |
||||
}) |
||||
return false |
||||
} |
||||
} |
||||
|
||||
func hash32bitslice(h *xxh3.Hasher, a any) bool { |
||||
u := *(*[]uint32)(data_ptr(a)) |
||||
for i := range u { |
||||
_, _ = h.Write([]byte{ |
||||
byte(u[i]), |
||||
byte(u[i] >> 8), |
||||
byte(u[i] >> 16), |
||||
byte(u[i] >> 24), |
||||
}) |
||||
} |
||||
return u == nil |
||||
} |
||||
|
||||
func hash64bit(h *xxh3.Hasher, a any) bool { |
||||
u := *(*uint64)(data_ptr(a)) |
||||
_, _ = h.Write([]byte{ |
||||
byte(u), |
||||
byte(u >> 8), |
||||
byte(u >> 16), |
||||
byte(u >> 24), |
||||
byte(u >> 32), |
||||
byte(u >> 40), |
||||
byte(u >> 48), |
||||
byte(u >> 56), |
||||
}) |
||||
return u == 0 |
||||
} |
||||
|
||||
func hash64bitptr(h *xxh3.Hasher, a any) bool { |
||||
u := (*uint64)(data_ptr(a)) |
||||
if u == nil { |
||||
_, _ = h.Write([]byte{ |
||||
0, |
||||
}) |
||||
return true |
||||
} else { |
||||
_, _ = h.Write([]byte{ |
||||
1, |
||||
byte(*u), |
||||
byte(*u >> 8), |
||||
byte(*u >> 16), |
||||
byte(*u >> 24), |
||||
byte(*u >> 32), |
||||
byte(*u >> 40), |
||||
byte(*u >> 48), |
||||
byte(*u >> 56), |
||||
}) |
||||
return false |
||||
} |
||||
} |
||||
|
||||
func hash64bitslice(h *xxh3.Hasher, a any) bool { |
||||
u := *(*[]uint64)(data_ptr(a)) |
||||
for i := range u { |
||||
_, _ = h.Write([]byte{ |
||||
byte(u[i]), |
||||
byte(u[i] >> 8), |
||||
byte(u[i] >> 16), |
||||
byte(u[i] >> 24), |
||||
byte(u[i] >> 32), |
||||
byte(u[i] >> 40), |
||||
byte(u[i] >> 48), |
||||
byte(u[i] >> 56), |
||||
}) |
||||
} |
||||
return u == nil |
||||
} |
||||
|
||||
func hashstring(h *xxh3.Hasher, a any) bool { |
||||
s := *(*string)(data_ptr(a)) |
||||
_, _ = h.WriteString(s) |
||||
return s == "" |
||||
} |
||||
|
||||
func hashstringptr(h *xxh3.Hasher, a any) bool { |
||||
s := (*string)(data_ptr(a)) |
||||
if s == nil { |
||||
_, _ = h.Write([]byte{ |
||||
0, |
||||
}) |
||||
return true |
||||
} else { |
||||
_, _ = h.Write([]byte{ |
||||
1, |
||||
}) |
||||
_, _ = h.WriteString(*s) |
||||
return false |
||||
} |
||||
} |
||||
|
||||
func hashstringslice(h *xxh3.Hasher, a any) bool { |
||||
s := *(*[]string)(data_ptr(a)) |
||||
for i := range s { |
||||
_, _ = h.WriteString(s[i]) |
||||
} |
||||
return s == nil |
||||
} |
||||
|
||||
func hashbinarymarshaler(h *xxh3.Hasher, a any) bool { |
||||
i := a.(interface{ MarshalBinary() ([]byte, error) }) |
||||
b, _ := i.MarshalBinary() |
||||
_, _ = h.Write(b) |
||||
return b == nil |
||||
} |
||||
|
||||
func hashbytesmethod(h *xxh3.Hasher, a any) bool { |
||||
i := a.(interface{ Bytes() []byte }) |
||||
b := i.Bytes() |
||||
_, _ = h.Write(b) |
||||
return b == nil |
||||
} |
||||
|
||||
func hashstringmethod(h *xxh3.Hasher, a any) bool { |
||||
i := a.(interface{ String() string }) |
||||
s := i.String() |
||||
_, _ = h.WriteString(s) |
||||
return s == "" |
||||
} |
||||
|
||||
func hashtextmarshaler(h *xxh3.Hasher, a any) bool { |
||||
i := a.(interface{ MarshalText() ([]byte, error) }) |
||||
b, _ := i.MarshalText() |
||||
_, _ = h.Write(b) |
||||
return b == nil |
||||
} |
||||
|
||||
func hashjsonmarshaler(h *xxh3.Hasher, a any) bool { |
||||
i := a.(interface{ MarshalJSON() ([]byte, error) }) |
||||
b, _ := i.MarshalJSON() |
||||
_, _ = h.Write(b) |
||||
return b == nil |
||||
} |
||||
@ -1,14 +0,0 @@
|
||||
//go:build structr_32bit_hash
|
||||
// +build structr_32bit_hash
|
||||
|
||||
package structr |
||||
|
||||
// Hash is the cache key hash checksum type selected
// by the current build tags. In this build: uint32.
type Hash uint32

// uint64ToHash converts a uint64 to the current
// Hash type by keeping its upper 32 bits.
func uint64ToHash(u uint64) Hash {
	upper := uint32(u >> 32)
	return Hash(upper)
}
||||
@ -1,21 +0,0 @@
|
||||
//go:build structr_48bit_hash
|
||||
// +build structr_48bit_hash
|
||||
|
||||
package structr |
||||
|
||||
// Hash is the cache key hash checksum type selected
// by the current build tags. In this build: 48 bits,
// stored as six bytes.
type Hash [6]byte

// uint64ToHash converts a uint64 to the current Hash type by
// keeping its lowest six bytes, least significant byte first.
func uint64ToHash(u uint64) Hash {
	var h Hash
	for i := range h {
		h[i] = byte(u >> (8 * i))
	}
	return h
}
||||
@ -1,14 +0,0 @@
|
||||
//go:build !structr_32bit_hash && !structr_48bit_hash
|
||||
// +build !structr_32bit_hash,!structr_48bit_hash
|
||||
|
||||
package structr |
||||
|
||||
// Hash is the cache key hash checksum type selected
// by the current build tags. In this build: uint64.
type Hash uint64

// uint64ToHash converts a uint64 to the current
// Hash type; here the value is kept unchanged.
func uint64ToHash(u uint64) Hash {
	h := Hash(u)
	return h
}
||||
@ -0,0 +1,59 @@
|
||||
package structr |
||||
|
||||
import ( |
||||
"sync" |
||||
"unsafe" |
||||
) |
||||
|
||||
type indexed_item struct { |
||||
// linked list elem this item
|
||||
// is stored in a main list.
|
||||
elem list_elem |
||||
|
||||
// indexed stores the indices
|
||||
// this item is stored under.
|
||||
indexed []*index_entry |
||||
|
||||
// cached data with type.
|
||||
data interface{} |
||||
} |
||||
|
||||
var indexed_item_pool sync.Pool |
||||
|
||||
// new_indexed_item returns a new prepared indexed_item.
|
||||
func new_indexed_item() *indexed_item { |
||||
v := indexed_item_pool.Get() |
||||
if v == nil { |
||||
v = new(indexed_item) |
||||
} |
||||
item := v.(*indexed_item) |
||||
ptr := unsafe.Pointer(item) |
||||
item.elem.data = ptr |
||||
return item |
||||
} |
||||
|
||||
// free_indexed_item releases the indexed_item.
|
||||
func free_indexed_item(item *indexed_item) { |
||||
item.elem.data = nil |
||||
item.indexed = item.indexed[:0] |
||||
item.data = nil |
||||
indexed_item_pool.Put(item) |
||||
} |
||||
|
||||
// drop_index will drop the given index entry from item's indexed.
|
||||
// note this also handles freeing the index_entry memory (e.g. to pool)
|
||||
func (i *indexed_item) drop_index(entry *index_entry) { |
||||
for x := 0; x < len(i.indexed); x++ { |
||||
if i.indexed[x] != entry { |
||||
// Prof. Obiwan:
|
||||
// this is not the index
|
||||
// we are looking for.
|
||||
continue |
||||
} |
||||
|
||||
// Move all index entries down + reslice.
|
||||
copy(i.indexed[x:], i.indexed[x+1:]) |
||||
i.indexed = i.indexed[:len(i.indexed)-1] |
||||
break |
||||
} |
||||
} |
||||
@ -0,0 +1,58 @@
|
||||
package structr |
||||
|
||||
import ( |
||||
"sync" |
||||
|
||||
"codeberg.org/gruf/go-byteutil" |
||||
) |
||||
|
||||
// Key represents one key used to look up
// (potentially) stored entries in an Index.
type Key struct {
	// raw holds the values the key was built from.
	raw []any

	// key is the underlying cache key string.
	key string
}

// Key returns the underlying cache key string.
// NOTE: this will not be log output friendly.
func (k Key) Key() string {
	str := k.key
	return str
}

// Equal reports whether both keys have
// the same underlying cache key string.
func (k Key) Equal(o Key) bool {
	if k.key != o.key {
		return false
	}
	return true
}

// Values returns the raw slice of
// values that comprise this Key.
func (k Key) Values() []any {
	vals := k.raw
	return vals
}

// Zero reports whether this is a zero value
// key, i.e. one whose raw values are nil.
func (k Key) Zero() bool {
	if k.raw != nil {
		return false
	}
	return true
}
||||
|
||||
var buf_pool sync.Pool |
||||
|
||||
// new_buffer returns a new initialized byte buffer.
|
||||
func new_buffer() *byteutil.Buffer { |
||||
v := buf_pool.Get() |
||||
if v == nil { |
||||
buf := new(byteutil.Buffer) |
||||
buf.B = make([]byte, 0, 512) |
||||
v = buf |
||||
} |
||||
return v.(*byteutil.Buffer) |
||||
} |
||||
|
||||
// free_buffer releases the byte buffer.
|
||||
func free_buffer(buf *byteutil.Buffer) { |
||||
if cap(buf.B) > int(^uint16(0)) { |
||||
return // drop large bufs
|
||||
} |
||||
buf_pool.Put(buf) |
||||
} |
||||
@ -0,0 +1,306 @@
|
||||
package structr |
||||
|
||||
import ( |
||||
"reflect" |
||||
"sync" |
||||
"unsafe" |
||||
) |
||||
|
||||
// QueueConfig defines config vars
|
||||
// for initializing a struct queue.
|
||||
type QueueConfig[StructType any] struct { |
||||
|
||||
// Indices defines indices to create
|
||||
// in the Queue for the receiving
|
||||
// generic struct parameter type.
|
||||
Indices []IndexConfig |
||||
|
||||
// Pop is called when queue values
|
||||
// are popped, during calls to any
|
||||
// of the Pop___() series of fns.
|
||||
Pop func(StructType) |
||||
} |
||||
|
||||
// Queue provides a structure model queue with
|
||||
// automated indexing and popping by any init
|
||||
// defined lookups of field combinations.
|
||||
type Queue[StructType any] struct { |
||||
|
||||
// indices used in storing passed struct
|
||||
// types by user defined sets of fields.
|
||||
indices []Index |
||||
|
||||
// main underlying
|
||||
// struct item queue.
|
||||
queue list |
||||
|
||||
// hook functions.
|
||||
copy func(StructType) StructType |
||||
pop func(StructType) |
||||
|
||||
// protective mutex, guards:
|
||||
// - Queue{}.queue
|
||||
// - Index{}.data
|
||||
// - Queue{} hook fns
|
||||
mutex sync.Mutex |
||||
} |
||||
|
||||
// Init initializes the queue with given configuration
|
||||
// including struct fields to index, and necessary fns.
|
||||
func (q *Queue[T]) Init(config QueueConfig[T]) { |
||||
t := reflect.TypeOf((*T)(nil)).Elem() |
||||
|
||||
if len(config.Indices) == 0 { |
||||
panic("no indices provided") |
||||
} |
||||
|
||||
// Safely copy over
|
||||
// provided config.
|
||||
q.mutex.Lock() |
||||
q.indices = make([]Index, len(config.Indices)) |
||||
for i, cfg := range config.Indices { |
||||
q.indices[i].ptr = unsafe.Pointer(q) |
||||
q.indices[i].init(t, cfg, 0) |
||||
} |
||||
q.pop = config.Pop |
||||
q.mutex.Unlock() |
||||
} |
||||
|
||||
// Index selects index with given name from queue, else panics.
|
||||
func (q *Queue[T]) Index(name string) *Index { |
||||
for i := range q.indices { |
||||
if q.indices[i].name == name { |
||||
return &q.indices[i] |
||||
} |
||||
} |
||||
panic("unknown index: " + name) |
||||
} |
||||
|
||||
// PopFront pops the current value at front of the queue.
|
||||
func (q *Queue[T]) PopFront() (T, bool) { |
||||
t := q.PopFrontN(1) |
||||
if len(t) == 0 { |
||||
var t T |
||||
return t, false |
||||
} |
||||
return t[0], true |
||||
} |
||||
|
||||
// PopBack pops the current value at back of the queue.
|
||||
func (q *Queue[T]) PopBack() (T, bool) { |
||||
t := q.PopBackN(1) |
||||
if len(t) == 0 { |
||||
var t T |
||||
return t, false |
||||
} |
||||
return t[0], true |
||||
} |
||||
|
||||
// PopFrontN attempts to pop n values from front of the queue.
|
||||
func (q *Queue[T]) PopFrontN(n int) []T { |
||||
return q.pop_n(n, func() *list_elem { |
||||
return q.queue.head |
||||
}) |
||||
} |
||||
|
||||
// PopBackN attempts to pop n values from back of the queue.
|
||||
func (q *Queue[T]) PopBackN(n int) []T { |
||||
return q.pop_n(n, func() *list_elem { |
||||
return q.queue.tail |
||||
}) |
||||
} |
||||
|
||||
// Pop attempts to pop values from queue indexed under any of keys.
|
||||
func (q *Queue[T]) Pop(index *Index, keys ...Key) []T { |
||||
if index == nil { |
||||
panic("no index given") |
||||
} else if index.ptr != unsafe.Pointer(q) { |
||||
panic("invalid index for queue") |
||||
} |
||||
|
||||
// Acquire lock.
|
||||
q.mutex.Lock() |
||||
|
||||
// Preallocate expected ret slice.
|
||||
values := make([]T, 0, len(keys)) |
||||
|
||||
for i := range keys { |
||||
// Delete all items under key from index, collecting
|
||||
// value items and dropping them from all their indices.
|
||||
index.delete(keys[i], func(item *indexed_item) { |
||||
|
||||
// Append deleted to values.
|
||||
value := item.data.(T) |
||||
values = append(values, value) |
||||
|
||||
// Delete queued.
|
||||
q.delete(item) |
||||
}) |
||||
} |
||||
|
||||
// Get func ptrs.
|
||||
pop := q.pop |
||||
|
||||
// Done with lock.
|
||||
q.mutex.Unlock() |
||||
|
||||
if pop != nil { |
||||
// Pass all popped values
|
||||
// to given user hook (if set).
|
||||
for _, value := range values { |
||||
pop(value) |
||||
} |
||||
} |
||||
|
||||
return values |
||||
} |
||||
|
||||
// PushFront pushes values to front of queue.
|
||||
func (q *Queue[T]) PushFront(values ...T) { |
||||
q.mutex.Lock() |
||||
for i := range values { |
||||
item := q.index(values[i]) |
||||
q.queue.push_front(&item.elem) |
||||
} |
||||
q.mutex.Unlock() |
||||
} |
||||
|
||||
// PushBack pushes values to back of queue.
|
||||
func (q *Queue[T]) PushBack(values ...T) { |
||||
q.mutex.Lock() |
||||
for i := range values { |
||||
item := q.index(values[i]) |
||||
q.queue.push_back(&item.elem) |
||||
} |
||||
q.mutex.Unlock() |
||||
} |
||||
|
||||
// MoveFront attempts to move values indexed under any of keys to the front of the queue.
|
||||
func (q *Queue[T]) MoveFront(index *Index, keys ...Key) { |
||||
q.mutex.Lock() |
||||
for i := range keys { |
||||
index.get(keys[i], func(item *indexed_item) { |
||||
q.queue.move_front(&item.elem) |
||||
}) |
||||
} |
||||
q.mutex.Unlock() |
||||
} |
||||
|
||||
// MoveBack attempts to move values indexed under any of keys to the back of the queue.
|
||||
func (q *Queue[T]) MoveBack(index *Index, keys ...Key) { |
||||
q.mutex.Lock() |
||||
for i := range keys { |
||||
index.get(keys[i], func(item *indexed_item) { |
||||
q.queue.move_back(&item.elem) |
||||
}) |
||||
} |
||||
q.mutex.Unlock() |
||||
} |
||||
|
||||
// Len returns the current length of queue.
|
||||
func (q *Queue[T]) Len() int { |
||||
q.mutex.Lock() |
||||
l := q.queue.len |
||||
q.mutex.Unlock() |
||||
return l |
||||
} |
||||
|
||||
func (q *Queue[T]) pop_n(n int, next func() *list_elem) []T { |
||||
if next == nil { |
||||
panic("nil fn") |
||||
} |
||||
|
||||
// Acquire lock.
|
||||
q.mutex.Lock() |
||||
|
||||
// Preallocate ret slice.
|
||||
values := make([]T, 0, n) |
||||
|
||||
// Iterate over 'n' items.
|
||||
for i := 0; i < n; i++ { |
||||
|
||||
// Get next elem.
|
||||
next := next() |
||||
if next == nil { |
||||
|
||||
// reached
|
||||
// end.
|
||||
break |
||||
} |
||||
|
||||
// Cast the indexed item from elem.
|
||||
item := (*indexed_item)(next.data) |
||||
|
||||
// Append deleted to values.
|
||||
value := item.data.(T) |
||||
values = append(values, value) |
||||
|
||||
// Delete queued.
|
||||
q.delete(item) |
||||
} |
||||
|
||||
// Get func ptrs.
|
||||
pop := q.pop |
||||
|
||||
// Done with lock.
|
||||
q.mutex.Unlock() |
||||
|
||||
if pop != nil { |
||||
// Pass all popped values
|
||||
// to given user hook (if set).
|
||||
for _, value := range values { |
||||
pop(value) |
||||
} |
||||
} |
||||
|
||||
return values |
||||
} |
||||
|
||||
func (q *Queue[T]) index(value T) *indexed_item { |
||||
item := new_indexed_item() |
||||
|
||||
// Set item value.
|
||||
item.data = value |
||||
|
||||
// Acquire key buf.
|
||||
buf := new_buffer() |
||||
|
||||
for i := range q.indices { |
||||
// Get current index ptr.
|
||||
idx := &(q.indices[i]) |
||||
|
||||
// Extract fields comprising index key.
|
||||
parts := extract_fields(value, idx.fields) |
||||
|
||||
// Calculate index key.
|
||||
key := idx.key(buf, parts) |
||||
if key.Zero() { |
||||
continue |
||||
} |
||||
|
||||
// Append item to index.
|
||||
idx.append(key, item) |
||||
} |
||||
|
||||
// Done with buf.
|
||||
free_buffer(buf) |
||||
|
||||
return item |
||||
} |
||||
|
||||
func (q *Queue[T]) delete(item *indexed_item) { |
||||
for len(item.indexed) != 0 { |
||||
// Pop last indexed entry from list.
|
||||
entry := item.indexed[len(item.indexed)-1] |
||||
item.indexed = item.indexed[:len(item.indexed)-1] |
||||
|
||||
// Drop index_entry from index.
|
||||
entry.index.delete_entry(entry) |
||||
} |
||||
|
||||
// Drop entry from queue list.
|
||||
q.queue.remove(&item.elem) |
||||
|
||||
// Free now-unused item.
|
||||
free_indexed_item(item) |
||||
} |
||||
@ -1,78 +0,0 @@
|
||||
package structr |
||||
|
||||
import ( |
||||
"sync" |
||||
"unsafe" |
||||
) |
||||
|
||||
var result_pool sync.Pool |
||||
|
||||
type result struct { |
||||
// linked list elem this result is
|
||||
// stored under in Cache.lruList.
|
||||
elem list_elem |
||||
|
||||
// indexed stores the indices
|
||||
// this result is stored under.
|
||||
indexed []*index_entry |
||||
|
||||
// cached data (we maintain
|
||||
// the type data here using
|
||||
// an interface as any one
|
||||
// instance can be T / error).
|
||||
data interface{} |
||||
} |
||||
|
||||
func result_acquire[T any](c *Cache[T]) *result { |
||||
// Acquire from pool.
|
||||
v := result_pool.Get() |
||||
if v == nil { |
||||
v = new(result) |
||||
} |
||||
|
||||
// Cast result value.
|
||||
res := v.(*result) |
||||
|
||||
// Push result elem to front of LRU list.
|
||||
list_push_front(&c.lruList, &res.elem) |
||||
res.elem.data = unsafe.Pointer(res) |
||||
|
||||
return res |
||||
} |
||||
|
||||
func result_release[T any](c *Cache[T], res *result) { |
||||
// Remove result elem from LRU list.
|
||||
list_remove(&c.lruList, &res.elem) |
||||
res.elem.data = nil |
||||
|
||||
// Reset all result fields.
|
||||
res.indexed = res.indexed[:0] |
||||
res.data = nil |
||||
|
||||
// Release to pool.
|
||||
result_pool.Put(res) |
||||
} |
||||
|
||||
func result_drop_index[T any](res *result, index *Index[T]) { |
||||
for i := 0; i < len(res.indexed); i++ { |
||||
|
||||
if res.indexed[i].index != unsafe.Pointer(index) { |
||||
// Prof. Obiwan:
|
||||
// this is not the index
|
||||
// we are looking for.
|
||||
continue |
||||
} |
||||
|
||||
// Get index entry ptr.
|
||||
entry := res.indexed[i] |
||||
|
||||
// Move all index entries down + reslice.
|
||||
copy(res.indexed[i:], res.indexed[i+1:]) |
||||
res.indexed = res.indexed[:len(res.indexed)-1] |
||||
|
||||
// Release to memory pool.
|
||||
index_entry_release(entry) |
||||
|
||||
return |
||||
} |
||||
} |
||||
@ -1,6 +0,0 @@
|
||||
upstream |
||||
*.pprof |
||||
xxh3.test |
||||
.vscode |
||||
*.txt |
||||
_compat |
||||
@ -1,25 +0,0 @@
|
||||
xxHash Library |
||||
Copyright (c) 2012-2014, Yann Collet |
||||
Copyright (c) 2019, Jeff Wendling |
||||
All rights reserved. |
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, |
||||
are permitted provided that the following conditions are met: |
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this |
||||
list of conditions and the following disclaimer. |
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice, this |
||||
list of conditions and the following disclaimer in the documentation and/or |
||||
other materials provided with the distribution. |
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR |
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
@ -1,27 +0,0 @@
|
||||
# Build helpers for the xxh3 package: assembly generation, the C
# compatibility test-vector generator, and cross-platform vetting.

# None of these targets produce a file with the target's own name, so mark
# them phony; otherwise a stray file called e.g. "clean" would disable them.
.PHONY: all genasm clean vet
all: genasm _compat

# Regenerate the assembly via the generator in ./avo.
# "&&" keeps go generate from running in the wrong directory if cd fails.
genasm: avo/avx.go avo/sse.go
	cd ./avo && go generate gen.go

# Remove generated artefacts. "-f" keeps clean from stopping at the first
# file that is already absent.
clean:
	rm -f accum_vector_avx_amd64.s
	rm -f accum_vector_sse_amd64.s
	rm -f _compat

# Build the upstream C implementation the compatibility program links against.
upstream/xxhash.o: upstream/xxhash.h
	( cd upstream && make )

# Build the C program that prints reference test vectors.
_compat: _compat.c upstream/xxhash.o
	gcc -o _compat _compat.c ./upstream/xxhash.o

# Vet the package for every supported OS/architecture combination.
vet:
	GOOS=linux GOARCH=386 GO386=softfloat go vet ./...
	GOOS=windows GOARCH=386 GO386=softfloat go vet ./...
	GOOS=linux GOARCH=amd64 go vet ./...
	GOOS=windows GOARCH=amd64 go vet ./...
	GOOS=darwin GOARCH=amd64 go vet ./...
	GOOS=linux GOARCH=arm go vet ./...
	GOOS=linux GOARCH=arm64 go vet ./...
	GOOS=windows GOARCH=arm64 go vet ./...
	GOOS=darwin GOARCH=arm64 go vet ./...
|
||||
@ -1,38 +0,0 @@
|
||||
# XXH3 |
||||
[GoDoc](https://godoc.org/github.com/zeebo/xxh3) |
||||
[Sourcegraph](https://sourcegraph.com/github.com/zeebo/xxh3?badge) |
||||
[Go Report Card](https://goreportcard.com/report/github.com/zeebo/xxh3) |
||||
|
||||
This package is a port of the [xxh3](https://github.com/Cyan4973/xxHash) library to Go. |
||||
|
||||
Upstream has fixed the output as of v0.8.0, and this package matches that. |
||||
|
||||
--- |
||||
|
||||
# Benchmarks |
||||
|
||||
Run on my `i7-8850H CPU @ 2.60GHz` |
||||
|
||||
## Small Sizes |
||||
|
||||
| Bytes | Rate | |
||||
|-----------|--------------------------------------| |
||||
|` 0 ` |` 0.74 ns/op ` | |
||||
|` 1-3 ` |` 4.19 ns/op (0.24 GB/s - 0.71 GB/s) `| |
||||
|` 4-8 ` |` 4.16 ns/op (0.97 GB/s - 1.98 GB/s) `| |
||||
|` 9-16 ` |` 4.46 ns/op (2.02 GB/s - 3.58 GB/s) `| |
||||
|` 17-32 ` |` 6.22 ns/op (2.76 GB/s - 5.15 GB/s) `| |
||||
|` 33-64 ` |` 8.00 ns/op (4.13 GB/s - 8.13 GB/s) `| |
||||
|` 65-96 ` |` 11.0 ns/op (5.91 GB/s - 8.84 GB/s) `| |
||||
|` 97-128 ` |` 12.8 ns/op (7.68 GB/s - 10.0 GB/s) `| |
||||
|
||||
## Large Sizes |
||||
|
||||
| Bytes | Rate | SSE2 Rate | AVX2 Rate | |
||||
|---------|--------------------------|--------------------------|--------------------------| |
||||
|` 129 ` |` 13.6 ns/op (9.45 GB/s) `| | | |
||||
|` 240 ` |` 23.8 ns/op (10.1 GB/s) `| | | |
||||
|` 241 ` |` 40.5 ns/op (5.97 GB/s) `|` 23.3 ns/op (10.4 GB/s) `|` 20.1 ns/op (12.0 GB/s) `| |
||||
|` 512 ` |` 69.8 ns/op (7.34 GB/s) `|` 30.4 ns/op (16.9 GB/s) `|` 24.7 ns/op (20.7 GB/s) `| |
||||
|` 1024 ` |` 132 ns/op (7.77 GB/s) `|` 48.9 ns/op (20.9 GB/s) `|` 37.7 ns/op (27.2 GB/s) `| |
||||
|` 100KB `|` 13.0 us/op (7.88 GB/s) `|` 4.05 us/op (25.3 GB/s) `|` 2.31 us/op (44.3 GB/s) `| |
||||
@ -1,39 +0,0 @@
|
||||
#include "upstream/xxhash.h" |
||||
#include <stdio.h> |
||||
|
||||
int main() { |
||||
unsigned char buf[4096]; |
||||
for (int i = 0; i < 4096; i++) { |
||||
buf[i] = (unsigned char)((i+1)%251); |
||||
} |
||||
|
||||
printf("var testVecs64 = []uint64{\n"); |
||||
for (int i = 0; i < 4096; i++) { |
||||
if (i % 4 == 0) { |
||||
printf("\t"); |
||||
} |
||||
|
||||
uint64_t h = XXH3_64bits(buf, (size_t)i); |
||||
printf("0x%lx, ", h); |
||||
|
||||
if (i % 4 == 3) { |
||||
printf("\n\t"); |
||||
} |
||||
} |
||||
printf("}\n\n"); |
||||
|
||||
printf("var testVecs128 = [][2]uint64{\n"); |
||||
for (int i = 0; i < 4096; i++) { |
||||
if (i % 4 == 0) { |
||||
printf("\t"); |
||||
} |
||||
|
||||
XXH128_hash_t h = XXH3_128bits(buf, (size_t)i); |
||||
printf("{0x%lx, 0x%lx}, ", h.high64, h.low64); |
||||
|
||||
if (i % 4 == 3) { |
||||
printf("\n"); |
||||
} |
||||
} |
||||
printf("}\n\n"); |
||||
} |
||||
@ -1,542 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
// avx512Switch is the size at which the avx512 code is used.
// Bigger blocks benefit more.
//
// The value is 1 << 10, i.e. 1024.
// NOTE(review): presumably compared against the input length by the
// dispatch code, which is not visible in this file — confirm.
const avx512Switch = 1 << 10
||||
|
||||
// accumScalar mixes l bytes of input, starting at p, into the eight
// accumulator lanes in accs using plain Go arithmetic.
//
// If secret is not the package-level key the whole call is delegated to
// accumScalarSeed. Otherwise the key64_NNN constants are used for the
// scramble and final-stripe key words instead of reading them via secret.
// NOTE(review): key64_NNN is presumably the 64-bit word of key at byte
// offset NNN — confirm against the constant definitions.
//
// One "stripe" step reads eight 64-bit words from p and eight from the
// current key window k. For each lane pair (j, j^1) the data word is added
// to accs[j^1], and the product of the low and high 32-bit halves of
// (data ^ key) is added to accs[j].
//
// The input is consumed in three phases:
//  1. While l > _block: 16 stripe steps (p advances by _stripe, k by 8 per
//     step), followed by a scramble of every lane.
//  2. (l-1)/_stripe further stripe steps without a scramble.
//  3. If bytes remain, one last stripe read from the final _stripe bytes of
//     the input (p is moved back by _stripe-l), so it may overlap bytes
//     that were already consumed.
func accumScalar(accs *[8]u64, p, secret ptr, l u64) {
	if secret != key {
		accumScalarSeed(accs, p, secret, l)
		return
	}
	for l > _block {
		// Each block restarts the key window at the beginning of secret.
		k := secret

		// accs
		for i := 0; i < 16; i++ {
			dv0 := readU64(p, 8*0)
			dk0 := dv0 ^ readU64(k, 8*0)
			accs[1] += dv0
			accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)

			dv1 := readU64(p, 8*1)
			dk1 := dv1 ^ readU64(k, 8*1)
			accs[0] += dv1
			accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)

			dv2 := readU64(p, 8*2)
			dk2 := dv2 ^ readU64(k, 8*2)
			accs[3] += dv2
			accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)

			dv3 := readU64(p, 8*3)
			dk3 := dv3 ^ readU64(k, 8*3)
			accs[2] += dv3
			accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)

			dv4 := readU64(p, 8*4)
			dk4 := dv4 ^ readU64(k, 8*4)
			accs[5] += dv4
			accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)

			dv5 := readU64(p, 8*5)
			dk5 := dv5 ^ readU64(k, 8*5)
			accs[4] += dv5
			accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)

			dv6 := readU64(p, 8*6)
			dk6 := dv6 ^ readU64(k, 8*6)
			accs[7] += dv6
			accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)

			dv7 := readU64(p, 8*7)
			dk7 := dv7 ^ readU64(k, 8*7)
			accs[6] += dv7
			accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)

			l -= _stripe
			if l > 0 {
				p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8)
			}
		}

		// scramble accs
		accs[0] ^= accs[0] >> 47
		accs[0] ^= key64_128
		accs[0] *= prime32_1

		accs[1] ^= accs[1] >> 47
		accs[1] ^= key64_136
		accs[1] *= prime32_1

		accs[2] ^= accs[2] >> 47
		accs[2] ^= key64_144
		accs[2] *= prime32_1

		accs[3] ^= accs[3] >> 47
		accs[3] ^= key64_152
		accs[3] *= prime32_1

		accs[4] ^= accs[4] >> 47
		accs[4] ^= key64_160
		accs[4] *= prime32_1

		accs[5] ^= accs[5] >> 47
		accs[5] ^= key64_168
		accs[5] *= prime32_1

		accs[6] ^= accs[6] >> 47
		accs[6] ^= key64_176
		accs[6] *= prime32_1

		accs[7] ^= accs[7] >> 47
		accs[7] ^= key64_184
		accs[7] *= prime32_1
	}

	if l > 0 {
		// t whole stripes are processed here; (l-1) ensures at least one
		// byte is left over for the final stripe below.
		t, k := (l-1)/_stripe, secret

		for i := u64(0); i < t; i++ {
			dv0 := readU64(p, 8*0)
			dk0 := dv0 ^ readU64(k, 8*0)
			accs[1] += dv0
			accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)

			dv1 := readU64(p, 8*1)
			dk1 := dv1 ^ readU64(k, 8*1)
			accs[0] += dv1
			accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)

			dv2 := readU64(p, 8*2)
			dk2 := dv2 ^ readU64(k, 8*2)
			accs[3] += dv2
			accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)

			dv3 := readU64(p, 8*3)
			dk3 := dv3 ^ readU64(k, 8*3)
			accs[2] += dv3
			accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)

			dv4 := readU64(p, 8*4)
			dk4 := dv4 ^ readU64(k, 8*4)
			accs[5] += dv4
			accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)

			dv5 := readU64(p, 8*5)
			dk5 := dv5 ^ readU64(k, 8*5)
			accs[4] += dv5
			accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)

			dv6 := readU64(p, 8*6)
			dk6 := dv6 ^ readU64(k, 8*6)
			accs[7] += dv6
			accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)

			dv7 := readU64(p, 8*7)
			dk7 := dv7 ^ readU64(k, 8*7)
			accs[6] += dv7
			accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)

			l -= _stripe
			if l > 0 {
				p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8)
			}
		}

		if l > 0 {
			// Step p back so the last stripe ends exactly where the
			// input ends.
			p = ptr(ui(p) - uintptr(_stripe-l))

			dv0 := readU64(p, 8*0)
			dk0 := dv0 ^ key64_121
			accs[1] += dv0
			accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)

			dv1 := readU64(p, 8*1)
			dk1 := dv1 ^ key64_129
			accs[0] += dv1
			accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)

			dv2 := readU64(p, 8*2)
			dk2 := dv2 ^ key64_137
			accs[3] += dv2
			accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)

			dv3 := readU64(p, 8*3)
			dk3 := dv3 ^ key64_145
			accs[2] += dv3
			accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)

			dv4 := readU64(p, 8*4)
			dk4 := dv4 ^ key64_153
			accs[5] += dv4
			accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)

			dv5 := readU64(p, 8*5)
			dk5 := dv5 ^ key64_161
			accs[4] += dv5
			accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)

			dv6 := readU64(p, 8*6)
			dk6 := dv6 ^ key64_169
			accs[7] += dv6
			accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)

			dv7 := readU64(p, 8*7)
			dk7 := dv7 ^ key64_177
			accs[6] += dv7
			accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
		}
	}
}
||||
|
||||
// accumBlockScalar mixes one full block of input at p into the eight
// accumulator lanes in accs and then scrambles every lane.
//
// If secret is not the package-level key the call is delegated to
// accumBlockScalarSeed. Otherwise 16 stripe steps are run; each reads eight
// 64-bit words from p and from secret, then advances p by _stripe and
// secret by 8. The scramble uses the key64_128 .. key64_184 constants, so
// the advanced secret pointer is not read again afterwards.
//
// The function takes no length: the caller must supply enough readable
// bytes at p for all 16 stripe steps.
func accumBlockScalar(accs *[8]u64, p, secret ptr) {
	if secret != key {
		accumBlockScalarSeed(accs, p, secret)
		return
	}
	// accs
	for i := 0; i < 16; i++ {
		dv0 := readU64(p, 8*0)
		dk0 := dv0 ^ readU64(secret, 8*0)
		accs[1] += dv0
		accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)

		dv1 := readU64(p, 8*1)
		dk1 := dv1 ^ readU64(secret, 8*1)
		accs[0] += dv1
		accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)

		dv2 := readU64(p, 8*2)
		dk2 := dv2 ^ readU64(secret, 8*2)
		accs[3] += dv2
		accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)

		dv3 := readU64(p, 8*3)
		dk3 := dv3 ^ readU64(secret, 8*3)
		accs[2] += dv3
		accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)

		dv4 := readU64(p, 8*4)
		dk4 := dv4 ^ readU64(secret, 8*4)
		accs[5] += dv4
		accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)

		dv5 := readU64(p, 8*5)
		dk5 := dv5 ^ readU64(secret, 8*5)
		accs[4] += dv5
		accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)

		dv6 := readU64(p, 8*6)
		dk6 := dv6 ^ readU64(secret, 8*6)
		accs[7] += dv6
		accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)

		dv7 := readU64(p, 8*7)
		dk7 := dv7 ^ readU64(secret, 8*7)
		accs[6] += dv7
		accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)

		p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8)
	}

	// scramble accs
	accs[0] ^= accs[0] >> 47
	accs[0] ^= key64_128
	accs[0] *= prime32_1

	accs[1] ^= accs[1] >> 47
	accs[1] ^= key64_136
	accs[1] *= prime32_1

	accs[2] ^= accs[2] >> 47
	accs[2] ^= key64_144
	accs[2] *= prime32_1

	accs[3] ^= accs[3] >> 47
	accs[3] ^= key64_152
	accs[3] *= prime32_1

	accs[4] ^= accs[4] >> 47
	accs[4] ^= key64_160
	accs[4] *= prime32_1

	accs[5] ^= accs[5] >> 47
	accs[5] ^= key64_168
	accs[5] *= prime32_1

	accs[6] ^= accs[6] >> 47
	accs[6] ^= key64_176
	accs[6] *= prime32_1

	accs[7] ^= accs[7] >> 47
	accs[7] ^= key64_184
	accs[7] *= prime32_1
}
||||
|
||||
// accumScalarSeed should be used with custom key.
//
// It mixes l bytes of input, starting at p, into the eight accumulator
// lanes in accs. Every key word is read through secret at run time: stripe
// keys from a window that starts at secret and moves 8 bytes per stripe,
// scramble keys from offsets 128..184, and final-stripe keys from offsets
// 121..177.
//
// The input is consumed in three phases:
//  1. While l > _block: 16 stripe steps followed by a scramble of all lanes.
//  2. (l-1)/_stripe further stripe steps without a scramble.
//  3. If bytes remain, one last stripe read from the final _stripe bytes of
//     the input (p is moved back by _stripe-l), so it may overlap bytes
//     that were already consumed.
func accumScalarSeed(accs *[8]u64, p, secret ptr, l u64) {
	for l > _block {
		// Each block restarts the key window at the beginning of secret.
		k := secret

		// accs
		for i := 0; i < 16; i++ {
			dv0 := readU64(p, 8*0)
			dk0 := dv0 ^ readU64(k, 8*0)
			accs[1] += dv0
			accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)

			dv1 := readU64(p, 8*1)
			dk1 := dv1 ^ readU64(k, 8*1)
			accs[0] += dv1
			accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)

			dv2 := readU64(p, 8*2)
			dk2 := dv2 ^ readU64(k, 8*2)
			accs[3] += dv2
			accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)

			dv3 := readU64(p, 8*3)
			dk3 := dv3 ^ readU64(k, 8*3)
			accs[2] += dv3
			accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)

			dv4 := readU64(p, 8*4)
			dk4 := dv4 ^ readU64(k, 8*4)
			accs[5] += dv4
			accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)

			dv5 := readU64(p, 8*5)
			dk5 := dv5 ^ readU64(k, 8*5)
			accs[4] += dv5
			accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)

			dv6 := readU64(p, 8*6)
			dk6 := dv6 ^ readU64(k, 8*6)
			accs[7] += dv6
			accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)

			dv7 := readU64(p, 8*7)
			dk7 := dv7 ^ readU64(k, 8*7)
			accs[6] += dv7
			accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)

			l -= _stripe
			if l > 0 {
				p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8)
			}
		}

		// scramble accs
		accs[0] ^= accs[0] >> 47
		accs[0] ^= readU64(secret, 128)
		accs[0] *= prime32_1

		accs[1] ^= accs[1] >> 47
		accs[1] ^= readU64(secret, 136)
		accs[1] *= prime32_1

		accs[2] ^= accs[2] >> 47
		accs[2] ^= readU64(secret, 144)
		accs[2] *= prime32_1

		accs[3] ^= accs[3] >> 47
		accs[3] ^= readU64(secret, 152)
		accs[3] *= prime32_1

		accs[4] ^= accs[4] >> 47
		accs[4] ^= readU64(secret, 160)
		accs[4] *= prime32_1

		accs[5] ^= accs[5] >> 47
		accs[5] ^= readU64(secret, 168)
		accs[5] *= prime32_1

		accs[6] ^= accs[6] >> 47
		accs[6] ^= readU64(secret, 176)
		accs[6] *= prime32_1

		accs[7] ^= accs[7] >> 47
		accs[7] ^= readU64(secret, 184)
		accs[7] *= prime32_1
	}

	if l > 0 {
		// t whole stripes are processed here; (l-1) ensures at least one
		// byte is left over for the final stripe below.
		t, k := (l-1)/_stripe, secret

		for i := u64(0); i < t; i++ {
			dv0 := readU64(p, 8*0)
			dk0 := dv0 ^ readU64(k, 8*0)
			accs[1] += dv0
			accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)

			dv1 := readU64(p, 8*1)
			dk1 := dv1 ^ readU64(k, 8*1)
			accs[0] += dv1
			accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)

			dv2 := readU64(p, 8*2)
			dk2 := dv2 ^ readU64(k, 8*2)
			accs[3] += dv2
			accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)

			dv3 := readU64(p, 8*3)
			dk3 := dv3 ^ readU64(k, 8*3)
			accs[2] += dv3
			accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)

			dv4 := readU64(p, 8*4)
			dk4 := dv4 ^ readU64(k, 8*4)
			accs[5] += dv4
			accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)

			dv5 := readU64(p, 8*5)
			dk5 := dv5 ^ readU64(k, 8*5)
			accs[4] += dv5
			accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)

			dv6 := readU64(p, 8*6)
			dk6 := dv6 ^ readU64(k, 8*6)
			accs[7] += dv6
			accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)

			dv7 := readU64(p, 8*7)
			dk7 := dv7 ^ readU64(k, 8*7)
			accs[6] += dv7
			accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)

			l -= _stripe
			if l > 0 {
				p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8)
			}
		}

		if l > 0 {
			// Step p back so the last stripe ends exactly where the
			// input ends.
			p = ptr(ui(p) - uintptr(_stripe-l))

			dv0 := readU64(p, 8*0)
			dk0 := dv0 ^ readU64(secret, 121)
			accs[1] += dv0
			accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)

			dv1 := readU64(p, 8*1)
			dk1 := dv1 ^ readU64(secret, 129)
			accs[0] += dv1
			accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)

			dv2 := readU64(p, 8*2)
			dk2 := dv2 ^ readU64(secret, 137)
			accs[3] += dv2
			accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)

			dv3 := readU64(p, 8*3)
			dk3 := dv3 ^ readU64(secret, 145)
			accs[2] += dv3
			accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)

			dv4 := readU64(p, 8*4)
			dk4 := dv4 ^ readU64(secret, 153)
			accs[5] += dv4
			accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)

			dv5 := readU64(p, 8*5)
			dk5 := dv5 ^ readU64(secret, 161)
			accs[4] += dv5
			accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)

			dv6 := readU64(p, 8*6)
			dk6 := dv6 ^ readU64(secret, 169)
			accs[7] += dv6
			accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)

			dv7 := readU64(p, 8*7)
			dk7 := dv7 ^ readU64(secret, 177)
			accs[6] += dv7
			accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
		}
	}
}
||||
|
||||
// accumBlockScalarSeed should be used with custom key.
//
// It mixes one full block of input at p into the eight accumulator lanes
// in accs (16 stripe steps, each reading eight 64-bit words from p and
// from the key window), then scrambles every lane with the key words read
// from secret at offsets 128..184.
//
// The function takes no length: the caller must supply enough readable
// bytes at p for all 16 stripe steps.
func accumBlockScalarSeed(accs *[8]u64, p, secret ptr) {
	// accs
	{
		// Shadow secret inside this scope so the loop can advance its own
		// copy; the scramble below still sees the original pointer.
		secret := secret
		for i := 0; i < 16; i++ {
			dv0 := readU64(p, 8*0)
			dk0 := dv0 ^ readU64(secret, 8*0)
			accs[1] += dv0
			accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)

			dv1 := readU64(p, 8*1)
			dk1 := dv1 ^ readU64(secret, 8*1)
			accs[0] += dv1
			accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)

			dv2 := readU64(p, 8*2)
			dk2 := dv2 ^ readU64(secret, 8*2)
			accs[3] += dv2
			accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)

			dv3 := readU64(p, 8*3)
			dk3 := dv3 ^ readU64(secret, 8*3)
			accs[2] += dv3
			accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)

			dv4 := readU64(p, 8*4)
			dk4 := dv4 ^ readU64(secret, 8*4)
			accs[5] += dv4
			accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)

			dv5 := readU64(p, 8*5)
			dk5 := dv5 ^ readU64(secret, 8*5)
			accs[4] += dv5
			accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)

			dv6 := readU64(p, 8*6)
			dk6 := dv6 ^ readU64(secret, 8*6)
			accs[7] += dv6
			accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)

			dv7 := readU64(p, 8*7)
			dk7 := dv7 ^ readU64(secret, 8*7)
			accs[6] += dv7
			accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)

			p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8)
		}
	}

	// scramble accs
	accs[0] ^= accs[0] >> 47
	accs[0] ^= readU64(secret, 128)
	accs[0] *= prime32_1

	accs[1] ^= accs[1] >> 47
	accs[1] ^= readU64(secret, 136)
	accs[1] *= prime32_1

	accs[2] ^= accs[2] >> 47
	accs[2] ^= readU64(secret, 144)
	accs[2] *= prime32_1

	accs[3] ^= accs[3] >> 47
	accs[3] ^= readU64(secret, 152)
	accs[3] *= prime32_1

	accs[4] ^= accs[4] >> 47
	accs[4] ^= readU64(secret, 160)
	accs[4] *= prime32_1

	accs[5] ^= accs[5] >> 47
	accs[5] ^= readU64(secret, 168)
	accs[5] *= prime32_1

	accs[6] ^= accs[6] >> 47
	accs[6] ^= readU64(secret, 176)
	accs[6] *= prime32_1

	accs[7] ^= accs[7] >> 47
	accs[7] ^= readU64(secret, 184)
	accs[7] *= prime32_1
}
||||
@ -1,40 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
import ( |
||||
"unsafe" |
||||
|
||||
"github.com/klauspost/cpuid/v2" |
||||
) |
||||
|
||||
var ( |
||||
hasAVX2 = cpuid.CPU.Has(cpuid.AVX2) |
||||
hasSSE2 = cpuid.CPU.Has(cpuid.SSE2) // Always true on amd64
|
||||
hasAVX512 = cpuid.CPU.Has(cpuid.AVX512F) |
||||
) |
||||
|
||||
// accumAVX2 is declared without a Go body; its implementation is supplied
// outside this file. It takes the accumulator array, data and key pointers,
// and a length.
//
//go:noescape
func accumAVX2(acc *[8]u64, data, key unsafe.Pointer, len u64)

// accumAVX512 is declared without a Go body; its implementation is supplied
// outside this file. It takes the accumulator array, data and key pointers,
// and a length.
//
//go:noescape
func accumAVX512(acc *[8]u64, data, key unsafe.Pointer, len u64)

// accumSSE is declared without a Go body; its implementation is supplied
// outside this file. It takes the accumulator array, data and key pointers,
// and a length.
//
//go:noescape
func accumSSE(acc *[8]u64, data, key unsafe.Pointer, len u64)

// accumBlockAVX2 is declared without a Go body; its implementation is
// supplied outside this file. Unlike accumAVX2 it takes no length.
//
//go:noescape
func accumBlockAVX2(acc *[8]u64, data, key unsafe.Pointer)

// accumBlockSSE is declared without a Go body; its implementation is
// supplied outside this file. Unlike accumSSE it takes no length.
//
//go:noescape
func accumBlockSSE(acc *[8]u64, data, key unsafe.Pointer)
||||
|
||||
func withOverrides(avx512, avx2, sse2 bool, cb func()) { |
||||
avx512Orig, avx2Orig, sse2Orig := hasAVX512, hasAVX2, hasSSE2 |
||||
hasAVX512, hasAVX2, hasSSE2 = avx512, avx2, sse2 |
||||
defer func() { hasAVX512, hasAVX2, hasSSE2 = avx512Orig, avx2Orig, sse2Orig }() |
||||
cb() |
||||
} |
||||
|
||||
func withAVX512(cb func()) { withOverrides(hasAVX512, false, false, cb) } |
||||
func withAVX2(cb func()) { withOverrides(false, hasAVX2, false, cb) } |
||||
func withSSE2(cb func()) { withOverrides(false, false, hasSSE2, cb) } |
||||
func withGeneric(cb func()) { withOverrides(false, false, false, cb) } |
||||
@ -1,25 +0,0 @@
|
||||
//go:build !amd64
|
||||
// +build !amd64
|
||||
|
||||
package xxh3 |
||||
|
||||
import ( |
||||
"unsafe" |
||||
) |
||||
|
||||
const ( |
||||
hasAVX2 = false |
||||
hasSSE2 = false |
||||
hasAVX512 = false |
||||
) |
||||
|
||||
func accumAVX2(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } |
||||
func accumSSE(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } |
||||
func accumBlockAVX2(acc *[8]u64, data, key unsafe.Pointer) { panic("unreachable") } |
||||
func accumBlockSSE(acc *[8]u64, data, key unsafe.Pointer) { panic("unreachable") } |
||||
func accumAVX512(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } |
||||
|
||||
// withAVX512 simply invokes cb: the feature flags are constants in this
// build, so there is nothing to override.
func withAVX512(cb func()) {
	cb()
}

// withAVX2 simply invokes cb; see withAVX512.
func withAVX2(cb func()) {
	cb()
}

// withSSE2 simply invokes cb; see withAVX512.
func withSSE2(cb func()) {
	cb()
}

// withGeneric simply invokes cb; see withAVX512.
func withGeneric(cb func()) {
	cb()
}
||||
@ -1,379 +0,0 @@
|
||||
// Code generated by command: go run gen.go -avx512 -out ../accum_vector_avx512_amd64.s -pkg xxh3. DO NOT EDIT. |
||||
|
||||
#include "textflag.h" |
||||
|
||||
DATA prime_avx512<>+0(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx512<>+8(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx512<>+16(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx512<>+24(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx512<>+32(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx512<>+40(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx512<>+48(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx512<>+56(SB)/8, $0x000000009e3779b1 |
||||
GLOBL prime_avx512<>(SB), RODATA|NOPTR, $64 |
||||
|
||||
// func accumAVX512(acc *[8]uint64, data *byte, key *byte, len uint64)
// Requires: AVX, AVX512F, MMX+
//
// NOTE(review): this file's header says it is generated by gen.go; comments
// added here will be lost on regeneration and belong in the generator.
//
// Register use:
//   AX  = acc pointer       CX = data cursor      DX = key pointer
//   BX  = bytes remaining
//   Z1  = the eight 64-bit accumulator lanes (loaded from acc, stored on return)
//   Z0  = prime_avx512 constant (only needed by the scramble in accum_large)
//   Z2..Z17 = 64-byte key windows at key+0, key+8, ... key+120
//   Z18 = key window at key+128 (scramble key)
//   Z19 = key window at key+121 (final-stripe key)
//
// Flow:
//   accum_large: while more than 0x400 bytes remain, consume 16 stripes of
//                64 bytes, then scramble Z1.
//   accum:       consume up to 16 further stripes while more than 0x40
//                bytes remain.
//   finalize:    if any bytes remain, consume the last 64 bytes of the
//                input (CX is moved to end-0x40) with the Z19 key.
TEXT ·accumAVX512(SB), NOSPLIT, $0-32
	MOVQ acc+0(FP), AX
	MOVQ data+8(FP), CX
	MOVQ key+16(FP), DX
	MOVQ len+24(FP), BX
	VMOVDQU64 (AX), Z1
	VMOVDQU64 prime_avx512<>+0(SB), Z0
	VMOVDQU64 (DX), Z2
	VMOVDQU64 8(DX), Z3
	VMOVDQU64 16(DX), Z4
	VMOVDQU64 24(DX), Z5
	VMOVDQU64 32(DX), Z6
	VMOVDQU64 40(DX), Z7
	VMOVDQU64 48(DX), Z8
	VMOVDQU64 56(DX), Z9
	VMOVDQU64 64(DX), Z10
	VMOVDQU64 72(DX), Z11
	VMOVDQU64 80(DX), Z12
	VMOVDQU64 88(DX), Z13
	VMOVDQU64 96(DX), Z14
	VMOVDQU64 104(DX), Z15
	VMOVDQU64 112(DX), Z16
	VMOVDQU64 120(DX), Z17
	VMOVDQU64 128(DX), Z18
	VMOVDQU64 121(DX), Z19

accum_large:
	// Leave the loop once 0x400 bytes or fewer remain.
	CMPQ BX, $0x00000400
	JLE accum
	// Stripe step (repeated 16 times with successive data offsets and key
	// registers): Z20 = data, Z21 = data ^ key, then Z21 = low32 * high32
	// of each 64-bit lane; Z20 is shuffled to swap adjacent 64-bit lanes;
	// both are added into Z1. Each step also prefetches data 1024 bytes
	// ahead of the stripe it reads.
	VMOVDQU64 (CX), Z20
	PREFETCHT0 1024(CX)
	VPXORD Z2, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 64(CX), Z20
	PREFETCHT0 1088(CX)
	VPXORD Z3, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 128(CX), Z20
	PREFETCHT0 1152(CX)
	VPXORD Z4, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 192(CX), Z20
	PREFETCHT0 1216(CX)
	VPXORD Z5, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 256(CX), Z20
	PREFETCHT0 1280(CX)
	VPXORD Z6, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 320(CX), Z20
	PREFETCHT0 1344(CX)
	VPXORD Z7, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 384(CX), Z20
	PREFETCHT0 1408(CX)
	VPXORD Z8, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 448(CX), Z20
	PREFETCHT0 1472(CX)
	VPXORD Z9, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 512(CX), Z20
	PREFETCHT0 1536(CX)
	VPXORD Z10, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 576(CX), Z20
	PREFETCHT0 1600(CX)
	VPXORD Z11, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 640(CX), Z20
	PREFETCHT0 1664(CX)
	VPXORD Z12, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 704(CX), Z20
	PREFETCHT0 1728(CX)
	VPXORD Z13, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 768(CX), Z20
	PREFETCHT0 1792(CX)
	VPXORD Z14, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 832(CX), Z20
	PREFETCHT0 1856(CX)
	VPXORD Z15, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 896(CX), Z20
	PREFETCHT0 1920(CX)
	VPXORD Z16, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	VMOVDQU64 960(CX), Z20
	PREFETCHT0 1984(CX)
	VPXORD Z17, Z20, Z21
	VPSHUFD $0x31, Z21, Z22
	VPMULUDQ Z21, Z22, Z21
	VPSHUFD $0x4e, Z20, Z20
	VPADDQ Z1, Z20, Z1
	VPADDQ Z1, Z21, Z1
	// Advance past the 0x400 bytes just consumed.
	ADDQ $0x00000400, CX
	SUBQ $0x00000400, BX
	// Scramble: Z20 = (Z1 >> 47) ^ Z1 ^ Z18 (VPTERNLOGD imm 0x96 is a
	// three-input XOR), then multiply each 64-bit lane by the prime using
	// two 32x32 multiplies (low half, and high half shifted up by 32).
	VPSRLQ $0x2f, Z1, Z20
	VPTERNLOGD $0x96, Z1, Z18, Z20
	VPMULUDQ Z0, Z20, Z1
	VPSHUFD $0xf5, Z20, Z20
	VPMULUDQ Z0, Z20, Z20
	VPSLLQ $0x20, Z20, Z20
	VPADDQ Z1, Z20, Z1
	JMP accum_large

accum:
	// From here on Z0, Z2, Z3 and Z18 are reused as scratch registers: the
	// scramble is no longer needed, and each key register is read before
	// it is overwritten.
	// Each step below runs only while more than 0x40 bytes remain.
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z2, Z0, Z2
	VPSHUFD $0x31, Z2, Z18
	VPMULUDQ Z2, Z18, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z3, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z4, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z5, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z6, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z7, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z8, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z9, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z10, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z11, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z12, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z13, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z14, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z15, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z16, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX
	CMPQ BX, $0x40
	JLE finalize
	VMOVDQU64 (CX), Z0
	VPXORD Z17, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, BX

finalize:
	// Nothing left: skip the final stripe.
	CMPQ BX, $0x00
	JE return
	// Point CX at (end of input - 0x40) so the last load covers the final
	// 64 bytes, possibly overlapping data that was already consumed.
	SUBQ $0x40, CX
	ADDQ BX, CX
	VMOVDQU64 (CX), Z0
	VPXORD Z19, Z0, Z2
	VPSHUFD $0x31, Z2, Z3
	VPMULUDQ Z2, Z3, Z2
	VPSHUFD $0x4e, Z0, Z0
	VPADDQ Z1, Z0, Z1
	VPADDQ Z1, Z2, Z1

return:
	// Write the accumulators back and clear the upper vector state.
	VMOVDQU64 Z1, (AX)
	VZEROUPPER
	RET
||||
@ -1,586 +0,0 @@
|
||||
// Code generated by command: go run gen.go -avx -out ../accum_vector_avx_amd64.s -pkg xxh3. DO NOT EDIT. |
||||
|
||||
#include "textflag.h" |
||||
|
||||
DATA prime_avx<>+0(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx<>+8(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx<>+16(SB)/8, $0x000000009e3779b1 |
||||
DATA prime_avx<>+24(SB)/8, $0x000000009e3779b1 |
||||
GLOBL prime_avx<>(SB), RODATA|NOPTR, $32 |
||||
|
||||
// accumAVX2 folds len bytes starting at data into the eight 64-bit lanes at
// acc, mixing every 64-byte stripe with bytes of key, using 256-bit AVX2
// registers.
//
// Register use (see the loads below): AX=acc, CX=data cursor, DX=key base,
// BX=key cursor for the single-stripe loop, SI=bytes remaining,
// Y1/Y2=accumulator lanes 0-3/4-7, Y0=prime_avx multiplier.
// func accumAVX2(acc *[8]uint64, data *byte, key *byte, len uint64) |
||||
// Requires: AVX, AVX2, MMX+ |
||||
TEXT ·accumAVX2(SB), NOSPLIT, $0-32 |
||||
MOVQ acc+0(FP), AX |
||||
MOVQ data+8(FP), CX |
||||
MOVQ key+16(FP), DX |
||||
MOVQ key+16(FP), BX |
||||
MOVQ len+24(FP), SI |
||||
VMOVDQU (AX), Y1 |
||||
VMOVDQU 32(AX), Y2 |
||||
VMOVDQU prime_avx<>+0(SB), Y0 |
||||
|
||||
// accum_large: while more than 0x400 (1024) bytes remain, consume one
// 1024-byte block as 16 unrolled 64-byte stripes (stripe i reads key bytes
// at offset 8*i), prefetching 512 bytes ahead, then scramble both
// accumulator registers.
accum_large: |
||||
CMPQ SI, $0x00000400 |
||||
JLE accum |
||||
// One stripe: k = data ^ key; acc += swap(data) + lo32(k)*hi32(k), where
// swap (VPSHUFD $0x4e) exchanges the two 64-bit halves of each 128-bit lane
// and VPSHUFD $0x31 moves the high dword of each 64-bit lane into the low
// position so VPMULUDQ multiplies low by high.
VMOVDQU (CX), Y3 |
||||
VMOVDQU 32(CX), Y6 |
||||
PREFETCHT0 512(CX) |
||||
VPXOR (DX), Y3, Y4 |
||||
VPXOR 32(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 64(CX), Y3 |
||||
VMOVDQU 96(CX), Y6 |
||||
PREFETCHT0 576(CX) |
||||
VPXOR 8(DX), Y3, Y4 |
||||
VPXOR 40(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 128(CX), Y3 |
||||
VMOVDQU 160(CX), Y6 |
||||
PREFETCHT0 640(CX) |
||||
VPXOR 16(DX), Y3, Y4 |
||||
VPXOR 48(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 192(CX), Y3 |
||||
VMOVDQU 224(CX), Y6 |
||||
PREFETCHT0 704(CX) |
||||
VPXOR 24(DX), Y3, Y4 |
||||
VPXOR 56(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 256(CX), Y3 |
||||
VMOVDQU 288(CX), Y6 |
||||
PREFETCHT0 768(CX) |
||||
VPXOR 32(DX), Y3, Y4 |
||||
VPXOR 64(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 320(CX), Y3 |
||||
VMOVDQU 352(CX), Y6 |
||||
PREFETCHT0 832(CX) |
||||
VPXOR 40(DX), Y3, Y4 |
||||
VPXOR 72(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 384(CX), Y3 |
||||
VMOVDQU 416(CX), Y6 |
||||
PREFETCHT0 896(CX) |
||||
VPXOR 48(DX), Y3, Y4 |
||||
VPXOR 80(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 448(CX), Y3 |
||||
VMOVDQU 480(CX), Y6 |
||||
PREFETCHT0 960(CX) |
||||
VPXOR 56(DX), Y3, Y4 |
||||
VPXOR 88(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 512(CX), Y3 |
||||
VMOVDQU 544(CX), Y6 |
||||
PREFETCHT0 1024(CX) |
||||
VPXOR 64(DX), Y3, Y4 |
||||
VPXOR 96(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 576(CX), Y3 |
||||
VMOVDQU 608(CX), Y6 |
||||
PREFETCHT0 1088(CX) |
||||
VPXOR 72(DX), Y3, Y4 |
||||
VPXOR 104(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 640(CX), Y3 |
||||
VMOVDQU 672(CX), Y6 |
||||
PREFETCHT0 1152(CX) |
||||
VPXOR 80(DX), Y3, Y4 |
||||
VPXOR 112(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 704(CX), Y3 |
||||
VMOVDQU 736(CX), Y6 |
||||
PREFETCHT0 1216(CX) |
||||
VPXOR 88(DX), Y3, Y4 |
||||
VPXOR 120(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 768(CX), Y3 |
||||
VMOVDQU 800(CX), Y6 |
||||
PREFETCHT0 1280(CX) |
||||
VPXOR 96(DX), Y3, Y4 |
||||
VPXOR 128(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 832(CX), Y3 |
||||
VMOVDQU 864(CX), Y6 |
||||
PREFETCHT0 1344(CX) |
||||
VPXOR 104(DX), Y3, Y4 |
||||
VPXOR 136(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 896(CX), Y3 |
||||
VMOVDQU 928(CX), Y6 |
||||
PREFETCHT0 1408(CX) |
||||
VPXOR 112(DX), Y3, Y4 |
||||
VPXOR 144(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 960(CX), Y3 |
||||
VMOVDQU 992(CX), Y6 |
||||
PREFETCHT0 1472(CX) |
||||
VPXOR 120(DX), Y3, Y4 |
||||
VPXOR 152(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
// Block consumed: advance the data cursor and shrink the remaining length.
ADDQ $0x00000400, CX |
||||
SUBQ $0x00000400, SI |
||||
// Scramble: acc = ((acc >> 47) ^ acc ^ key[128:192]) * prime. The 64x32-bit
// product is assembled from two VPMULUDQ results:
// lo32*prime + ((hi32*prime) << 32).
VPSRLQ $0x2f, Y1, Y3 |
||||
VPXOR Y1, Y3, Y3 |
||||
VPXOR 128(DX), Y3, Y3 |
||||
VPMULUDQ Y0, Y3, Y1 |
||||
VPSHUFD $0xf5, Y3, Y3 |
||||
VPMULUDQ Y0, Y3, Y3 |
||||
VPSLLQ $0x20, Y3, Y3 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPSRLQ $0x2f, Y2, Y3 |
||||
VPXOR Y2, Y3, Y3 |
||||
VPXOR 160(DX), Y3, Y3 |
||||
VPMULUDQ Y0, Y3, Y2 |
||||
VPSHUFD $0xf5, Y3, Y3 |
||||
VPMULUDQ Y0, Y3, Y3 |
||||
VPSLLQ $0x20, Y3, Y3 |
||||
VPADDQ Y2, Y3, Y2 |
||||
JMP accum_large |
||||
|
||||
// accum: while more than 0x40 (64) bytes remain, consume a single stripe,
// reading key bytes through BX and advancing BX by 8 per stripe. Y0 is
// reused as scratch from here on; no scramble follows, so the prime is no
// longer needed.
accum: |
||||
CMPQ SI, $0x40 |
||||
JLE finalize |
||||
VMOVDQU (CX), Y0 |
||||
VMOVDQU 32(CX), Y5 |
||||
VPXOR (BX), Y0, Y3 |
||||
VPXOR 32(BX), Y5, Y6 |
||||
VPSHUFD $0x31, Y3, Y4 |
||||
VPSHUFD $0x31, Y6, Y7 |
||||
VPMULUDQ Y3, Y4, Y3 |
||||
VPMULUDQ Y6, Y7, Y6 |
||||
VPSHUFD $0x4e, Y0, Y0 |
||||
VPSHUFD $0x4e, Y5, Y5 |
||||
VPADDQ Y1, Y0, Y1 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y2, Y5, Y2 |
||||
VPADDQ Y2, Y6, Y2 |
||||
ADDQ $0x00000040, CX |
||||
SUBQ $0x00000040, SI |
||||
ADDQ $0x00000008, BX |
||||
JMP accum |
||||
|
||||
// finalize: if any bytes remain, rewind the cursor so it addresses the last
// 64 bytes of the input (CX = CX - 64 + SI) and mix that stripe with the key
// bytes at offset 121.
finalize: |
||||
CMPQ SI, $0x00 |
||||
JE return |
||||
SUBQ $0x40, CX |
||||
ADDQ SI, CX |
||||
VMOVDQU (CX), Y0 |
||||
VMOVDQU 32(CX), Y5 |
||||
VPXOR 121(DX), Y0, Y3 |
||||
VPXOR 153(DX), Y5, Y6 |
||||
VPSHUFD $0x31, Y3, Y4 |
||||
VPSHUFD $0x31, Y6, Y7 |
||||
VPMULUDQ Y3, Y4, Y3 |
||||
VPMULUDQ Y6, Y7, Y6 |
||||
VPSHUFD $0x4e, Y0, Y0 |
||||
VPSHUFD $0x4e, Y5, Y5 |
||||
VPADDQ Y1, Y0, Y1 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y2, Y5, Y2 |
||||
VPADDQ Y2, Y6, Y2 |
||||
|
||||
// return: write both accumulator halves back to acc.
return: |
||||
VMOVDQU Y1, (AX) |
||||
VMOVDQU Y2, 32(AX) |
||||
VZEROUPPER |
||||
RET |
||||
|
||||
// accumBlockAVX2 folds exactly one 1024-byte block at data into the eight
// 64-bit lanes at acc and then scrambles them. It is the straight-line body
// of one large-block iteration: 16 unrolled 64-byte stripes (stripe i reads
// key bytes at offset 8*i), without prefetching, followed by the scramble
// with key[128:192] and the prime_avx multiplier.
//
// Register use (see the loads below): AX=acc, CX=data, DX=key,
// Y1/Y2=accumulator lanes 0-3/4-7, Y0=prime_avx multiplier.
// func accumBlockAVX2(acc *[8]uint64, data *byte, key *byte) |
||||
// Requires: AVX, AVX2 |
||||
TEXT ·accumBlockAVX2(SB), NOSPLIT, $0-24 |
||||
MOVQ acc+0(FP), AX |
||||
MOVQ data+8(FP), CX |
||||
MOVQ key+16(FP), DX |
||||
VMOVDQU (AX), Y1 |
||||
VMOVDQU 32(AX), Y2 |
||||
VMOVDQU prime_avx<>+0(SB), Y0 |
||||
// One stripe: k = data ^ key; acc += swap(data) + lo32(k)*hi32(k), where
// swap (VPSHUFD $0x4e) exchanges the two 64-bit halves of each 128-bit lane.
VMOVDQU (CX), Y3 |
||||
VMOVDQU 32(CX), Y6 |
||||
VPXOR (DX), Y3, Y4 |
||||
VPXOR 32(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 64(CX), Y3 |
||||
VMOVDQU 96(CX), Y6 |
||||
VPXOR 8(DX), Y3, Y4 |
||||
VPXOR 40(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 128(CX), Y3 |
||||
VMOVDQU 160(CX), Y6 |
||||
VPXOR 16(DX), Y3, Y4 |
||||
VPXOR 48(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 192(CX), Y3 |
||||
VMOVDQU 224(CX), Y6 |
||||
VPXOR 24(DX), Y3, Y4 |
||||
VPXOR 56(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 256(CX), Y3 |
||||
VMOVDQU 288(CX), Y6 |
||||
VPXOR 32(DX), Y3, Y4 |
||||
VPXOR 64(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 320(CX), Y3 |
||||
VMOVDQU 352(CX), Y6 |
||||
VPXOR 40(DX), Y3, Y4 |
||||
VPXOR 72(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 384(CX), Y3 |
||||
VMOVDQU 416(CX), Y6 |
||||
VPXOR 48(DX), Y3, Y4 |
||||
VPXOR 80(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 448(CX), Y3 |
||||
VMOVDQU 480(CX), Y6 |
||||
VPXOR 56(DX), Y3, Y4 |
||||
VPXOR 88(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 512(CX), Y3 |
||||
VMOVDQU 544(CX), Y6 |
||||
VPXOR 64(DX), Y3, Y4 |
||||
VPXOR 96(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 576(CX), Y3 |
||||
VMOVDQU 608(CX), Y6 |
||||
VPXOR 72(DX), Y3, Y4 |
||||
VPXOR 104(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 640(CX), Y3 |
||||
VMOVDQU 672(CX), Y6 |
||||
VPXOR 80(DX), Y3, Y4 |
||||
VPXOR 112(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 704(CX), Y3 |
||||
VMOVDQU 736(CX), Y6 |
||||
VPXOR 88(DX), Y3, Y4 |
||||
VPXOR 120(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 768(CX), Y3 |
||||
VMOVDQU 800(CX), Y6 |
||||
VPXOR 96(DX), Y3, Y4 |
||||
VPXOR 128(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 832(CX), Y3 |
||||
VMOVDQU 864(CX), Y6 |
||||
VPXOR 104(DX), Y3, Y4 |
||||
VPXOR 136(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 896(CX), Y3 |
||||
VMOVDQU 928(CX), Y6 |
||||
VPXOR 112(DX), Y3, Y4 |
||||
VPXOR 144(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
VMOVDQU 960(CX), Y3 |
||||
VMOVDQU 992(CX), Y6 |
||||
VPXOR 120(DX), Y3, Y4 |
||||
VPXOR 152(DX), Y6, Y7 |
||||
VPSHUFD $0x31, Y4, Y5 |
||||
VPSHUFD $0x31, Y7, Y8 |
||||
VPMULUDQ Y4, Y5, Y4 |
||||
VPMULUDQ Y7, Y8, Y7 |
||||
VPSHUFD $0x4e, Y3, Y3 |
||||
VPSHUFD $0x4e, Y6, Y6 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPADDQ Y1, Y4, Y1 |
||||
VPADDQ Y2, Y6, Y2 |
||||
VPADDQ Y2, Y7, Y2 |
||||
// Scramble: acc = ((acc >> 47) ^ acc ^ key[128:192]) * prime. The 64x32-bit
// product is assembled from two VPMULUDQ results:
// lo32*prime + ((hi32*prime) << 32).
VPSRLQ $0x2f, Y1, Y3 |
||||
VPXOR Y1, Y3, Y3 |
||||
VPXOR 128(DX), Y3, Y3 |
||||
VPMULUDQ Y0, Y3, Y1 |
||||
VPSHUFD $0xf5, Y3, Y3 |
||||
VPMULUDQ Y0, Y3, Y3 |
||||
VPSLLQ $0x20, Y3, Y3 |
||||
VPADDQ Y1, Y3, Y1 |
||||
VPSRLQ $0x2f, Y2, Y3 |
||||
VPXOR Y2, Y3, Y3 |
||||
VPXOR 160(DX), Y3, Y3 |
||||
VPMULUDQ Y0, Y3, Y2 |
||||
VPSHUFD $0xf5, Y3, Y3 |
||||
VPMULUDQ Y0, Y3, Y3 |
||||
VPSLLQ $0x20, Y3, Y3 |
||||
VPADDQ Y2, Y3, Y2 |
||||
// Write both accumulator halves back to acc.
VMOVDQU Y1, (AX) |
||||
VMOVDQU Y2, 32(AX) |
||||
VZEROUPPER |
||||
RET |
||||
File diff suppressed because it is too large
Load Diff
@ -1,97 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
// Size and multiplier constants shared by the hashing code.
//
// NOTE(review): _stripe and _block look like the accumulator stripe and block
// sizes in bytes (the assembly routines step by 0x40 and 0x400) — confirm
// against their users, which are not in this file.
//
// The prime* values are the multipliers used throughout the mixing code; they
// match the xxHash reference PRIME32_x / PRIME64_x constants. prime32_1 equals
// 0x9e3779b1, the value the AVX2 assembly broadcasts in its prime table.
const ( |
||||
_stripe = 64 |
||||
_block = 1024 |
||||
|
||||
prime32_1 = 2654435761 |
||||
prime32_2 = 2246822519 |
||||
prime32_3 = 3266489917 |
||||
|
||||
prime64_1 = 11400714785074694791 |
||||
prime64_2 = 14029467366897019727 |
||||
prime64_3 = 1609587929392839161 |
||||
prime64_4 = 9650029242287828579 |
||||
prime64_5 = 2870177450012600261 |
||||
) |
||||
|
||||
// key points at the 192-byte default key table that the unseeded hash
// functions mix input against. The inline /* N */ markers give the running
// byte count at that position. The key64_NNN and key32_NNN constants are
// precomputed reads of this table, so the two must be kept in sync.
var key = ptr(&[...]u8{ |
||||
0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe /* 8 */, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, /* 16 */ |
||||
0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb /* 24 */, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, /* 32 */ |
||||
0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78 /* 40 */, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, /* 48 */ |
||||
0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e /* 56 */, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, /* 64 */ |
||||
0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb /* 72 */, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, /* 80 */ |
||||
0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e /* 88 */, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, /* 96 */ |
||||
0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f /* 104 */, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, /* 112 */ |
||||
0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31 /* 120 */, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, /* 128 */ |
||||
0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3 /* 136 */, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, /* 144 */ |
||||
0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49 /* 152 */, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, /* 160 */ |
||||
0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc /* 168 */, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, /* 176 */ |
||||
0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28 /* 184 */, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, /* 192 */ |
||||
}) |
||||
|
||||
// key64_NNN is the little-endian 64-bit value of the eight key-table bytes
// starting at byte offset NNN (for example, bytes b8 fe 6c 39 23 a4 4b be at
// offset 0 give key64_000 = 0xbe4ba423396cfeb8). Precomputing them lets the
// short-input paths use constants instead of loading from the table.
const ( |
||||
// 8-byte-aligned offsets 0 through 184.
key64_000 u64 = 0xbe4ba423396cfeb8 |
||||
key64_008 u64 = 0x1cad21f72c81017c |
||||
key64_016 u64 = 0xdb979083e96dd4de |
||||
key64_024 u64 = 0x1f67b3b7a4a44072 |
||||
key64_032 u64 = 0x78e5c0cc4ee679cb |
||||
key64_040 u64 = 0x2172ffcc7dd05a82 |
||||
key64_048 u64 = 0x8e2443f7744608b8 |
||||
key64_056 u64 = 0x4c263a81e69035e0 |
||||
key64_064 u64 = 0xcb00c391bb52283c |
||||
key64_072 u64 = 0xa32e531b8b65d088 |
||||
key64_080 u64 = 0x4ef90da297486471 |
||||
key64_088 u64 = 0xd8acdea946ef1938 |
||||
key64_096 u64 = 0x3f349ce33f76faa8 |
||||
key64_104 u64 = 0x1d4f0bc7c7bbdcf9 |
||||
key64_112 u64 = 0x3159b4cd4be0518a |
||||
key64_120 u64 = 0x647378d9c97e9fc8 |
||||
key64_128 u64 = 0xc3ebd33483acc5ea |
||||
key64_136 u64 = 0xeb6313faffa081c5 |
||||
key64_144 u64 = 0x49daf0b751dd0d17 |
||||
key64_152 u64 = 0x9e68d429265516d3 |
||||
key64_160 u64 = 0xfca1477d58be162b |
||||
key64_168 u64 = 0xce31d07ad1b8f88f |
||||
key64_176 u64 = 0x280416958f3acb45 |
||||
key64_184 u64 = 0x7e404bbbcafbd7af |
||||
|
||||
// Offsets 103-127: used by the 128-bit hash to mix the last 32 bytes of
// 129-240 byte inputs.
key64_103 u64 = 0x4f0bc7c7bbdcf93f |
||||
key64_111 u64 = 0x59b4cd4be0518a1d |
||||
key64_119 u64 = 0x7378d9c97e9fc831 |
||||
key64_127 u64 = 0xebd33483acc5ea64 |
||||
|
||||
// Offsets 121-177: the same 64 key bytes the AVX2 assembly reads at
// offset 121 for the final stripe. NOTE(review): presumably consumed by a
// non-assembly final-stripe path — confirm against the users.
key64_121 u64 = 0xea647378d9c97e9f |
||||
key64_129 u64 = 0xc5c3ebd33483acc5 |
||||
key64_137 u64 = 0x17eb6313faffa081 |
||||
key64_145 u64 = 0xd349daf0b751dd0d |
||||
key64_153 u64 = 0x2b9e68d429265516 |
||||
key64_161 u64 = 0x8ffca1477d58be16 |
||||
key64_169 u64 = 0x45ce31d07ad1b8f8 |
||||
key64_177 u64 = 0xaf280416958f3acb |
||||
|
||||
// Offsets 11-67 (untyped): XORed with the eight accumulator lanes when the
// 128-bit hash folds them into its low word.
key64_011 = 0x6dd4de1cad21f72c |
||||
key64_019 = 0xa44072db979083e9 |
||||
key64_027 = 0xe679cb1f67b3b7a4 |
||||
key64_035 = 0xd05a8278e5c0cc4e |
||||
key64_043 = 0x4608b82172ffcc7d |
||||
key64_051 = 0x9035e08e2443f774 |
||||
key64_059 = 0x52283c4c263a81e6 |
||||
key64_067 = 0x65d088cb00c391bb |
||||
|
||||
// Offsets 117-173 (untyped): the matching keys for folding the accumulator
// lanes into the high word of the 128-bit hash.
key64_117 = 0xd9c97e9fc83159b4 |
||||
key64_125 = 0x3483acc5ea647378 |
||||
key64_133 = 0xfaffa081c5c3ebd3 |
||||
key64_141 = 0xb751dd0d17eb6313 |
||||
key64_149 = 0x29265516d349daf0 |
||||
key64_157 = 0x7d58be162b9e68d4 |
||||
key64_165 = 0x7ad1b8f88ffca147 |
||||
key64_173 = 0x958f3acb45ce31d0 |
||||
) |
||||
|
||||
// 32-bit key words used by the 1-3 byte paths of the 128-bit hash, where
// they only ever appear as the pairs key32_000^key32_004 and
// key32_008^key32_012.
//
// NOTE(review): key32_000 and key32_008 hold the UPPER 32 bits of key64_000
// and key64_008, and key32_004 / key32_012 the LOWER 32 bits, so the offsets
// in the names are swapped relative to a little-endian read of the key table.
// The XOR pairs are unaffected, but do not use one of these on its own as
// "the 32-bit word at offset NNN" without checking.
const ( |
||||
key32_000 u32 = 0xbe4ba423 |
||||
key32_004 u32 = 0x396cfeb8 |
||||
key32_008 u32 = 0x1cad21f7 |
||||
key32_012 u32 = 0x2c81017c |
||||
) |
||||
@ -1,253 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
import ( |
||||
"math/bits" |
||||
) |
||||
|
||||
// Hash128 returns the 128-bit hash of the byte slice.
|
||||
func Hash128(b []byte) Uint128 { |
||||
return hashAny128(*(*str)(ptr(&b))) |
||||
} |
||||
|
||||
// HashString128 returns the 128-bit hash of the string slice.
|
||||
func HashString128(s string) Uint128 { |
||||
return hashAny128(*(*str)(ptr(&s))) |
||||
} |
||||
|
||||
func hashAny128(s str) (acc u128) { |
||||
p, l := s.p, s.l |
||||
|
||||
switch { |
||||
case l <= 16: |
||||
switch { |
||||
case l > 8: // 9-16
|
||||
const bitflipl = key64_032 ^ key64_040 |
||||
const bitfliph = key64_048 ^ key64_056 |
||||
|
||||
input_lo := readU64(p, 0) |
||||
input_hi := readU64(p, ui(l)-8) |
||||
|
||||
m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1) |
||||
|
||||
m128_l += uint64(l-1) << 54 |
||||
input_hi ^= bitfliph |
||||
|
||||
m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1) |
||||
|
||||
m128_l ^= bits.ReverseBytes64(m128_h) |
||||
|
||||
acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2) |
||||
acc.Hi += m128_h * prime64_2 |
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
|
||||
return acc |
||||
|
||||
case l > 3: // 4-8
|
||||
const bitflip = key64_016 ^ key64_024 |
||||
|
||||
input_lo := readU32(p, 0) |
||||
input_hi := readU32(p, ui(l)-4) |
||||
input_64 := u64(input_lo) + u64(input_hi)<<32 |
||||
keyed := input_64 ^ bitflip |
||||
|
||||
acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2)) |
||||
|
||||
acc.Hi += acc.Lo << 1 |
||||
acc.Lo ^= acc.Hi >> 3 |
||||
|
||||
acc.Lo ^= acc.Lo >> 35 |
||||
acc.Lo *= 0x9fb21c651e98df25 |
||||
acc.Lo ^= acc.Lo >> 28 |
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
|
||||
return acc |
||||
|
||||
case l == 3: // 3
|
||||
c12 := u64(readU16(p, 0)) |
||||
c3 := u64(readU8(p, 2)) |
||||
acc.Lo = c12<<16 + c3 + 3<<8 |
||||
|
||||
case l > 1: // 2
|
||||
c12 := u64(readU16(p, 0)) |
||||
acc.Lo = c12*(1<<24+1)>>8 + 2<<8 |
||||
|
||||
case l == 1: // 1
|
||||
c1 := u64(readU8(p, 0)) |
||||
acc.Lo = c1*(1<<24+1<<16+1) + 1<<8 |
||||
|
||||
default: // 0
|
||||
return u128{0x99aa06d3014798d8, 0x6001c324468d497f} |
||||
} |
||||
|
||||
acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13)) |
||||
acc.Lo ^= uint64(key32_000 ^ key32_004) |
||||
acc.Hi ^= uint64(key32_008 ^ key32_012) |
||||
|
||||
acc.Lo = xxh64AvalancheSmall(acc.Lo) |
||||
acc.Hi = xxh64AvalancheSmall(acc.Hi) |
||||
|
||||
return acc |
||||
|
||||
case l <= 128: |
||||
acc.Lo = u64(l) * prime64_1 |
||||
|
||||
if l > 32 { |
||||
if l > 64 { |
||||
if l > 96 { |
||||
in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8) |
||||
i6, i7 := readU64(p, 6*8), readU64(p, 7*8) |
||||
|
||||
acc.Hi += mulFold64(in8^key64_112, in7^key64_120) |
||||
acc.Hi ^= i6 + i7 |
||||
acc.Lo += mulFold64(i6^key64_096, i7^key64_104) |
||||
acc.Lo ^= in8 + in7 |
||||
|
||||
} // 96
|
||||
|
||||
in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8) |
||||
i4, i5 := readU64(p, 4*8), readU64(p, 5*8) |
||||
|
||||
acc.Hi += mulFold64(in6^key64_080, in5^key64_088) |
||||
acc.Hi ^= i4 + i5 |
||||
acc.Lo += mulFold64(i4^key64_064, i5^key64_072) |
||||
acc.Lo ^= in6 + in5 |
||||
|
||||
} // 64
|
||||
|
||||
in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8) |
||||
i2, i3 := readU64(p, 2*8), readU64(p, 3*8) |
||||
|
||||
acc.Hi += mulFold64(in4^key64_048, in3^key64_056) |
||||
acc.Hi ^= i2 + i3 |
||||
acc.Lo += mulFold64(i2^key64_032, i3^key64_040) |
||||
acc.Lo ^= in4 + in3 |
||||
|
||||
} // 32
|
||||
|
||||
in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8) |
||||
i0, i1 := readU64(p, 0*8), readU64(p, 1*8) |
||||
|
||||
acc.Hi += mulFold64(in2^key64_016, in1^key64_024) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^key64_000, i1^key64_008) |
||||
acc.Lo ^= in2 + in1 |
||||
|
||||
acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo |
||||
|
||||
acc.Hi = -xxh3Avalanche(acc.Hi) |
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
|
||||
return acc |
||||
|
||||
case l <= 240: |
||||
acc.Lo = u64(l) * prime64_1 |
||||
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8) |
||||
|
||||
acc.Hi += mulFold64(i2^key64_016, i3^key64_024) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^key64_000, i1^key64_008) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8) |
||||
|
||||
acc.Hi += mulFold64(i2^key64_048, i3^key64_056) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^key64_032, i1^key64_040) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8) |
||||
|
||||
acc.Hi += mulFold64(i2^key64_080, i3^key64_088) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^key64_064, i1^key64_072) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8) |
||||
|
||||
acc.Hi += mulFold64(i2^key64_112, i3^key64_120) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^key64_096, i1^key64_104) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
// avalanche
|
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
|
||||
// trailing groups after 128
|
||||
top := ui(l) &^ 31 |
||||
for i := ui(4 * 32); i < top; i += 32 { |
||||
i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24) |
||||
k0, k1, k2, k3 := readU64(key, i-125), readU64(key, i-117), readU64(key, i-109), readU64(key, i-101) |
||||
|
||||
acc.Hi += mulFold64(i2^k2, i3^k3) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^k0, i1^k1) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
// last 32 bytes
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8) |
||||
|
||||
acc.Hi += mulFold64(i0^key64_119, i1^key64_127) |
||||
acc.Hi ^= i2 + i3 |
||||
acc.Lo += mulFold64(i2^key64_103, i3^key64_111) |
||||
acc.Lo ^= i0 + i1 |
||||
} |
||||
|
||||
acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo |
||||
|
||||
acc.Hi = -xxh3Avalanche(acc.Hi) |
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
|
||||
return acc |
||||
|
||||
default: |
||||
acc.Lo = u64(l) * prime64_1 |
||||
acc.Hi = ^(u64(l) * prime64_2) |
||||
|
||||
accs := [8]u64{ |
||||
prime32_3, prime64_1, prime64_2, prime64_3, |
||||
prime64_4, prime32_2, prime64_5, prime32_1, |
||||
} |
||||
|
||||
if hasAVX512 && l >= avx512Switch { |
||||
accumAVX512(&accs, p, key, u64(l)) |
||||
} else if hasAVX2 { |
||||
accumAVX2(&accs, p, key, u64(l)) |
||||
} else if hasSSE2 { |
||||
accumSSE(&accs, p, key, u64(l)) |
||||
} else { |
||||
accumScalar(&accs, p, key, u64(l)) |
||||
} |
||||
|
||||
// merge accs
|
||||
acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019) |
||||
acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125) |
||||
|
||||
acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035) |
||||
acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141) |
||||
|
||||
acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051) |
||||
acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157) |
||||
|
||||
acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067) |
||||
acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173) |
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
|
||||
return acc |
||||
} |
||||
} |
||||
@ -1,264 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
import ( |
||||
"math/bits" |
||||
) |
||||
|
||||
// Hash128Seed returns the 128-bit hash of the byte slice.
|
||||
func Hash128Seed(b []byte, seed uint64) Uint128 { |
||||
return hashAny128Seed(*(*str)(ptr(&b)), seed) |
||||
} |
||||
|
||||
// HashString128Seed returns the 128-bit hash of the string slice.
|
||||
func HashString128Seed(s string, seed uint64) Uint128 { |
||||
return hashAny128Seed(*(*str)(ptr(&s)), seed) |
||||
} |
||||
|
||||
func hashAny128Seed(s str, seed uint64) (acc u128) { |
||||
p, l := s.p, s.l |
||||
|
||||
switch { |
||||
case l <= 16: |
||||
switch { |
||||
case l > 8: // 9-16
|
||||
bitflipl := (key64_032 ^ key64_040) - seed |
||||
bitfliph := (key64_048 ^ key64_056) + seed |
||||
|
||||
input_lo := readU64(p, 0) |
||||
input_hi := readU64(p, ui(l)-8) |
||||
|
||||
m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1) |
||||
|
||||
m128_l += uint64(l-1) << 54 |
||||
input_hi ^= bitfliph |
||||
|
||||
m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1) |
||||
|
||||
m128_l ^= bits.ReverseBytes64(m128_h) |
||||
|
||||
acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2) |
||||
acc.Hi += m128_h * prime64_2 |
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
|
||||
return acc |
||||
|
||||
case l > 3: // 4-8
|
||||
seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32 |
||||
bitflip := (key64_016 ^ key64_024) + seed |
||||
input_lo := readU32(p, 0) |
||||
input_hi := readU32(p, ui(l)-4) |
||||
input_64 := u64(input_lo) + u64(input_hi)<<32 |
||||
keyed := input_64 ^ bitflip |
||||
|
||||
acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2)) |
||||
|
||||
acc.Hi += acc.Lo << 1 |
||||
acc.Lo ^= acc.Hi >> 3 |
||||
|
||||
acc.Lo ^= acc.Lo >> 35 |
||||
acc.Lo *= 0x9fb21c651e98df25 |
||||
acc.Lo ^= acc.Lo >> 28 |
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
|
||||
return acc |
||||
|
||||
case l == 3: // 3
|
||||
c12 := u64(readU16(p, 0)) |
||||
c3 := u64(readU8(p, 2)) |
||||
acc.Lo = c12<<16 + c3 + 3<<8 |
||||
|
||||
case l > 1: // 2
|
||||
c12 := u64(readU16(p, 0)) |
||||
acc.Lo = c12*(1<<24+1)>>8 + 2<<8 |
||||
|
||||
case l == 1: // 1
|
||||
c1 := u64(readU8(p, 0)) |
||||
acc.Lo = c1*(1<<24+1<<16+1) + 1<<8 |
||||
|
||||
default: // 0
|
||||
bitflipl := key64_064 ^ key64_072 ^ seed |
||||
bitfliph := key64_080 ^ key64_088 ^ seed |
||||
return u128{Lo: xxh64AvalancheFull(bitflipl), Hi: xxh64AvalancheFull(bitfliph)} |
||||
} |
||||
|
||||
acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13)) |
||||
acc.Lo ^= uint64(key32_000^key32_004) + seed |
||||
acc.Hi ^= uint64(key32_008^key32_012) - seed |
||||
|
||||
acc.Lo = xxh64AvalancheFull(acc.Lo) |
||||
acc.Hi = xxh64AvalancheFull(acc.Hi) |
||||
|
||||
return acc |
||||
|
||||
case l <= 128: |
||||
acc.Lo = u64(l) * prime64_1 |
||||
|
||||
if l > 32 { |
||||
if l > 64 { |
||||
if l > 96 { |
||||
in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8) |
||||
i6, i7 := readU64(p, 6*8), readU64(p, 7*8) |
||||
|
||||
acc.Hi += mulFold64(in8^(key64_112+seed), in7^(key64_120-seed)) |
||||
acc.Hi ^= i6 + i7 |
||||
acc.Lo += mulFold64(i6^(key64_096+seed), i7^(key64_104-seed)) |
||||
acc.Lo ^= in8 + in7 |
||||
|
||||
} // 96
|
||||
|
||||
in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8) |
||||
i4, i5 := readU64(p, 4*8), readU64(p, 5*8) |
||||
|
||||
acc.Hi += mulFold64(in6^(key64_080+seed), in5^(key64_088-seed)) |
||||
acc.Hi ^= i4 + i5 |
||||
acc.Lo += mulFold64(i4^(key64_064+seed), i5^(key64_072-seed)) |
||||
acc.Lo ^= in6 + in5 |
||||
|
||||
} // 64
|
||||
|
||||
in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8) |
||||
i2, i3 := readU64(p, 2*8), readU64(p, 3*8) |
||||
|
||||
acc.Hi += mulFold64(in4^(key64_048+seed), in3^(key64_056-seed)) |
||||
acc.Hi ^= i2 + i3 |
||||
acc.Lo += mulFold64(i2^(key64_032+seed), i3^(key64_040-seed)) |
||||
acc.Lo ^= in4 + in3 |
||||
|
||||
} // 32
|
||||
|
||||
in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8) |
||||
i0, i1 := readU64(p, 0*8), readU64(p, 1*8) |
||||
|
||||
acc.Hi += mulFold64(in2^(key64_016+seed), in1^(key64_024-seed)) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed)) |
||||
acc.Lo ^= in2 + in1 |
||||
|
||||
acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo |
||||
|
||||
acc.Hi = -xxh3Avalanche(acc.Hi) |
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
|
||||
return acc |
||||
|
||||
case l <= 240: |
||||
acc.Lo = u64(l) * prime64_1 |
||||
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8) |
||||
|
||||
acc.Hi += mulFold64(i2^(key64_016+seed), i3^(key64_024-seed)) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed)) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8) |
||||
|
||||
acc.Hi += mulFold64(i2^(key64_048+seed), i3^(key64_056-seed)) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^(key64_032+seed), i1^(key64_040-seed)) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8) |
||||
|
||||
acc.Hi += mulFold64(i2^(key64_080+seed), i3^(key64_088-seed)) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^(key64_064+seed), i1^(key64_072-seed)) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8) |
||||
|
||||
acc.Hi += mulFold64(i2^(key64_112+seed), i3^(key64_120-seed)) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^(key64_096+seed), i1^(key64_104-seed)) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
// avalanche
|
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
|
||||
// trailing groups after 128
|
||||
top := ui(l) &^ 31 |
||||
for i := ui(4 * 32); i < top; i += 32 { |
||||
i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24) |
||||
k0, k1, k2, k3 := readU64(key, i-125)+seed, readU64(key, i-117)-seed, readU64(key, i-109)+seed, readU64(key, i-101)-seed |
||||
|
||||
acc.Hi += mulFold64(i2^k2, i3^k3) |
||||
acc.Hi ^= i0 + i1 |
||||
acc.Lo += mulFold64(i0^k0, i1^k1) |
||||
acc.Lo ^= i2 + i3 |
||||
} |
||||
|
||||
// last 32 bytes
|
||||
{ |
||||
i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8) |
||||
|
||||
seed := 0 - seed |
||||
acc.Hi += mulFold64(i0^(key64_119+seed), i1^(key64_127-seed)) |
||||
acc.Hi ^= i2 + i3 |
||||
acc.Lo += mulFold64(i2^(key64_103+seed), i3^(key64_111-seed)) |
||||
acc.Lo ^= i0 + i1 |
||||
} |
||||
|
||||
acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo |
||||
|
||||
acc.Hi = -xxh3Avalanche(acc.Hi) |
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
|
||||
return acc |
||||
|
||||
default: |
||||
acc.Lo = u64(l) * prime64_1 |
||||
acc.Hi = ^(u64(l) * prime64_2) |
||||
|
||||
secret := key |
||||
if seed != 0 { |
||||
secret = ptr(&[secretSize]byte{}) |
||||
initSecret(secret, seed) |
||||
} |
||||
|
||||
accs := [8]u64{ |
||||
prime32_3, prime64_1, prime64_2, prime64_3, |
||||
prime64_4, prime32_2, prime64_5, prime32_1, |
||||
} |
||||
|
||||
if hasAVX512 && l >= avx512Switch { |
||||
accumAVX512(&accs, p, secret, u64(l)) |
||||
} else if hasAVX2 { |
||||
accumAVX2(&accs, p, secret, u64(l)) |
||||
} else if hasSSE2 { |
||||
accumSSE(&accs, p, secret, u64(l)) |
||||
} else { |
||||
accumScalar(&accs, p, secret, u64(l)) |
||||
} |
||||
|
||||
// merge accs
|
||||
const hi_off = 117 - 11 |
||||
|
||||
acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) |
||||
acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off)) |
||||
|
||||
acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) |
||||
acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off)) |
||||
|
||||
acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) |
||||
acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off)) |
||||
|
||||
acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) |
||||
acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off)) |
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
|
||||
return acc |
||||
} |
||||
} |
||||
@ -1,126 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
import "math/bits" |
||||
|
||||
// Hash returns the hash of the byte slice.
|
||||
func Hash(b []byte) uint64 { |
||||
return hashAny(*(*str)(ptr(&b))) |
||||
} |
||||
|
||||
// Hash returns the hash of the string slice.
|
||||
func HashString(s string) uint64 { |
||||
return hashAny(*(*str)(ptr(&s))) |
||||
} |
||||
|
||||
func hashAny(s str) (acc u64) { |
||||
p, l := s.p, s.l |
||||
|
||||
switch { |
||||
case l <= 16: |
||||
switch { |
||||
case l > 8: // 9-16
|
||||
inputlo := readU64(p, 0) ^ (key64_024 ^ key64_032) |
||||
inputhi := readU64(p, ui(l)-8) ^ (key64_040 ^ key64_048) |
||||
folded := mulFold64(inputlo, inputhi) |
||||
return xxh3Avalanche(u64(l) + bits.ReverseBytes64(inputlo) + inputhi + folded) |
||||
|
||||
case l > 3: // 4-8
|
||||
input1 := readU32(p, 0) |
||||
input2 := readU32(p, ui(l)-4) |
||||
input64 := u64(input2) + u64(input1)<<32 |
||||
keyed := input64 ^ (key64_008 ^ key64_016) |
||||
return rrmxmx(keyed, u64(l)) |
||||
|
||||
case l == 3: // 3
|
||||
c12 := u64(readU16(p, 0)) |
||||
c3 := u64(readU8(p, 2)) |
||||
acc = c12<<16 + c3 + 3<<8 |
||||
|
||||
case l > 1: // 2
|
||||
c12 := u64(readU16(p, 0)) |
||||
acc = c12*(1<<24+1)>>8 + 2<<8 |
||||
|
||||
case l == 1: // 1
|
||||
c1 := u64(readU8(p, 0)) |
||||
acc = c1*(1<<24+1<<16+1) + 1<<8 |
||||
|
||||
default: // 0
|
||||
return 0x2d06800538d394c2 // xxh_avalanche(key64_056 ^ key64_064)
|
||||
} |
||||
|
||||
acc ^= u64(key32_000 ^ key32_004) |
||||
return xxhAvalancheSmall(acc) |
||||
|
||||
case l <= 128: |
||||
acc = u64(l) * prime64_1 |
||||
|
||||
if l > 32 { |
||||
if l > 64 { |
||||
if l > 96 { |
||||
acc += mulFold64(readU64(p, 6*8)^key64_096, readU64(p, 7*8)^key64_104) |
||||
acc += mulFold64(readU64(p, ui(l)-8*8)^key64_112, readU64(p, ui(l)-7*8)^key64_120) |
||||
} // 96
|
||||
acc += mulFold64(readU64(p, 4*8)^key64_064, readU64(p, 5*8)^key64_072) |
||||
acc += mulFold64(readU64(p, ui(l)-6*8)^key64_080, readU64(p, ui(l)-5*8)^key64_088) |
||||
} // 64
|
||||
acc += mulFold64(readU64(p, 2*8)^key64_032, readU64(p, 3*8)^key64_040) |
||||
acc += mulFold64(readU64(p, ui(l)-4*8)^key64_048, readU64(p, ui(l)-3*8)^key64_056) |
||||
} // 32
|
||||
acc += mulFold64(readU64(p, 0*8)^key64_000, readU64(p, 1*8)^key64_008) |
||||
acc += mulFold64(readU64(p, ui(l)-2*8)^key64_016, readU64(p, ui(l)-1*8)^key64_024) |
||||
|
||||
return xxh3Avalanche(acc) |
||||
|
||||
case l <= 240: |
||||
acc = u64(l) * prime64_1 |
||||
|
||||
acc += mulFold64(readU64(p, 0*16+0)^key64_000, readU64(p, 0*16+8)^key64_008) |
||||
acc += mulFold64(readU64(p, 1*16+0)^key64_016, readU64(p, 1*16+8)^key64_024) |
||||
acc += mulFold64(readU64(p, 2*16+0)^key64_032, readU64(p, 2*16+8)^key64_040) |
||||
acc += mulFold64(readU64(p, 3*16+0)^key64_048, readU64(p, 3*16+8)^key64_056) |
||||
acc += mulFold64(readU64(p, 4*16+0)^key64_064, readU64(p, 4*16+8)^key64_072) |
||||
acc += mulFold64(readU64(p, 5*16+0)^key64_080, readU64(p, 5*16+8)^key64_088) |
||||
acc += mulFold64(readU64(p, 6*16+0)^key64_096, readU64(p, 6*16+8)^key64_104) |
||||
acc += mulFold64(readU64(p, 7*16+0)^key64_112, readU64(p, 7*16+8)^key64_120) |
||||
|
||||
// avalanche
|
||||
acc = xxh3Avalanche(acc) |
||||
|
||||
// trailing groups after 128
|
||||
top := ui(l) &^ 15 |
||||
for i := ui(8 * 16); i < top; i += 16 { |
||||
acc += mulFold64(readU64(p, i+0)^readU64(key, i-125), readU64(p, i+8)^readU64(key, i-117)) |
||||
} |
||||
|
||||
// last 16 bytes
|
||||
acc += mulFold64(readU64(p, ui(l)-16)^key64_119, readU64(p, ui(l)-8)^key64_127) |
||||
|
||||
return xxh3Avalanche(acc) |
||||
|
||||
default: |
||||
acc = u64(l) * prime64_1 |
||||
|
||||
accs := [8]u64{ |
||||
prime32_3, prime64_1, prime64_2, prime64_3, |
||||
prime64_4, prime32_2, prime64_5, prime32_1, |
||||
} |
||||
|
||||
if hasAVX512 && l >= avx512Switch { |
||||
accumAVX512(&accs, p, key, u64(l)) |
||||
} else if hasAVX2 { |
||||
accumAVX2(&accs, p, key, u64(l)) |
||||
} else if hasSSE2 { |
||||
accumSSE(&accs, p, key, u64(l)) |
||||
} else { |
||||
accumScalar(&accs, p, key, u64(l)) |
||||
} |
||||
|
||||
// merge accs
|
||||
acc += mulFold64(accs[0]^key64_011, accs[1]^key64_019) |
||||
acc += mulFold64(accs[2]^key64_027, accs[3]^key64_035) |
||||
acc += mulFold64(accs[4]^key64_043, accs[5]^key64_051) |
||||
acc += mulFold64(accs[6]^key64_059, accs[7]^key64_067) |
||||
|
||||
return xxh3Avalanche(acc) |
||||
} |
||||
} |
||||
@ -1,134 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
import "math/bits" |
||||
|
||||
// HashSeed returns the hash of the byte slice with given seed.
|
||||
func HashSeed(b []byte, seed uint64) uint64 { |
||||
return hashAnySeed(*(*str)(ptr(&b)), seed) |
||||
|
||||
} |
||||
|
||||
// HashStringSeed returns the hash of the string slice with given seed.
|
||||
func HashStringSeed(s string, seed uint64) uint64 { |
||||
return hashAnySeed(*(*str)(ptr(&s)), seed) |
||||
} |
||||
|
||||
func hashAnySeed(s str, seed uint64) (acc u64) { |
||||
p, l := s.p, s.l |
||||
|
||||
switch { |
||||
case l <= 16: |
||||
switch { |
||||
case l > 8: |
||||
inputlo := readU64(p, 0) ^ (key64_024 ^ key64_032 + seed) |
||||
inputhi := readU64(p, ui(l)-8) ^ (key64_040 ^ key64_048 - seed) |
||||
folded := mulFold64(inputlo, inputhi) |
||||
return xxh3Avalanche(u64(l) + bits.ReverseBytes64(inputlo) + inputhi + folded) |
||||
|
||||
case l > 3: |
||||
seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32 |
||||
input1 := readU32(p, 0) |
||||
input2 := readU32(p, ui(l)-4) |
||||
input64 := u64(input2) + u64(input1)<<32 |
||||
keyed := input64 ^ (key64_008 ^ key64_016 - seed) |
||||
return rrmxmx(keyed, u64(l)) |
||||
|
||||
case l == 3: // 3
|
||||
c12 := u64(readU16(p, 0)) |
||||
c3 := u64(readU8(p, 2)) |
||||
acc = c12<<16 + c3 + 3<<8 |
||||
|
||||
case l > 1: // 2
|
||||
c12 := u64(readU16(p, 0)) |
||||
acc = c12*(1<<24+1)>>8 + 2<<8 |
||||
|
||||
case l == 1: // 1
|
||||
c1 := u64(readU8(p, 0)) |
||||
acc = c1*(1<<24+1<<16+1) + 1<<8 |
||||
|
||||
default: |
||||
return xxhAvalancheSmall(seed ^ key64_056 ^ key64_064) |
||||
} |
||||
|
||||
acc ^= u64(key32_000^key32_004) + seed |
||||
return xxhAvalancheSmall(acc) |
||||
|
||||
case l <= 128: |
||||
acc = u64(l) * prime64_1 |
||||
|
||||
if l > 32 { |
||||
if l > 64 { |
||||
if l > 96 { |
||||
acc += mulFold64(readU64(p, 6*8)^(key64_096+seed), readU64(p, 7*8)^(key64_104-seed)) |
||||
acc += mulFold64(readU64(p, ui(l)-8*8)^(key64_112+seed), readU64(p, ui(l)-7*8)^(key64_120-seed)) |
||||
} // 96
|
||||
acc += mulFold64(readU64(p, 4*8)^(key64_064+seed), readU64(p, 5*8)^(key64_072-seed)) |
||||
acc += mulFold64(readU64(p, ui(l)-6*8)^(key64_080+seed), readU64(p, ui(l)-5*8)^(key64_088-seed)) |
||||
} // 64
|
||||
acc += mulFold64(readU64(p, 2*8)^(key64_032+seed), readU64(p, 3*8)^(key64_040-seed)) |
||||
acc += mulFold64(readU64(p, ui(l)-4*8)^(key64_048+seed), readU64(p, ui(l)-3*8)^(key64_056-seed)) |
||||
} // 32
|
||||
acc += mulFold64(readU64(p, 0*8)^(key64_000+seed), readU64(p, 1*8)^(key64_008-seed)) |
||||
acc += mulFold64(readU64(p, ui(l)-2*8)^(key64_016+seed), readU64(p, ui(l)-1*8)^(key64_024-seed)) |
||||
|
||||
return xxh3Avalanche(acc) |
||||
|
||||
case l <= 240: |
||||
acc = u64(l) * prime64_1 |
||||
|
||||
acc += mulFold64(readU64(p, 0*16+0)^(key64_000+seed), readU64(p, 0*16+8)^(key64_008-seed)) |
||||
acc += mulFold64(readU64(p, 1*16+0)^(key64_016+seed), readU64(p, 1*16+8)^(key64_024-seed)) |
||||
acc += mulFold64(readU64(p, 2*16+0)^(key64_032+seed), readU64(p, 2*16+8)^(key64_040-seed)) |
||||
acc += mulFold64(readU64(p, 3*16+0)^(key64_048+seed), readU64(p, 3*16+8)^(key64_056-seed)) |
||||
acc += mulFold64(readU64(p, 4*16+0)^(key64_064+seed), readU64(p, 4*16+8)^(key64_072-seed)) |
||||
acc += mulFold64(readU64(p, 5*16+0)^(key64_080+seed), readU64(p, 5*16+8)^(key64_088-seed)) |
||||
acc += mulFold64(readU64(p, 6*16+0)^(key64_096+seed), readU64(p, 6*16+8)^(key64_104-seed)) |
||||
acc += mulFold64(readU64(p, 7*16+0)^(key64_112+seed), readU64(p, 7*16+8)^(key64_120-seed)) |
||||
|
||||
// avalanche
|
||||
acc = xxh3Avalanche(acc) |
||||
|
||||
// trailing groups after 128
|
||||
top := ui(l) &^ 15 |
||||
for i := ui(8 * 16); i < top; i += 16 { |
||||
acc += mulFold64(readU64(p, i+0)^(readU64(key, i-125)+seed), readU64(p, i+8)^(readU64(key, i-117)-seed)) |
||||
} |
||||
|
||||
// last 16 bytes
|
||||
acc += mulFold64(readU64(p, ui(l)-16)^(key64_119+seed), readU64(p, ui(l)-8)^(key64_127-seed)) |
||||
|
||||
return xxh3Avalanche(acc) |
||||
|
||||
default: |
||||
acc = u64(l) * prime64_1 |
||||
|
||||
secret := key |
||||
if seed != 0 { |
||||
secret = ptr(&[secretSize]byte{}) |
||||
initSecret(secret, seed) |
||||
} |
||||
|
||||
accs := [8]u64{ |
||||
prime32_3, prime64_1, prime64_2, prime64_3, |
||||
prime64_4, prime32_2, prime64_5, prime32_1, |
||||
} |
||||
|
||||
if hasAVX512 && l >= avx512Switch { |
||||
accumAVX512(&accs, p, secret, u64(l)) |
||||
} else if hasAVX2 { |
||||
accumAVX2(&accs, p, secret, u64(l)) |
||||
} else if hasSSE2 { |
||||
accumSSE(&accs, p, secret, u64(l)) |
||||
} else { |
||||
accumScalarSeed(&accs, p, secret, u64(l)) |
||||
} |
||||
|
||||
// merge accs
|
||||
acc += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) |
||||
acc += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) |
||||
acc += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) |
||||
acc += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) |
||||
|
||||
return xxh3Avalanche(acc) |
||||
} |
||||
} |
||||
@ -1,239 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
import ( |
||||
"encoding/binary" |
||||
"hash" |
||||
) |
||||
|
||||
// Hasher implements the hash.Hash interface
|
||||
type Hasher struct { |
||||
acc [8]u64 |
||||
blk u64 |
||||
len u64 |
||||
key ptr |
||||
buf [_block + _stripe]byte |
||||
seed u64 |
||||
} |
||||
|
||||
var ( |
||||
_ hash.Hash = (*Hasher)(nil) |
||||
_ hash.Hash64 = (*Hasher)(nil) |
||||
) |
||||
|
||||
// New returns a new Hasher that implements the hash.Hash interface.
|
||||
func New() *Hasher { |
||||
return new(Hasher) |
||||
} |
||||
|
||||
// NewSeed returns a new Hasher that implements the hash.Hash interface.
|
||||
func NewSeed(seed uint64) *Hasher { |
||||
var h Hasher |
||||
h.Reset() |
||||
h.seed = seed |
||||
h.key = key |
||||
|
||||
// Only initiate once, not on reset.
|
||||
if seed != 0 { |
||||
h.key = ptr(&[secretSize]byte{}) |
||||
initSecret(h.key, seed) |
||||
} |
||||
return &h |
||||
} |
||||
|
||||
// Reset resets the Hash to its initial state.
|
||||
func (h *Hasher) Reset() { |
||||
h.acc = [8]u64{ |
||||
prime32_3, prime64_1, prime64_2, prime64_3, |
||||
prime64_4, prime32_2, prime64_5, prime32_1, |
||||
} |
||||
h.blk = 0 |
||||
h.len = 0 |
||||
} |
||||
|
||||
// BlockSize returns the hash's underlying block size.
|
||||
// The Write method will accept any amount of data, but
|
||||
// it may operate more efficiently if all writes are a
|
||||
// multiple of the block size.
|
||||
func (h *Hasher) BlockSize() int { return _stripe } |
||||
|
||||
// Size returns the number of bytes Sum will return.
|
||||
func (h *Hasher) Size() int { return 8 } |
||||
|
||||
// Sum appends the current hash to b and returns the resulting slice.
|
||||
// It does not change the underlying hash state.
|
||||
func (h *Hasher) Sum(b []byte) []byte { |
||||
var tmp [8]byte |
||||
binary.BigEndian.PutUint64(tmp[:], h.Sum64()) |
||||
return append(b, tmp[:]...) |
||||
} |
||||
|
||||
// Write adds more data to the running hash.
|
||||
// It never returns an error.
|
||||
func (h *Hasher) Write(buf []byte) (int, error) { |
||||
h.update(buf) |
||||
return len(buf), nil |
||||
} |
||||
|
||||
// WriteString adds more data to the running hash.
|
||||
// It never returns an error.
|
||||
func (h *Hasher) WriteString(buf string) (int, error) { |
||||
h.updateString(buf) |
||||
return len(buf), nil |
||||
} |
||||
|
||||
func (h *Hasher) update(buf []byte) { |
||||
// relies on the data pointer being the first word in the string header
|
||||
h.updateString(*(*string)(ptr(&buf))) |
||||
} |
||||
|
||||
func (h *Hasher) updateString(buf string) { |
||||
if h.key == nil { |
||||
h.key = key |
||||
h.Reset() |
||||
} |
||||
|
||||
// On first write, if more than 1 block, process without copy.
|
||||
for h.len == 0 && len(buf) > len(h.buf) { |
||||
if hasAVX2 { |
||||
accumBlockAVX2(&h.acc, *(*ptr)(ptr(&buf)), h.key) |
||||
} else if hasSSE2 { |
||||
accumBlockSSE(&h.acc, *(*ptr)(ptr(&buf)), h.key) |
||||
} else { |
||||
accumBlockScalar(&h.acc, *(*ptr)(ptr(&buf)), h.key) |
||||
} |
||||
buf = buf[_block:] |
||||
h.blk++ |
||||
} |
||||
|
||||
for len(buf) > 0 { |
||||
if h.len < u64(len(h.buf)) { |
||||
n := copy(h.buf[h.len:], buf) |
||||
h.len += u64(n) |
||||
buf = buf[n:] |
||||
continue |
||||
} |
||||
|
||||
if hasAVX2 { |
||||
accumBlockAVX2(&h.acc, ptr(&h.buf), h.key) |
||||
} else if hasSSE2 { |
||||
accumBlockSSE(&h.acc, ptr(&h.buf), h.key) |
||||
} else { |
||||
accumBlockScalar(&h.acc, ptr(&h.buf), h.key) |
||||
} |
||||
|
||||
h.blk++ |
||||
h.len = _stripe |
||||
copy(h.buf[:_stripe], h.buf[_block:]) |
||||
} |
||||
} |
||||
|
||||
// Sum64 returns the 64-bit hash of the written data.
|
||||
func (h *Hasher) Sum64() uint64 { |
||||
if h.key == nil { |
||||
h.key = key |
||||
h.Reset() |
||||
} |
||||
|
||||
if h.blk == 0 { |
||||
if h.seed == 0 { |
||||
return Hash(h.buf[:h.len]) |
||||
} |
||||
return HashSeed(h.buf[:h.len], h.seed) |
||||
} |
||||
|
||||
l := h.blk*_block + h.len |
||||
acc := l * prime64_1 |
||||
accs := h.acc |
||||
|
||||
if h.len > 0 { |
||||
// We are only ever doing 1 block here, so no avx512.
|
||||
if hasAVX2 { |
||||
accumAVX2(&accs, ptr(&h.buf[0]), h.key, h.len) |
||||
} else if hasSSE2 { |
||||
accumSSE(&accs, ptr(&h.buf[0]), h.key, h.len) |
||||
} else { |
||||
accumScalar(&accs, ptr(&h.buf[0]), h.key, h.len) |
||||
} |
||||
} |
||||
|
||||
if h.seed == 0 { |
||||
acc += mulFold64(accs[0]^key64_011, accs[1]^key64_019) |
||||
acc += mulFold64(accs[2]^key64_027, accs[3]^key64_035) |
||||
acc += mulFold64(accs[4]^key64_043, accs[5]^key64_051) |
||||
acc += mulFold64(accs[6]^key64_059, accs[7]^key64_067) |
||||
} else { |
||||
secret := h.key |
||||
acc += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) |
||||
acc += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) |
||||
acc += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) |
||||
acc += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) |
||||
} |
||||
|
||||
acc = xxh3Avalanche(acc) |
||||
|
||||
return acc |
||||
} |
||||
|
||||
// Sum128 returns the 128-bit hash of the written data.
|
||||
func (h *Hasher) Sum128() Uint128 { |
||||
if h.key == nil { |
||||
h.key = key |
||||
h.Reset() |
||||
} |
||||
|
||||
if h.blk == 0 { |
||||
if h.seed == 0 { |
||||
return Hash128(h.buf[:h.len]) |
||||
} |
||||
return Hash128Seed(h.buf[:h.len], h.seed) |
||||
} |
||||
|
||||
l := h.blk*_block + h.len |
||||
acc := Uint128{Lo: l * prime64_1, Hi: ^(l * prime64_2)} |
||||
accs := h.acc |
||||
|
||||
if h.len > 0 { |
||||
// We are only ever doing 1 block here, so no avx512.
|
||||
if hasAVX2 { |
||||
accumAVX2(&accs, ptr(&h.buf[0]), h.key, h.len) |
||||
} else if hasSSE2 { |
||||
accumSSE(&accs, ptr(&h.buf[0]), h.key, h.len) |
||||
} else { |
||||
accumScalar(&accs, ptr(&h.buf[0]), h.key, h.len) |
||||
} |
||||
} |
||||
|
||||
if h.seed == 0 { |
||||
acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019) |
||||
acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125) |
||||
|
||||
acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035) |
||||
acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141) |
||||
|
||||
acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051) |
||||
acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157) |
||||
|
||||
acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067) |
||||
acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173) |
||||
} else { |
||||
secret := h.key |
||||
const hi_off = 117 - 11 |
||||
|
||||
acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) |
||||
acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off)) |
||||
|
||||
acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) |
||||
acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off)) |
||||
|
||||
acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) |
||||
acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off)) |
||||
|
||||
acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) |
||||
acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off)) |
||||
} |
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo) |
||||
acc.Hi = xxh3Avalanche(acc.Hi) |
||||
|
||||
return acc |
||||
} |
||||
@ -1,129 +0,0 @@
|
||||
package xxh3 |
||||
|
||||
import ( |
||||
"math/bits" |
||||
"unsafe" |
||||
) |
||||
|
||||
// Uint128 is a 128 bit value.
// The actual value can be thought of as u.Hi<<64 | u.Lo.
type Uint128 struct {
	Hi uint64
	Lo uint64
}

// Bytes returns the value as 16 bytes in canonical (big-endian) form: the
// eight bytes of Hi, most significant first, followed by those of Lo.
func (u Uint128) Bytes() [16]byte {
	var out [16]byte
	for i := 0; i < 8; i++ {
		shift := uint(56 - 8*i)
		out[i] = byte(u.Hi >> shift)
		out[8+i] = byte(u.Lo >> shift)
	}
	return out
}
||||
|
||||
type ( |
||||
ptr = unsafe.Pointer |
||||
ui = uintptr |
||||
|
||||
u8 = uint8 |
||||
u32 = uint32 |
||||
u64 = uint64 |
||||
u128 = Uint128 |
||||
) |
||||
|
||||
type str struct { |
||||
p ptr |
||||
l uint |
||||
} |
||||
|
||||
func readU8(p ptr, o ui) uint8 { |
||||
return *(*uint8)(ptr(ui(p) + o)) |
||||
} |
||||
|
||||
func readU16(p ptr, o ui) uint16 { |
||||
b := (*[2]byte)(ptr(ui(p) + o)) |
||||
return uint16(b[0]) | uint16(b[1])<<8 |
||||
} |
||||
|
||||
func readU32(p ptr, o ui) uint32 { |
||||
b := (*[4]byte)(ptr(ui(p) + o)) |
||||
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 |
||||
} |
||||
|
||||
func readU64(p ptr, o ui) uint64 { |
||||
b := (*[8]byte)(ptr(ui(p) + o)) |
||||
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | |
||||
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 |
||||
} |
||||
|
||||
func writeU64(p ptr, o ui, v u64) { |
||||
b := (*[8]byte)(ptr(ui(p) + o)) |
||||
b[0] = byte(v) |
||||
b[1] = byte(v >> 8) |
||||
b[2] = byte(v >> 16) |
||||
b[3] = byte(v >> 24) |
||||
b[4] = byte(v >> 32) |
||||
b[5] = byte(v >> 40) |
||||
b[6] = byte(v >> 48) |
||||
b[7] = byte(v >> 56) |
||||
} |
||||
|
||||
const secretSize = 192 |
||||
|
||||
func initSecret(secret ptr, seed u64) { |
||||
for i := ui(0); i < secretSize/16; i++ { |
||||
lo := readU64(key, 16*i) + seed |
||||
hi := readU64(key, 16*i+8) - seed |
||||
writeU64(secret, 16*i, lo) |
||||
writeU64(secret, 16*i+8, hi) |
||||
} |
||||
} |
||||
|
||||
func xxh64AvalancheSmall(x u64) u64 { |
||||
// x ^= x >> 33 // x must be < 32 bits
|
||||
// x ^= u64(key32_000 ^ key32_004) // caller must do this
|
||||
x *= prime64_2 |
||||
x ^= x >> 29 |
||||
x *= prime64_3 |
||||
x ^= x >> 32 |
||||
return x |
||||
} |
||||
|
||||
func xxhAvalancheSmall(x u64) u64 { |
||||
x ^= x >> 33 |
||||
x *= prime64_2 |
||||
x ^= x >> 29 |
||||
x *= prime64_3 |
||||
x ^= x >> 32 |
||||
return x |
||||
} |
||||
|
||||
func xxh64AvalancheFull(x u64) u64 { |
||||
x ^= x >> 33 |
||||
x *= prime64_2 |
||||
x ^= x >> 29 |
||||
x *= prime64_3 |
||||
x ^= x >> 32 |
||||
return x |
||||
} |
||||
|
||||
func xxh3Avalanche(x u64) u64 { |
||||
x ^= x >> 37 |
||||
x *= 0x165667919e3779f9 |
||||
x ^= x >> 32 |
||||
return x |
||||
} |
||||
|
||||
func rrmxmx(h64 u64, len u64) u64 { |
||||
h64 ^= bits.RotateLeft64(h64, 49) ^ bits.RotateLeft64(h64, 24) |
||||
h64 *= 0x9fb21c651e98df25 |
||||
h64 ^= (h64 >> 35) + len |
||||
h64 *= 0x9fb21c651e98df25 |
||||
h64 ^= (h64 >> 28) |
||||
return h64 |
||||
} |
||||
|
||||
func mulFold64(x, y u64) u64 { |
||||
hi, lo := bits.Mul64(x, y) |
||||
return hi ^ lo |
||||
} |
||||
Loading…
Reference in new issue