Groups | Search | Server Info | Keyboard shortcuts | Login | Register [http] [https] [nntp] [nntps]
Groups > comp.lang.c > #385561
| From | Michael S <already5chosen@yahoo.com> |
|---|---|
| Newsgroups | comp.lang.c |
| Subject | Re: Good hash for pointers |
| Date | 2024-06-05 00:59 +0300 |
| Organization | A noiseless patient Spider |
| Message-ID | <20240605005916.00001b33@yahoo.com> (permalink) |
| References | (6 earlier) <8634q5hjsp.fsf@linuxsc.com> <v2vmhr$3ffjk$1@raubtier-asyl.eternal-september.org> <86le3wfsmd.fsf@linuxsc.com> <v2voe7$3fr50$1@raubtier-asyl.eternal-september.org> <86ed9ofq14.fsf@linuxsc.com> |
On Sun, 26 May 2024 10:20:55 -0700
Tim Rentsch <tr.17687@z991.linuxsc.com> wrote:
> Bonita Montero <Bonita.Montero@gmail.com> writes:
>
> > Am 26.05.2024 um 18:24 schrieb Tim Rentsch:
> >
> >> Bonita Montero <Bonita.Montero@gmail.com> writes:
> >>
> >>> Am 25.05.2024 um 19:40 schrieb Tim Rentsch:
> >>>
> >>>> Bonita Montero <Bonita.Montero@gmail.com> writes:
> >>>>
> >>>>> Am 25.05.2024 um 11:12 schrieb Tim Rentsch:
> >>>>>
> >>>>>> Your hash function is expensive to compute, moreso even
> >>>>>> than the "FNV" function shown earlier. In a case like
> >>>>>> this one where the compares are cheap, it's better to
> >>>>>> have a dumb-but-fast hash function that might need a
> >>>>>> few more looks to find an open slot, because the cost
> >>>>>> of looking is so cheap compared to computing the hash
> >>>>>> function.
> >>>>>
> >>>>> A (size_t)pointer * LARGE_PRIME can be sufficient,
> >>>>> ignoring the overflow.
> >>>>
> >>>> Plenty fast but the output quality is poor. ...
> >>>
> >>> If the prime is large enough there'e no regular distribution.
> >>
> >> Your conjecture is contradicted by empirical facts.
> >
> > There are no empirival facts for that since two times the
> > taken prime is beyond the 64 bit address space.
>
> You don't listen very well do you?
>
> I say the output quality is poor because I have run tests that
> show the poor output quality. I've done that with a prime of my
> own choosing and also with 18446744073709551557, the value you
> suggested. In both cases the test runs show results that are
> clearly worse than all the other hash functions tested, including
> bart's and malcolm's. Furthermore the results for your suggested
> calculation are worse across the board, on every variety of
> dynamic workload in my test suite.
>
> Your proposed hash function is too weak to be taken as a serious
> candidate.
18446744073709551557 is indeed very bad (too close to 2**64).
But I tried other prime and at least in my simple tests it performs
rather well.
May be my tests are to simplistic, I don't pretend to understand much
about it.
// hash_aes4.c
// Reference. Hopefully behaves similarly to crypto hash
#include <stdint.h>
#include <string.h>
#include <x86intrin.h>
uint64_t hash_ref(void* key)
{
uint64_t ukey = (uint64_t)key;
__m128i xkey = _mm_set_epi64x(ukey, 42);
static const uint64_t bar[8] = {
0xBB09BBCC90B24BF2, 0x825C622FF2792A01,
0x94F0535CB06D4060, 0x939C756246DBFD1D,
0x5B835E01A7E14CA1, 0xAC2BDAFC023CDD06,
0xE0B6A4735B774AEC, 0x9CAFB43E7DDE494C,
};
for (int i = 0; i < 4; ++i) {
__m128i rkey;
memcpy(&rkey, &bar[i*2], sizeof(rkey));
xkey = _mm_aesenc_si128(xkey, rkey);
}
memcpy(&ukey, &xkey, sizeof(ukey));
return ukey;
}
// hash_bonita.c
// Bonita's with different prime
#include <stdint.h>
uint64_t hash_foo(void* key)
{
uint64_t ukey = (uint64_t)key;
static const uint64_t LARGE_PRIME = 0xbb09bbcc90b24bcdull;
return ukey * LARGE_PRIME;
}
// ptr_hash.c
// test bench
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
uint64_t hash_foo(void*);
uint64_t hash_ref(void*);
static void test(
void** pointers, size_t n_pointers,
size_t* table, size_t tab_sz,
uint64_t (*hash_function)(void*));
static const char UsageStr[] =
"ptr_hash - examine performance of pointer hash function\n"
"Usage\n"
"ptr_hash n m sz1 [sz2]\n"
"where\n"
" n - the size of hash table\n"
" m - # of pointers to put in the table\n"
" sz1 - minimal size of allocated blocks\n"
" sz2 - [optional] maximal size of allocated blocks. Default = sz1\n"
;
int main(int argz, char** argv)
{
if (argz < 4) {
fprintf(stderr, "%s", UsageStr);
return 1;
}
size_t params[4];
for (int arg_i = 1; arg_i < 5 && arg_i < argz; ++arg_i) {
long val = strtol(argv[arg_i], NULL, 0);
if (val <= 0) {
fprintf(stderr
,"Bad parameter %s. Please specify positive number\n"
,argv[arg_i]);
return 1;
}
params[arg_i-1] = val;
}
size_t n = params[0];
size_t m = params[1];
size_t sz1 = params[2];
size_t sz2 = params[3];
if (argz == 4)
sz2 = sz1;
if (sz1 > sz2) {
size_t tmp = sz1; sz1 = sz2; sz2 = tmp;
}
int ret = 1;
void **pointers = malloc(m * sizeof(*pointers));
if (pointers) {
size_t* table = malloc(n * sizeof(*table));
if (table) {
ret = 0;
srand(1);
const unsigned __int128 RAND_SCALE = (unsigned __int128)RAND_MAX
+ 1;
for (size_t i = 0; i < m; ++i) {
size_t r = rand();
size_t sz = (unsigned __int128)(sz2-sz1+1)*r / RAND_SCALE + sz1;
void* block = malloc(sz);
if (!block) {
ret = 1;
perror("malloc()");
for (size_t k = 0; k < i; ++k)
free(pointers[k]);
break;
}
pointers[i] = block;
}
if (ret == 0) {
for (size_t i = 0; i < m; ++i)
free(pointers[i]); // we don't need allocated blocks for a
test
printf("ref: ");
test(pointers, m, table, n, hash_ref);
printf("uut: ");
test(pointers, m, table, n, hash_foo);
}
free(table);
} else {
perror("malloc()");
}
free(pointers);
} else {
perror("malloc()");
}
return ret;
}
static void test(
void** pointers, size_t n_pointers,
size_t* table, size_t tab_sz,
uint64_t (*hash_function)(void*))
{
for (size_t i = 0; i < tab_sz; ++i)
table[i] = 0;
// simulate hash operation
for (size_t i = 0; i < n_pointers; ++i) {
uint64_t h = hash_function(pointers[i]);
table[(size_t)(((unsigned __int128)tab_sz * h)>>64)] += 1;
}
// statistics
size_t occupied = 0;
size_t collisions = 0;
size_t worst = 0;
for (size_t i = 0; i < tab_sz; ++i) {
size_t c = table[i];
occupied += c != 0;
collisions += c > 1;
if (worst < c)
worst = c;
}
printf(
"%zu keys, %zu slots, %zu occupied %zu collisions. Worst collision:
%zu\n"
,n_pointers
, tab_sz
, occupied
, collisions
, worst
);
}
Compiled as:
gcc -s -Wall -O2 -march=native ptr_hash.c hash_bonita.c hash_aes4.c
Run as:
./a 100000 75000 80 1000
Result:
ref: 75000 keys, 100000 slots, 52799 occupied 17431 collisions. Worst
collision: 7
uut: 75000 keys, 100000 slots, 53201 occupied 17139 collisions. Worst
collision: 6
Back to comp.lang.c | Previous | Next — Previous in thread | Next in thread | Find similar
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-25 10:40 -0700
Re: Good hash for pointers Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-05-25 18:56 +0100
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-25 11:23 -0700
Re: Good hash for pointers Janis Papanagnou <janis_papanagnou+ng@hotmail.com> - 2024-05-25 23:13 +0200
Re: Good hash for pointers Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-05-25 23:07 +0100
Re: Good hash for pointers bart <bc@freeuk.com> - 2024-05-25 23:42 +0100
Re: Good hash for pointers Richard Harnden <richard.nospam@gmail.invalid> - 2024-05-26 19:58 +0100
Re: Good hash for pointers Kaz Kylheku <643-408-1753@kylheku.com> - 2024-05-26 22:42 +0000
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 18:05 +0200
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 18:07 +0200
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 18:04 +0200
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-26 09:24 -0700
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 18:36 +0200
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-26 10:20 -0700
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 19:39 +0200
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 19:54 +0200
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-27 08:07 +0200
Re: Good hash for pointers Ben Bacarisse <ben@bsb.me.uk> - 2024-05-28 11:07 +0100
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-30 10:10 +0200
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-30 11:27 +0200
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-30 19:26 -0700
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-30 19:27 -0700
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-02 10:45 +0300
Re: Good hash for pointers "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-02 12:42 -0700
Re: Good hash for pointers "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-03 12:35 -0700
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-02 16:02 -0700
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 10:50 +0300
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-03 18:02 -0700
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-04 11:38 +0300
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-04 22:10 -0700
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-03 16:34 +0200
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 17:46 +0300
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-03 17:54 +0200
Re: Good hash for pointers bart <bc@freeuk.com> - 2024-06-03 17:24 +0100
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 20:16 +0300
Re: Good hash for pointers bart <bc@freeuk.com> - 2024-06-03 19:48 +0100
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 22:41 +0300
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 22:51 +0300
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-03 16:51 -0700
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-03 17:01 -0700
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-03 20:25 +0200
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 19:50 +0300
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-03 20:31 +0200
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-05 00:59 +0300
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-05 11:10 +0200
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-05 12:34 +0300
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-05 12:05 +0200
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-05 13:11 +0300
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-05 08:58 -0700
AES problem (was: Good hash for pointers) Michael S <already5chosen@yahoo.com> - 2024-06-05 19:51 +0300
Re: AES problem (was: Good hash for pointers) Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-05 21:24 -0700
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-05 19:59 +0300
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-05 21:40 -0700
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-06 11:00 +0300
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-06 13:35 +0300
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-16 04:38 -0700
Re: Good hash for pointers "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-16 12:34 -0700
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-07 20:53 +0200
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-09 13:35 +0200
Re: Good hash for pointers Richard Harnden <richard.nospam@gmail.invalid> - 2024-06-09 12:40 +0100
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-09 15:09 +0200
Re: Good hash for pointers Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-10 01:34 +0100
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-09 18:31 -0700
Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-10 15:14 +0300
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-10 14:40 +0200
Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-15 20:32 -0700
Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-10 09:35 +0200
csiph-web