Groups | Search | Server Info | Keyboard shortcuts | Login | Register [http] [https] [nntp] [nntps]


Groups > comp.lang.c > #385561

Re: Good hash for pointers

From Michael S <already5chosen@yahoo.com>
Newsgroups comp.lang.c
Subject Re: Good hash for pointers
Date 2024-06-05 00:59 +0300
Organization A noiseless patient Spider
Message-ID <20240605005916.00001b33@yahoo.com> (permalink)
References (6 earlier) <8634q5hjsp.fsf@linuxsc.com> <v2vmhr$3ffjk$1@raubtier-asyl.eternal-september.org> <86le3wfsmd.fsf@linuxsc.com> <v2voe7$3fr50$1@raubtier-asyl.eternal-september.org> <86ed9ofq14.fsf@linuxsc.com>

Show all headers | View raw


On Sun, 26 May 2024 10:20:55 -0700
Tim Rentsch <tr.17687@z991.linuxsc.com> wrote:

> Bonita Montero <Bonita.Montero@gmail.com> writes:
> 
> > Am 26.05.2024 um 18:24 schrieb Tim Rentsch:
> >  
> >> Bonita Montero <Bonita.Montero@gmail.com> writes:
> >>  
> >>> Am 25.05.2024 um 19:40 schrieb Tim Rentsch:
> >>>  
> >>>> Bonita Montero <Bonita.Montero@gmail.com> writes:
> >>>>  
> >>>>> Am 25.05.2024 um 11:12 schrieb Tim Rentsch:
> >>>>>  
> >>>>>> Your hash function is expensive to compute, moreso even
> >>>>>> than the "FNV" function shown earlier.  In a case like
> >>>>>> this one where the compares are cheap, it's better to
> >>>>>> have a dumb-but-fast hash function that might need a
> >>>>>> few more looks to find an open slot, because the cost
> >>>>>> of looking is so cheap compared to computing the hash
> >>>>>> function.  
> >>>>>
> >>>>> A (size_t)pointer * LARGE_PRIME can be sufficient,
> >>>>> ignoring the overflow.  
> >>>>
> >>>> Plenty fast but the output quality is poor. ...  
> >>>
> >>> If the prime is large enough there'e no regular distribution.  
> >>
> >> Your conjecture is contradicted by empirical facts.  
> >
> > There are no empirival facts for that since two times the
> > taken prime is beyond the 64 bit address space.  
> 
> You don't listen very well do you?
> 
> I say the output quality is poor because I have run tests that
> show the poor output quality.  I've done that with a prime of my
> own choosing and also with 18446744073709551557, the value you
> suggested.  In both cases the test runs show results that are
> clearly worse than all the other hash functions tested, including
> bart's and malcolm's.  Furthermore the results for your suggested
> calculation are worse across the board, on every variety of
> dynamic workload in my test suite.
> 
> Your proposed hash function is too weak to be taken as a serious
> candidate.

18446744073709551557 is indeed very bad (too close to 2**64).
But I tried other prime and at least in my simple tests it performs
rather well.
May be my tests are to simplistic, I don't pretend to understand much
about it.

// hash_aes4.c
// Reference. Hopefully behaves similarly to crypto hash
#include <stdint.h>
#include <string.h>
#include <x86intrin.h>

uint64_t hash_ref(void* key)
{
  uint64_t ukey = (uint64_t)key;
  __m128i xkey = _mm_set_epi64x(ukey, 42);
  static const uint64_t bar[8] = {
    0xBB09BBCC90B24BF2, 0x825C622FF2792A01,
    0x94F0535CB06D4060, 0x939C756246DBFD1D,
    0x5B835E01A7E14CA1, 0xAC2BDAFC023CDD06,
    0xE0B6A4735B774AEC, 0x9CAFB43E7DDE494C,
  };
  for (int i = 0; i < 4; ++i) {
    __m128i rkey;
    memcpy(&rkey, &bar[i*2], sizeof(rkey));
    xkey = _mm_aesenc_si128(xkey, rkey);
  }
  memcpy(&ukey, &xkey, sizeof(ukey));
  return ukey;
}

// hash_bonita.c 
// Bonita's with different prime
#include <stdint.h>

uint64_t hash_foo(void* key)
{
  uint64_t ukey = (uint64_t)key;
  static const uint64_t LARGE_PRIME = 0xbb09bbcc90b24bcdull;
  return ukey * LARGE_PRIME;
}


// ptr_hash.c 
// test bench
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

uint64_t hash_foo(void*);
uint64_t hash_ref(void*);

static void test(
 void** pointers, size_t n_pointers,
 size_t* table,   size_t tab_sz,
 uint64_t (*hash_function)(void*));

static const char UsageStr[] =
"ptr_hash - examine performance of pointer hash function\n"
"Usage\n"
"ptr_hash n m sz1 [sz2]\n"
"where\n"
" n   - the size of hash table\n"
" m   - # of pointers to put in the table\n"
" sz1 - minimal size of allocated blocks\n"
" sz2 - [optional] maximal size of allocated blocks. Default = sz1\n"
;
int main(int argz, char** argv)
{
  if (argz < 4) {
    fprintf(stderr, "%s", UsageStr);
    return 1;
  }

  size_t params[4];
  for (int arg_i = 1; arg_i < 5 && arg_i < argz; ++arg_i) {
    long val = strtol(argv[arg_i], NULL, 0);
    if (val <= 0) {
      fprintf(stderr
        ,"Bad parameter %s. Please specify positive number\n"
        ,argv[arg_i]);
      return 1;
    }
    params[arg_i-1] = val;
  }
  size_t n = params[0];
  size_t m = params[1];
  size_t sz1 = params[2];
  size_t sz2 = params[3];
  if (argz == 4)
    sz2 = sz1;
  if (sz1 > sz2) {
    size_t tmp = sz1; sz1 = sz2; sz2 = tmp;
  }

  int ret = 1;
  void **pointers = malloc(m * sizeof(*pointers));
  if (pointers) {
    size_t* table = malloc(n * sizeof(*table));
    if (table) {
      ret = 0;
      srand(1);
      const unsigned __int128 RAND_SCALE = (unsigned __int128)RAND_MAX
  + 1;
      for (size_t i = 0; i < m; ++i) {
        size_t r = rand();
        size_t sz = (unsigned __int128)(sz2-sz1+1)*r / RAND_SCALE + sz1;
        void* block = malloc(sz);
        if (!block) {
          ret = 1;
          perror("malloc()");
          for (size_t k = 0; k < i; ++k)
            free(pointers[k]);
          break;
        }
        pointers[i] = block;
      }
      if (ret == 0) {
        for (size_t i = 0; i < m; ++i)
          free(pointers[i]); // we don't need allocated blocks for a
  test
        printf("ref: ");
        test(pointers, m, table, n, hash_ref);
        printf("uut: ");
        test(pointers, m, table, n, hash_foo);
      }
      free(table);
    } else {
      perror("malloc()");
    }
    free(pointers);
  } else {
    perror("malloc()");
  }

  return ret;
}

static void test(
 void** pointers, size_t n_pointers,
 size_t* table,   size_t tab_sz,
 uint64_t (*hash_function)(void*))
{
  for (size_t i = 0; i < tab_sz; ++i)
    table[i] = 0;
  // simulate hash operation
  for (size_t i = 0; i < n_pointers; ++i) {
    uint64_t h = hash_function(pointers[i]);
    table[(size_t)(((unsigned __int128)tab_sz * h)>>64)] += 1;
  }
  // statistics
  size_t occupied = 0;
  size_t collisions = 0;
  size_t worst = 0;
  for (size_t i = 0; i < tab_sz; ++i) {
    size_t c = table[i];
    occupied += c != 0;
    collisions += c > 1;
    if (worst < c)
      worst = c;
  }
  printf(
  "%zu keys, %zu slots, %zu occupied %zu collisions. Worst collision:
%zu\n"
  ,n_pointers
  , tab_sz
  , occupied
  , collisions
  , worst
  );
}


Compiled as:
gcc -s -Wall -O2 -march=native ptr_hash.c hash_bonita.c hash_aes4.c


Run as:
./a 100000 75000 80 1000

Result:
ref: 75000 keys, 100000 slots, 52799 occupied 17431 collisions. Worst
collision: 7 
uut: 75000 keys, 100000 slots, 53201 occupied 17139 collisions. Worst
collision: 6





Back to comp.lang.c | Previous | NextPrevious in thread | Next in thread | Find similar


Thread

Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-25 10:40 -0700
  Re: Good hash for pointers Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-05-25 18:56 +0100
    Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-25 11:23 -0700
    Re: Good hash for pointers Janis Papanagnou <janis_papanagnou+ng@hotmail.com> - 2024-05-25 23:13 +0200
      Re: Good hash for pointers Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-05-25 23:07 +0100
        Re: Good hash for pointers bart <bc@freeuk.com> - 2024-05-25 23:42 +0100
          Re: Good hash for pointers Richard Harnden <richard.nospam@gmail.invalid> - 2024-05-26 19:58 +0100
          Re: Good hash for pointers Kaz Kylheku <643-408-1753@kylheku.com> - 2024-05-26 22:42 +0000
    Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 18:05 +0200
      Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 18:07 +0200
  Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 18:04 +0200
    Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-26 09:24 -0700
      Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 18:36 +0200
        Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-26 10:20 -0700
          Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 19:39 +0200
            Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-26 19:54 +0200
            Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-27 08:07 +0200
              Re: Good hash for pointers Ben Bacarisse <ben@bsb.me.uk> - 2024-05-28 11:07 +0100
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-30 10:10 +0200
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-05-30 11:27 +0200
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-30 19:26 -0700
            Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-05-30 19:27 -0700
              Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-02 10:45 +0300
                Re: Good hash for pointers "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-02 12:42 -0700
                Re: Good hash for pointers "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-03 12:35 -0700
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-02 16:02 -0700
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 10:50 +0300
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-03 18:02 -0700
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-04 11:38 +0300
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-04 22:10 -0700
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-03 16:34 +0200
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 17:46 +0300
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-03 17:54 +0200
                Re: Good hash for pointers bart <bc@freeuk.com> - 2024-06-03 17:24 +0100
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 20:16 +0300
                Re: Good hash for pointers bart <bc@freeuk.com> - 2024-06-03 19:48 +0100
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 22:41 +0300
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 22:51 +0300
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-03 16:51 -0700
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-03 17:01 -0700
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-03 20:25 +0200
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-03 19:50 +0300
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-03 20:31 +0200
          Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-05 00:59 +0300
            Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-05 11:10 +0200
              Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-05 12:34 +0300
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-05 12:05 +0200
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-05 13:11 +0300
            Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-05 08:58 -0700
              AES problem (was: Good hash for pointers) Michael S <already5chosen@yahoo.com> - 2024-06-05 19:51 +0300
                Re: AES problem (was: Good hash for pointers) Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-05 21:24 -0700
              Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-05 19:59 +0300
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-05 21:40 -0700
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-06 11:00 +0300
              Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-06 13:35 +0300
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-16 04:38 -0700
                Re: Good hash for pointers "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-16 12:34 -0700
            Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-07 20:53 +0200
              Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-09 13:35 +0200
                Re: Good hash for pointers Richard Harnden <richard.nospam@gmail.invalid> - 2024-06-09 12:40 +0100
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-09 15:09 +0200
                Re: Good hash for pointers Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-10 01:34 +0100
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-09 18:31 -0700
                Re: Good hash for pointers Michael S <already5chosen@yahoo.com> - 2024-06-10 15:14 +0300
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-10 14:40 +0200
                Re: Good hash for pointers Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-15 20:32 -0700
                Re: Good hash for pointers Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-10 09:35 +0200

csiph-web