diff options
| author | Kotresh HR <khiremat@redhat.com> | 2017-06-28 05:09:12 -0400 | 
|---|---|---|
| committer | Niels de Vos <ndevos@redhat.com> | 2017-06-30 08:16:57 +0000 | 
| commit | 292b4e42fdc023e307fde35e189285040d4b9cdd (patch) | |
| tree | 1c91585e4a9c4bd3871666622f8fe5f158c67ec0 | |
| parent | d95535bae2d200c2210feac7568b1fdbf6f545a9 (diff) | |
contrib/xxhash: Add xxhash library
xxhash is a faster non-cryptographic hash.
https://github.com/Cyan4973/xxHash
Release Taken: "xxHash v0.6.2"
--------------
Files added:
  contrib/xxhash/xxhash.c
  contrib/xxhash/xxhash.h
  contrib/xxhash/xxhsum.c
Modifications to source:
------------------------
The following functions and data types were given a 'GF_' prefix,
as listed below, to avoid any name collisions in the future.
    ---- Functions ----
    GF_XXH_versionNumber
    GF_XXH32
    GF_XXH32_createState
    GF_XXH32_freeState
    GF_XXH32_copyState
    GF_XXH32_reset
    GF_XXH32_update
    GF_XXH32_digest
    GF_XXH32_canonicalFromHash
    GF_XXH32_hashFromCanonical
    GF_XXH64
    GF_XXH64_createState
    GF_XXH64_freeState
    GF_XXH64_copyState
    GF_XXH64_reset
    GF_XXH64_update
    GF_XXH64_digest
    GF_XXH64_canonicalFromHash
    GF_XXH64_hashFromCanonical
    ---- Data Types ----
    GF_XXH_errorcode
    GF_XXH32_state_t*
    GF_XXH32_canonical_t*
    GF_XXH32_hash_t
    GF_XXH64_state_t*
    GF_XXH64_canonical_t*
    GF_XXH64_hash_t
It is linked with libglusterfs.so. A wrapper
function is also added in common-utils.c for
easy usage.
xxhash can be used for all the use cases where
a faster non-cryptographic hash is required.
The gfid-to-path infrastructure will use this for now.
NOTE:
----
The gluster coding guidelines check is ignored
as maintaining it further would be difficult.
Updates: #253
Change-Id: Ib143f90d91d4ee99864a10246d5983e92900173b
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://review.gluster.org/17641
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Amar Tumballi <amarts@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
| -rw-r--r-- | MAINTAINERS | 8 | ||||
| -rw-r--r-- | contrib/xxhash/xxhash.c | 890 | ||||
| -rw-r--r-- | contrib/xxhash/xxhash.h | 301 | ||||
| -rw-r--r-- | contrib/xxhash/xxhsum.c | 1238 | ||||
| -rw-r--r-- | libglusterfs/src/Makefile.am | 5 | ||||
| -rw-r--r-- | libglusterfs/src/common-utils.c | 18 | ||||
| -rw-r--r-- | libglusterfs/src/common-utils.h | 5 | 
7 files changed, 2464 insertions, 1 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index b1ccca0f48a..4bb7aebc238 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -191,6 +191,14 @@ M: Pranith Karampuri <pkarampu@redhat.com>  S: Maintained  F: libglusterfs/ +xxhash +M: Aravinda VK <avishwan@redhat.com> +M: Kotresh HR <khiremat@redhat.com> +P: Amar Tumballi <amarts@redhat.com> +S: Maintained +F: contrib/xxhash/ +T: https://github.com/Cyan4973/xxHash.git +  Management Daemon  M: Kaushal Madappa <kmadapp@redhat.com>  M: Atin Mukherjee <amukherj@redhat.com> diff --git a/contrib/xxhash/xxhash.c b/contrib/xxhash/xxhash.c new file mode 100644 index 00000000000..d5592c2d7ee --- /dev/null +++ b/contrib/xxhash/xxhash.c @@ -0,0 +1,890 @@ +/* +*  xxHash - Fast Hash algorithm +*  Copyright (C) 2012-2016, Yann Collet +* +*  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +* +*  Redistribution and use in source and binary forms, with or without +*  modification, are permitted provided that the following conditions are +*  met: +* +*  * Redistributions of source code must retain the above copyright +*  notice, this list of conditions and the following disclaimer. +*  * Redistributions in binary form must reproduce the above +*  copyright notice, this list of conditions and the following disclaimer +*  in the documentation and/or other materials provided with the +*  distribution. +* +*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +*  A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*  You can contact the author at : +*  - xxHash homepage: http://www.xxhash.com +*  - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* ************************************* +*  Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + *            It can generate buggy code on targets which do not support unaligned memory accesses. + *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */ +#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +#    define XXH_FORCE_MEMORY_ACCESS 2 +#  elif defined(__INTEL_COMPILER) || \ +  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +#    define XXH_FORCE_MEMORY_ACCESS 1 +#  endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT   /* can be defined externally */ +#  define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. 
+ */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +#  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +#    define XXH_FORCE_ALIGN_CHECK 0 +#  else +#    define XXH_FORCE_ALIGN_CHECK 1 +#  endif +#endif + + +/* ************************************* +*  Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ +#include <stdlib.h> +static void* XXH_malloc(size_t s) { return malloc(s); } +static void  XXH_free  (void* p)  { free(p); } +/* for memcpy() */ +#include <string.h> +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + + +/* ************************************* +*  Compiler Specific Options +***************************************/ +#ifdef _MSC_VER    /* Visual Studio */ +#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */ +#  define FORCE_INLINE static __forceinline +#else +#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */ +#    ifdef __GNUC__ +#      define FORCE_INLINE static inline __attribute__((always_inline)) +#    else +#      define FORCE_INLINE static inline +#    endif +#  else +#    define FORCE_INLINE static +#  endif /* __STDC_VERSION__ */ +#endif + + +/* ************************************* +*  Basic Types +***************************************/ +#ifndef MEM_MODULE +# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +#   include <stdint.h> +    typedef uint8_t  BYTE; +    typedef uint16_t U16; +    typedef uint32_t U32; +    typedef  int32_t S32; +# else +    typedef unsigned char      BYTE; +    typedef unsigned short     U16; +    typedef unsigned int       U32; +    typedef   signed int       S32; +# endif 
+#endif + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; } __attribute__((packed)) unalign; +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ +static U32 XXH_read32(const void* memPtr) +{ +    U32 val; +    memcpy(&val, memPtr, sizeof(val)); +    return val; +} + +#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +*  Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +#  define XXH_rotl32(x,r) _rotl(x,r) +#  define XXH_rotl64(x,r) _rotl64(x,r) +#else +#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER)     /* Visual Studio */ +#  define XXH_swap32 _byteswap_ulong +#elif GCC_VERSION >= 403 +#  define XXH_swap32 __builtin_bswap32 +#else +static U32 XXH_swap32 (U32 x) +{ +    return  ((x << 24) & 0xff000000 ) | +            ((x <<  8) & 0x00ff0000 ) | +            ((x >>  8) & 0x0000ff00 ) | +            ((x >> 24) & 0x000000ff ); +} +#endif + + +/* ************************************* +*  Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* 
XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN +    static const int g_one = 1; +#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one)) +#endif + + +/* *************************** +*  Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ +    if (align==XXH_unaligned) +        return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); +    else +        return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ +    return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ +    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + + +/* ************************************* +*  Macros +***************************************/ +#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }    /* use only *after* variable declarations */ +XXH_PUBLIC_API unsigned GF_XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +*  32-bits hash functions +*********************************************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 =  668265263U; +static const U32 PRIME32_5 =  374761393U; + +static U32 XXH32_round(U32 seed, U32 input) +{ +    seed += input * PRIME32_2; +    seed  = XXH_rotl32(seed, 13); +    seed *= PRIME32_1; +    return seed; +} + +FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ +    const BYTE* p = (const 
BYTE*)input; +    const BYTE* bEnd = p + len; +    U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +    if (p==NULL) { +        len=0; +        bEnd=p=(const BYTE*)(size_t)16; +    } +#endif + +    if (len>=16) { +        const BYTE* const limit = bEnd - 16; +        U32 v1 = seed + PRIME32_1 + PRIME32_2; +        U32 v2 = seed + PRIME32_2; +        U32 v3 = seed + 0; +        U32 v4 = seed - PRIME32_1; + +        do { +            v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; +            v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; +            v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; +            v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; +        } while (p<=limit); + +        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); +    } else { +        h32  = seed + PRIME32_5; +    } + +    h32 += (U32) len; + +    while (p+4<=bEnd) { +        h32 += XXH_get32bits(p) * PRIME32_3; +        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ; +        p+=4; +    } + +    while (p<bEnd) { +        h32 += (*p) * PRIME32_5; +        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; +        p++; +    } + +    h32 ^= h32 >> 15; +    h32 *= PRIME32_2; +    h32 ^= h32 >> 13; +    h32 *= PRIME32_3; +    h32 ^= h32 >> 16; + +    return h32; +} + + +XXH_PUBLIC_API unsigned int GF_XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 +    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ +    XXH32_CREATESTATE_STATIC(state); +    GF_XXH32_reset(state, seed); +    GF_XXH32_update(state, input, len); +    return GF_XXH32_digest(state); +#else +    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +    if (XXH_FORCE_ALIGN_CHECK) { +        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */ +            if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) +        
        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); +            else +                return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); +    }   } + +    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) +        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); +    else +        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + + +/*======   Hash streaming   ======*/ + +XXH_PUBLIC_API GF_XXH32_state_t* GF_XXH32_createState(void) +{ +    return (GF_XXH32_state_t*)XXH_malloc(sizeof(GF_XXH32_state_t)); +} +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH32_freeState(GF_XXH32_state_t* statePtr) +{ +    XXH_free(statePtr); +    return XXH_OK; +} + +XXH_PUBLIC_API void GF_XXH32_copyState(GF_XXH32_state_t* restrict dstState, const GF_XXH32_state_t* restrict srcState) +{ +    memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH32_reset(GF_XXH32_state_t* statePtr, unsigned int seed) +{ +    GF_XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ +    memset(&state, 0, sizeof(state)-4);   /* do not write into reserved, for future removal */ +    state.v1 = seed + PRIME32_1 + PRIME32_2; +    state.v2 = seed + PRIME32_2; +    state.v3 = seed + 0; +    state.v4 = seed - PRIME32_1; +    memcpy(statePtr, &state, sizeof(state)); +    return XXH_OK; +} + + +FORCE_INLINE GF_XXH_errorcode XXH32_update_endian (GF_XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ +    const BYTE* p = (const BYTE*)input; +    const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +    if (input==NULL) return XXH_ERROR; +#endif + +    state->total_len_32 += (unsigned)len; +    state->large_len |= (len>=16) | (state->total_len_32>=16); + +    if (state->memsize + len < 16)  {   /* fill in tmp buffer */ +        
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); +        state->memsize += (unsigned)len; +        return XXH_OK; +    } + +    if (state->memsize) {   /* some data left from previous update */ +        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); +        {   const U32* p32 = state->mem32; +            state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; +            state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; +            state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; +            state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; +        } +        p += 16-state->memsize; +        state->memsize = 0; +    } + +    if (p <= bEnd-16) { +        const BYTE* const limit = bEnd - 16; +        U32 v1 = state->v1; +        U32 v2 = state->v2; +        U32 v3 = state->v3; +        U32 v4 = state->v4; + +        do { +            v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; +            v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; +            v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; +            v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; +        } while (p<=limit); + +        state->v1 = v1; +        state->v2 = v2; +        state->v3 = v3; +        state->v4 = v4; +    } + +    if (p < bEnd) { +        XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); +        state->memsize = (unsigned)(bEnd-p); +    } + +    return XXH_OK; +} + +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH32_update (GF_XXH32_state_t* state_in, const void* input, size_t len) +{ +    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) +        return XXH32_update_endian(state_in, input, len, XXH_littleEndian); +    else +        return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U32 XXH32_digest_endian (const 
GF_XXH32_state_t* state, XXH_endianess endian) +{ +    const BYTE * p = (const BYTE*)state->mem32; +    const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; +    U32 h32; + +    if (state->large_len) { +        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); +    } else { +        h32 = state->v3 /* == seed */ + PRIME32_5; +    } + +    h32 += state->total_len_32; + +    while (p+4<=bEnd) { +        h32 += XXH_readLE32(p, endian) * PRIME32_3; +        h32  = XXH_rotl32(h32, 17) * PRIME32_4; +        p+=4; +    } + +    while (p<bEnd) { +        h32 += (*p) * PRIME32_5; +        h32  = XXH_rotl32(h32, 11) * PRIME32_1; +        p++; +    } + +    h32 ^= h32 >> 15; +    h32 *= PRIME32_2; +    h32 ^= h32 >> 13; +    h32 *= PRIME32_3; +    h32 ^= h32 >> 16; + +    return h32; +} + + +XXH_PUBLIC_API unsigned int GF_XXH32_digest (const GF_XXH32_state_t* state_in) +{ +    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) +        return XXH32_digest_endian(state_in, XXH_littleEndian); +    else +        return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + +/*======   Canonical representation   ======*/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +*   The canonical representation follows human-readable write convention, aka big-endian (large digits first). +*   These functions allow transformation of hash result into and from its canonical format. +*   This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. 
+*/ + +XXH_PUBLIC_API void GF_XXH32_canonicalFromHash(GF_XXH32_canonical_t* dst, GF_XXH32_hash_t hash) +{ +    XXH_STATIC_ASSERT(sizeof(GF_XXH32_canonical_t) == sizeof(GF_XXH32_hash_t)); +    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); +    memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API GF_XXH32_hash_t GF_XXH32_hashFromCanonical(const GF_XXH32_canonical_t* src) +{ +    return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +*  64-bits hash functions +*********************************************************************/ + +/*======   Memory access   ======*/ + +#ifndef MEM_MODULE +# define MEM_MODULE +# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +#   include <stdint.h> +    typedef uint64_t U64; +# else +    typedef unsigned long long U64;   /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ +# endif +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; + +static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U64 XXH_read64(const void* memPtr) +{ +    U64 val; +    memcpy(&val, memPtr, sizeof(val)); +    return val; +} + +#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER)     /* Visual Studio */ +#  define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +#  define XXH_swap64 __builtin_bswap64 +#else +static U64 XXH_swap64 (U64 x) +{ +    return  ((x << 56) & 0xff00000000000000ULL) | +            ((x << 40) & 0x00ff000000000000ULL) | +            ((x << 24) & 0x0000ff0000000000ULL) | +            ((x << 8)  & 0x000000ff00000000ULL) | +            ((x >> 8)  & 0x00000000ff000000ULL) | +            ((x >> 24) & 0x0000000000ff0000ULL) | +            ((x >> 40) & 0x000000000000ff00ULL) | +            ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ +    if (align==XXH_unaligned) +        return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); +    else +        return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ +    return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ +    return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/*======   xxh64   ======*/ + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 =  1609587929392839161ULL; +static const U64 PRIME64_4 =  9650029242287828579ULL; +static const U64 PRIME64_5 =  2870177450012600261ULL; + +static U64 XXH64_round(U64 acc, U64 input) +{ +    acc += input * PRIME64_2; +    acc  = XXH_rotl64(acc, 31); +    acc *= PRIME64_1; +    return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ +    val  = XXH64_round(0, val); +    acc ^= val; +    acc  = acc * PRIME64_1 + PRIME64_4; +    return acc; +} + +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ +    const BYTE* p = (const BYTE*)input; +    const BYTE* const bEnd = p + len; +    U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +    if (p==NULL) { +        len=0; +        bEnd=p=(const BYTE*)(size_t)32; +    } +#endif + +    if (len>=32) { +        const BYTE* const limit = bEnd - 32; +        U64 v1 = seed + PRIME64_1 + PRIME64_2; +        U64 v2 = seed + PRIME64_2; +        U64 v3 = seed + 0; +        U64 v4 = seed - PRIME64_1; + +        do { +            v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; +            v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; +            v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; +            v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; +        } while (p<=limit); + +        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); +        h64 = XXH64_mergeRound(h64, v1); +        h64 = XXH64_mergeRound(h64, v2); +        h64 = XXH64_mergeRound(h64, v3); +        h64 = XXH64_mergeRound(h64, v4); + +    } else { +        h64  = seed + PRIME64_5; +    } + +    h64 += (U64) len; + +    while (p+8<=bEnd) { +        U64 const k1 = 
XXH64_round(0, XXH_get64bits(p)); +        h64 ^= k1; +        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; +        p+=8; +    } + +    if (p+4<=bEnd) { +        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; +        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; +        p+=4; +    } + +    while (p<bEnd) { +        h64 ^= (*p) * PRIME64_5; +        h64 = XXH_rotl64(h64, 11) * PRIME64_1; +        p++; +    } + +    h64 ^= h64 >> 33; +    h64 *= PRIME64_2; +    h64 ^= h64 >> 29; +    h64 *= PRIME64_3; +    h64 ^= h64 >> 32; + +    return h64; +} + + +XXH_PUBLIC_API unsigned long long GF_XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 +    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ +    XXH64_CREATESTATE_STATIC(state); +    GF_XXH64_reset(state, seed); +    GF_XXH64_update(state, input, len); +    return GF_XXH64_digest(state); +#else +    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +    if (XXH_FORCE_ALIGN_CHECK) { +        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */ +            if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) +                return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); +            else +                return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); +    }   } + +    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) +        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); +    else +        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/*======   Hash Streaming   ======*/ + +XXH_PUBLIC_API GF_XXH64_state_t* GF_XXH64_createState(void) +{ +    return (GF_XXH64_state_t*)XXH_malloc(sizeof(GF_XXH64_state_t)); +} +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH64_freeState(GF_XXH64_state_t* statePtr) +{ +    XXH_free(statePtr); +    return 
XXH_OK; +} + +XXH_PUBLIC_API void GF_XXH64_copyState(GF_XXH64_state_t* restrict dstState, const GF_XXH64_state_t* restrict srcState) +{ +    memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH64_reset(GF_XXH64_state_t* statePtr, unsigned long long seed) +{ +    GF_XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ +    memset(&state, 0, sizeof(state)-8);   /* do not write into reserved, for future removal */ +    state.v1 = seed + PRIME64_1 + PRIME64_2; +    state.v2 = seed + PRIME64_2; +    state.v3 = seed + 0; +    state.v4 = seed - PRIME64_1; +    memcpy(statePtr, &state, sizeof(state)); +    return XXH_OK; +} + +FORCE_INLINE GF_XXH_errorcode XXH64_update_endian (GF_XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ +    const BYTE* p = (const BYTE*)input; +    const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +    if (input==NULL) return XXH_ERROR; +#endif + +    state->total_len += len; + +    if (state->memsize + len < 32) {  /* fill in tmp buffer */ +        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); +        state->memsize += (U32)len; +        return XXH_OK; +    } + +    if (state->memsize) {   /* tmp buffer is full */ +        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); +        state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); +        state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); +        state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); +        state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); +        p += 32-state->memsize; +        state->memsize = 0; +    } + +    if (p+32 <= bEnd) { +        const BYTE* const limit = bEnd - 32; +        U64 v1 = state->v1; +        U64 v2 = state->v2; +        U64 v3 = state->v3; +        U64 v4 = state->v4; + +        
do { +            v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; +            v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; +            v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; +            v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; +        } while (p<=limit); + +        state->v1 = v1; +        state->v2 = v2; +        state->v3 = v3; +        state->v4 = v4; +    } + +    if (p < bEnd) { +        XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); +        state->memsize = (unsigned)(bEnd-p); +    } + +    return XXH_OK; +} + +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH64_update (GF_XXH64_state_t* state_in, const void* input, size_t len) +{ +    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) +        return XXH64_update_endian(state_in, input, len, XXH_littleEndian); +    else +        return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + +FORCE_INLINE U64 XXH64_digest_endian (const GF_XXH64_state_t* state, XXH_endianess endian) +{ +    const BYTE * p = (const BYTE*)state->mem64; +    const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; +    U64 h64; + +    if (state->total_len >= 32) { +        U64 const v1 = state->v1; +        U64 const v2 = state->v2; +        U64 const v3 = state->v3; +        U64 const v4 = state->v4; + +        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); +        h64 = XXH64_mergeRound(h64, v1); +        h64 = XXH64_mergeRound(h64, v2); +        h64 = XXH64_mergeRound(h64, v3); +        h64 = XXH64_mergeRound(h64, v4); +    } else { +        h64  = state->v3 + PRIME64_5; +    } + +    h64 += (U64) state->total_len; + +    while (p+8<=bEnd) { +        U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); +        h64 ^= k1; +        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; +        p+=8; +    } + +    if (p+4<=bEnd) { +        h64 
^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; +        h64  = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; +        p+=4; +    } + +    while (p<bEnd) { +        h64 ^= (*p) * PRIME64_5; +        h64  = XXH_rotl64(h64, 11) * PRIME64_1; +        p++; +    } + +    h64 ^= h64 >> 33; +    h64 *= PRIME64_2; +    h64 ^= h64 >> 29; +    h64 *= PRIME64_3; +    h64 ^= h64 >> 32; + +    return h64; +} + +XXH_PUBLIC_API unsigned long long GF_XXH64_digest (const GF_XXH64_state_t* state_in) +{ +    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) +        return XXH64_digest_endian(state_in, XXH_littleEndian); +    else +        return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/*====== Canonical representation   ======*/ + +XXH_PUBLIC_API void GF_XXH64_canonicalFromHash(GF_XXH64_canonical_t* dst, GF_XXH64_hash_t hash) +{ +    XXH_STATIC_ASSERT(sizeof(GF_XXH64_canonical_t) == sizeof(GF_XXH64_hash_t)); +    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); +    memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API GF_XXH64_hash_t GF_XXH64_hashFromCanonical(const GF_XXH64_canonical_t* src) +{ +    return XXH_readBE64(src); +} + +#endif  /* XXH_NO_LONG_LONG */ diff --git a/contrib/xxhash/xxhash.h b/contrib/xxhash/xxhash.h new file mode 100644 index 00000000000..98352b9018e --- /dev/null +++ b/contrib/xxhash/xxhash.h @@ -0,0 +1,301 @@ +/* +   xxHash - Extremely Fast Hash algorithm +   Header File +   Copyright (C) 2012-2016, Yann Collet. + +   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +   Redistribution and use in source and binary forms, with or without +   modification, are permitted provided that the following conditions are +   met: + +       * Redistributions of source code must retain the above copyright +   notice, this list of conditions and the following disclaimer. 
+       * Redistributions in binary form must reproduce the above +   copyright notice, this list of conditions and the following disclaimer +   in the documentation and/or other materials provided with the +   distribution. + +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +   You can contact the author at : +   - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name            Speed       Q.Score   Author +xxHash          5.4 GB/s     10 +CrapWow         3.2 GB/s      2       Andrew +MumurHash 3a    2.7 GB/s     10       Austin Appleby +SpookyHash      2.0 GB/s     10       Bob Jenkins +SBox            1.4 GB/s      9       Bret Mulvey +Lookup3         1.2 GB/s      9       Bob Jenkins +SuperFastHash   1.2 GB/s      1       Paul Hsieh +CityHash64      1.05 GB/s    10       Pike & Alakuijala +FNV             0.55 GB/s     5       Fowler, Noll, Vo +CRC32           0.43 GB/s     9 +MD5-32          0.33 GB/s    10       Ronald L. 
Rivest +SHA1-32         0.28 GB/s    10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. +Name     Speed on 64 bits    Speed on 32 bits +XXH64       13.8 GB/s            1.9 GB/s +XXH32        6.8 GB/s            6.0 GB/s +*/ + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************** +*  Compiler specifics +******************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* ! C99 */ +#  define restrict   /* disable restrict */ +#endif + + +/* **************************** +*  Definitions +******************************/ +#include <stddef.h>   /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } GF_XXH_errorcode; + + +/* **************************** +*  API modifier +******************************/ +/** XXH_PRIVATE_API +*   This is useful to include xxhash functions in `static` mode +*   in order to inline them, and remove their symbol from the public list. +*   Methodology : +*     #define XXH_PRIVATE_API +*     #include "xxhash.h" +*   `xxhash.c` is automatically included. +*   It's not useful to compile and link it as a separate module. 
/* Select the linkage prefix placed in front of every public function.
 * With XXH_PRIVATE_API defined, functions become `static` (inline where the
 * compiler offers a spelling for it) and XXH_STATIC_LINKING_ONLY is forced on;
 * otherwise XXH_PUBLIC_API expands to nothing. */
#ifdef XXH_PRIVATE_API
#  ifndef XXH_STATIC_LINKING_ONLY
#    define XXH_STATIC_LINKING_ONLY
#  endif
#  if defined(__GNUC__)
     /* GNU C : also silence "unused function" diagnostics */
#    define XXH_PUBLIC_API static __inline __attribute__((unused))
#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#    define XXH_PUBLIC_API static inline
#  elif defined(_MSC_VER)
#    define XXH_PUBLIC_API static __inline
#  else
#    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
#  endif
#else
#  define XXH_PUBLIC_API   /* do nothing */
#endif /* XXH_PRIVATE_API */
/* When XXH_NAMESPACE is defined, every public function name below is redefined
 * as the token XXH_NAMESPACE pasted in front of it.
 * XXH_NAME2 adds one level of macro indirection so that XXH_NAMESPACE is expanded
 * to its value before XXH_CAT applies the ## operator. */
#ifdef XXH_NAMESPACE
#  define XXH_CAT(A,B) A##B
#  define XXH_NAME2(A,B) XXH_CAT(A,B)
#  define GF_XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, GF_XXH_versionNumber)
   /* 32-bit API */
#  define GF_XXH32 XXH_NAME2(XXH_NAMESPACE, GF_XXH32)
#  define GF_XXH32_createState XXH_NAME2(XXH_NAMESPACE, GF_XXH32_createState)
#  define GF_XXH32_freeState XXH_NAME2(XXH_NAMESPACE, GF_XXH32_freeState)
#  define GF_XXH32_reset XXH_NAME2(XXH_NAMESPACE, GF_XXH32_reset)
#  define GF_XXH32_update XXH_NAME2(XXH_NAMESPACE, GF_XXH32_update)
#  define GF_XXH32_digest XXH_NAME2(XXH_NAMESPACE, GF_XXH32_digest)
#  define GF_XXH32_copyState XXH_NAME2(XXH_NAMESPACE, GF_XXH32_copyState)
#  define GF_XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, GF_XXH32_canonicalFromHash)
#  define GF_XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, GF_XXH32_hashFromCanonical)
   /* 64-bit API */
#  define GF_XXH64 XXH_NAME2(XXH_NAMESPACE, GF_XXH64)
#  define GF_XXH64_createState XXH_NAME2(XXH_NAMESPACE, GF_XXH64_createState)
#  define GF_XXH64_freeState XXH_NAME2(XXH_NAMESPACE, GF_XXH64_freeState)
#  define GF_XXH64_reset XXH_NAME2(XXH_NAMESPACE, GF_XXH64_reset)
#  define GF_XXH64_update XXH_NAME2(XXH_NAMESPACE, GF_XXH64_update)
#  define GF_XXH64_digest XXH_NAME2(XXH_NAMESPACE, GF_XXH64_digest)
#  define GF_XXH64_copyState XXH_NAME2(XXH_NAMESPACE, GF_XXH64_copyState)
#  define GF_XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, GF_XXH64_canonicalFromHash)
#  define GF_XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, GF_XXH64_hashFromCanonical)
#endif
+************************************************************************/ +typedef unsigned int       GF_XXH32_hash_t; + +/*! XXH32() : +    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". +    The memory between input & input+length must be valid (allocated and read-accessible). +    "seed" can be used to alter the result predictably. +    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ +XXH_PUBLIC_API GF_XXH32_hash_t GF_XXH32 (const void* input, size_t length, unsigned int seed); + +/*======   Streaming   ======*/ +typedef struct XXH32_state_s GF_XXH32_state_t;   /* incomplete type */ +XXH_PUBLIC_API GF_XXH32_state_t* GF_XXH32_createState(void); +XXH_PUBLIC_API GF_XXH_errorcode  GF_XXH32_freeState(GF_XXH32_state_t* statePtr); +XXH_PUBLIC_API void GF_XXH32_copyState(GF_XXH32_state_t* restrict dst_state, const GF_XXH32_state_t* restrict src_state); + +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH32_reset  (GF_XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH32_update (GF_XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API GF_XXH32_hash_t  GF_XXH32_digest (const GF_XXH32_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. 
+ +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + +/*======   Canonical representation   ======*/ + +typedef struct { unsigned char digest[4]; } GF_XXH32_canonical_t; +XXH_PUBLIC_API void GF_XXH32_canonicalFromHash(GF_XXH32_canonical_t* dst, GF_XXH32_hash_t hash); +XXH_PUBLIC_API GF_XXH32_hash_t GF_XXH32_hashFromCanonical(const GF_XXH32_canonical_t* src); + +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +*  The canonical representation uses human-readable write convention, aka big-endian (large digits first). +*  These functions allow transformation of hash result into and from its canonical format. +*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. +*/ + + +#ifndef XXH_NO_LONG_LONG +/*-********************************************************************** +*  64-bits hash +************************************************************************/ +typedef unsigned long long GF_XXH64_hash_t; + +/*! XXH64() : +    Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". +    "seed" can be used to alter the result predictably. +    This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 
+*/ +XXH_PUBLIC_API GF_XXH64_hash_t GF_XXH64 (const void* input, size_t length, unsigned long long seed); + +/*======   Streaming   ======*/ +typedef struct XXH64_state_s GF_XXH64_state_t;   /* incomplete type */ +XXH_PUBLIC_API GF_XXH64_state_t* GF_XXH64_createState(void); +XXH_PUBLIC_API GF_XXH_errorcode  GF_XXH64_freeState(GF_XXH64_state_t* statePtr); +XXH_PUBLIC_API void GF_XXH64_copyState(GF_XXH64_state_t* restrict dst_state, const GF_XXH64_state_t* restrict src_state); + +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH64_reset  (GF_XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API GF_XXH_errorcode GF_XXH64_update (GF_XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API GF_XXH64_hash_t  GF_XXH64_digest (const GF_XXH64_state_t* statePtr); + +/*======   Canonical representation   ======*/ +typedef struct { unsigned char digest[8]; } GF_XXH64_canonical_t; +XXH_PUBLIC_API void GF_XXH64_canonicalFromHash(GF_XXH64_canonical_t* dst, GF_XXH64_hash_t hash); +XXH_PUBLIC_API GF_XXH64_hash_t GF_XXH64_hashFromCanonical(const GF_XXH64_canonical_t* src); +#endif  /* XXH_NO_LONG_LONG */ + + +#ifdef XXH_STATIC_LINKING_ONLY + +/* ================================================================================================ +   This section contains definitions which are not guaranteed to remain stable. +   They may change in future versions, becoming incompatible with a different version of the library. +   They shall only be used with static linking. +   Never use these definitions in association with dynamic linking ! +=================================================================================================== */ + +/* These definitions are only meant to allow allocation of XXH state +   statically, on stack, or in a struct for example. +   Do not use members directly. 
#ifndef XXH_NO_LONG_LONG
   /* Streaming state of the 64-bit hash.
      Only visible under XXH_STATIC_LINKING_ONLY so that it can be allocated
      statically, on stack, or inside another struct. Do not use members directly. */
   struct XXH64_state_s {
       unsigned long long total_len;  /* sum of all `len` values passed to update so far */
       unsigned long long v1;         /* v1..v4 : the four accumulators, seeded by reset */
       unsigned long long v2;
       unsigned long long v3;
       unsigned long long v4;
       unsigned long long mem64[4];   /* buffer defined as U64 for alignment; holds input bytes (< 32) waiting for a full 32-byte stripe */
       unsigned memsize;              /* number of bytes currently stored in mem64 */
       unsigned reserved[2];          /* never read nor write, will be removed in a future version */
   };   /* typedef'd to GF_XXH64_state_t */
#endif
/* Platform glue :
 *   SET_BINARY_MODE(file)   - switch a stdio stream to binary mode where the platform
 *                             distinguishes text and binary streams; empty elsewhere.
 *   IS_CONSOLE(stdStream)   - non-zero when the stream is attached to a terminal.
 */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
#  include <fcntl.h>    /* _O_BINARY */
#  include <io.h>       /* _setmode, _isatty */
#  ifdef __MINGW32__
   int _fileno(FILE *stream);   /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
#  endif
#  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#else
#  include <unistd.h>   /* isatty, STDIN_FILENO */
#  define SET_BINARY_MODE(file)
   /* NOTE(review): this branch ignores `stdStream` and always tests STDIN_FILENO,
    * so IS_CONSOLE(stdout) would report on stdin. Harmless as long as every caller
    * passes stdin - TODO confirm. isatty(fileno(stdStream)) would honour the
    * argument, but fileno() is not declared in strict ISO C modes. */
#  define IS_CONSOLE(stdStream) isatty(STDIN_FILENO)
#endif
g_lename : g_bename) +static const char author[] = "Yann Collet"; +#define WELCOME_MESSAGE(exename) "%s %s (%i-bits %s), by %s \n", exename, PROGRAM_VERSION,  g_nbBits, ENDIAN_NAME, author + +#define NBLOOPS    3                              /* Default number of benchmark iterations */ +#define TIMELOOP_S 1 +#define TIMELOOP  (TIMELOOP_S * CLOCKS_PER_SEC)   /* Minimum timing per iteration */ +#define XXHSUM32_DEFAULT_SEED 0                   /* Default seed for algo_xxh32 */ +#define XXHSUM64_DEFAULT_SEED 0                   /* Default seed for algo_xxh64 */ + +#define KB *( 1<<10) +#define MB *( 1<<20) +#define GB *(1U<<30) + +#define MAX_MEM    (2 GB - 64 MB) + +static const char stdinName[] = "-"; +typedef enum { algo_xxh32, algo_xxh64 } algoType; +static const algoType g_defaultAlgo = algo_xxh64;    /* required within main() & usage() */ + +/* <16 hex char> <SPC> <SPC> <filename> <'\0'> + * '4096' is typical Linux PATH_MAX configuration. */ +#define DEFAULT_LINE_LENGTH (sizeof(GF_XXH64_hash_t) * 2 + 2 + 4096 + 1) + +/* Maximum acceptable line length. */ +#define MAX_LINE_LENGTH (32 KB) + + +/* ************************************ +*  Display macros +**************************************/ +#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__) +#define DISPLAYRESULT(...)   fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); +static U32 g_displayLevel = 1; + + +/* ************************************ +*  Local variables +**************************************/ +static size_t g_sampleSize = 100 KB; +static U32 g_nbIterations = NBLOOPS; + + +/* ************************************ +*  Benchmark Functions +**************************************/ +static clock_t BMK_clockSpan( clock_t start ) +{ +    return clock() - start;   /* works even if overflow; Typical max span ~ 30 mn */ +} + + +static size_t BMK_findMaxMem(U64 requiredMem) +{ +    size_t const step = 64 MB; +    void* testmem = NULL; + +    requiredMem = (((requiredMem >> 26) + 1) << 26); +    requiredMem += 2*step; +    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + +    while (!testmem) { +        if (requiredMem > step) requiredMem -= step; +        else requiredMem >>= 1; +        testmem = malloc ((size_t)requiredMem); +    } +    free (testmem); + +    /* keep some space available */ +    if (requiredMem > step) requiredMem -= step; +    else requiredMem >>= 1; + +    return (size_t)requiredMem; +} + + +static U64 BMK_GetFileSize(const char* infilename) +{ +    int r; +#if defined(_MSC_VER) +    struct _stat64 statbuf; +    r = _stat64(infilename, &statbuf); +#else +    struct stat statbuf; +    r = stat(infilename, &statbuf); +#endif +    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... 
*/ +    return (U64)statbuf.st_size; +} + +typedef U32 (*hashFunction)(const void* buffer, size_t bufferSize, U32 seed); + +static U32 localXXH32(const void* buffer, size_t bufferSize, U32 seed) { return GF_XXH32(buffer, bufferSize, seed); } + +static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed) { return (U32)GF_XXH64(buffer, bufferSize, seed); } + +static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer, size_t bufferSize) +{ +    static const U32 nbh_perloop = 100; +    U32 iterationNb; +    double fastestH = 100000000.; + +    DISPLAY("\r%79s\r", "");       /* Clean display line */ +    if (g_nbIterations<1) g_nbIterations=1; +    for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++) { +        U32 nbHashes = 0, r=0; +        clock_t cStart; + +        DISPLAY("%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize); +        cStart = clock(); +        while (clock() == cStart);   /* starts clock() at its exact beginning */ +        cStart = clock(); + +        while (BMK_clockSpan(cStart) < TIMELOOP) { +            U32 i; +            for (i=0; i<nbh_perloop; i++) +                r += h(buffer, bufferSize, i); +            nbHashes += nbh_perloop; +        } +        if (r==0) DISPLAY(".\r");   /* need to do something with r to avoid compiler optimizing away the hash function */ +        {   double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbHashes; +            if (timeS < fastestH) fastestH = timeS; +            DISPLAY("%1i-%-17.17s : %10u -> %7.1f MB/s\r", iterationNb, hName, (U32)bufferSize, ((double)bufferSize / (1<<20)) / fastestH ); +        } +    } +    DISPLAY("%-19.19s : %10u -> %7.1f MB/s  \n", hName, (U32)bufferSize, ((double)bufferSize / (1<<20)) / fastestH); +} + + +/* Note : buffer is supposed malloc'ed, hence aligned */ +static void BMK_benchMem(const void* buffer, size_t bufferSize) +{ +    /* XXH32 bench */ +    BMK_benchHash(localXXH32, "XXH32", buffer, 
/* BMK_benchFiles() :
 * For each of the `nbFiles` names in `fileNamesTable`, load as much of the file as
 * BMK_selectBenchedSize() allows into a 16-byte aligned buffer and run BMK_benchMem() on it.
 * Stops at the first failure.
 * @return 0 on success,
 *         11 if a name is NULL or the file cannot be opened,
 *         12 if the buffer cannot be allocated,
 *         13 if fewer bytes than expected could be read. */
static int BMK_benchFiles(const char** fileNamesTable, int nbFiles)
{
    int fileIdx;
    for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
        const char* const inFileName = fileNamesTable[fileIdx];
        FILE* inFile;
        size_t benchedSize;
        char* buffer;
        void* alignedBuffer;

        /* Checks : validate the name before anything dereferences it */
        if (inFileName==NULL) {
            DISPLAY( "Pb opening %s\n", "(null)");
            return 11;
        }
        inFile = fopen( inFileName, "rb" );
        if (inFile==NULL) {
            DISPLAY( "Pb opening %s\n", inFileName);
            return 11;
        }

        benchedSize = BMK_selectBenchedSize(inFileName);
        buffer = (char*)malloc(benchedSize+16);   /* +16 : room to align the start */
        if(!buffer) {
            DISPLAY("\nError: not enough memory!\n");
            fclose(inFile);
            return 12;
        }
        /* pointer arithmetic only once the allocation is known to be valid */
        alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF);   /* align on next 16 bytes boundaries */

        /* Fill input buffer */
        DISPLAY("\rLoading %s...        \n", inFileName);
        {   size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
            fclose(inFile);
            if(readSize != benchedSize) {
                DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
                free(buffer);
                return 13;
        }   }

        /* bench */
        BMK_benchMem(alignedBuffer, benchedSize);

        free(buffer);
    }

    return 0;
}
\n", nbTests); +        DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2>>32), (U32)r2); +        exit(1); +    } +    nbTests++; +} + + +static void BMK_testSequence64(void* sentence, size_t len, U64 seed, U64 Nresult) +{ +    GF_XXH64_state_t state; +    U64 Dresult; +    size_t pos; + +    Dresult = GF_XXH64(sentence, len, seed); +    BMK_checkResult64(Dresult, Nresult); + +    GF_XXH64_reset(&state, seed); +    GF_XXH64_update(&state, sentence, len); +    Dresult = GF_XXH64_digest(&state); +    BMK_checkResult64(Dresult, Nresult); + +    GF_XXH64_reset(&state, seed); +    for (pos=0; pos<len; pos++) GF_XXH64_update(&state, ((char*)sentence)+pos, 1); +    Dresult = GF_XXH64_digest(&state); +    BMK_checkResult64(Dresult, Nresult); +} + + +static void BMK_testSequence(const void* sequence, size_t len, U32 seed, U32 Nresult) +{ +    GF_XXH32_state_t state; +    U32 Dresult; +    size_t pos; + +    Dresult = GF_XXH32(sequence, len, seed); +    BMK_checkResult(Dresult, Nresult); + +    GF_XXH32_reset(&state, seed); +    GF_XXH32_update(&state, sequence, len); +    Dresult = GF_XXH32_digest(&state); +    BMK_checkResult(Dresult, Nresult); + +    GF_XXH32_reset(&state, seed); +    for (pos=0; pos<len; pos++) GF_XXH32_update(&state, ((const char*)sequence)+pos, 1); +    Dresult = GF_XXH32_digest(&state); +    BMK_checkResult(Dresult, Nresult); +} + + +#define SANITY_BUFFER_SIZE 101 +static void BMK_sanityCheck(void) +{ +    static const U32 prime = 2654435761U; +    BYTE sanityBuffer[SANITY_BUFFER_SIZE]; +    U32 byteGen = prime; + +    int i; +    for (i=0; i<SANITY_BUFFER_SIZE; i++) { +        sanityBuffer[i] = (BYTE)(byteGen>>24); +        byteGen *= byteGen; +    } + +    BMK_testSequence(NULL,          0, 0,     0x02CC5D05); +    BMK_testSequence(NULL,          0, prime, 0x36B78AE7); +    BMK_testSequence(sanityBuffer,  1, 0,     0xB85CBEE5); +    BMK_testSequence(sanityBuffer,  1, prime, 0xD5845D64); +    BMK_testSequence(sanityBuffer, 14, 
0,     0xE5AA0AB4); +    BMK_testSequence(sanityBuffer, 14, prime, 0x4481951D); +    BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, 0,     0x1F1AA412); +    BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0x498EC8E2); + +    BMK_testSequence64(NULL        ,  0, 0,     0xEF46DB3751D8E999ULL); +    BMK_testSequence64(NULL        ,  0, prime, 0xAC75FDA2929B17EFULL); +    BMK_testSequence64(sanityBuffer,  1, 0,     0x4FCE394CC88952D8ULL); +    BMK_testSequence64(sanityBuffer,  1, prime, 0x739840CB819FA723ULL); +    BMK_testSequence64(sanityBuffer, 14, 0,     0xCFFA8DB881BC3A3DULL); +    BMK_testSequence64(sanityBuffer, 14, prime, 0x5B9611585EFCC9CBULL); +    BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, 0,     0x0EAB543384F878ADULL); +    BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0xCAA65939306F1E21ULL); + +    DISPLAY("\r%79s\r", "");       /* Clean display line */ +    DISPLAYLEVEL(2, "Sanity check -- all tests ok\n"); +} + + +/* ******************************************************** +*  File Hashing +**********************************************************/ + +static void BMK_display_LittleEndian(const void* ptr, size_t length) +{ +    const BYTE* p = (const BYTE*)ptr; +    size_t idx; +    for (idx=length-1; idx<length; idx--)    /* idx is unsigned : decrementing past 0 wraps around to a value >= length, which ends the loop (length==0 never enters it) */ +        DISPLAYRESULT("%02x", p[idx]); +} + +static void BMK_display_BigEndian(const void* ptr, size_t length) +{ +    const BYTE* p = (const BYTE*)ptr; +    size_t idx; +    for (idx=0; idx<length; idx++) +        DISPLAYRESULT("%02x", p[idx]); +} + +static void BMK_hashStream(void* xxhHashValue, const algoType hashType, FILE* inFile, void* buffer, size_t blockSize) +{ +    GF_XXH64_state_t state64; +    GF_XXH32_state_t state32; +    size_t readSize; + +    /* Init */ +    GF_XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED); +    GF_XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED); + +    /* Load file & update hash */ +    readSize = 1; +    while 
(readSize) { +        readSize = fread(buffer, 1, blockSize, inFile); +        switch(hashType) +        { +        case algo_xxh32: +            GF_XXH32_update(&state32, buffer, readSize); +            break; +        case algo_xxh64: +            GF_XXH64_update(&state64, buffer, readSize); +            break; +        default: +            break; +        } +    } + +    switch(hashType) +    { +    case algo_xxh32: +        {   U32 const h32 = GF_XXH32_digest(&state32); +            memcpy(xxhHashValue, &h32, sizeof(h32)); +            break; +        } +    case algo_xxh64: +        {   U64 const h64 = GF_XXH64_digest(&state64); +            memcpy(xxhHashValue, &h64, sizeof(h64)); +            break; +        } +    default: +            break; +    } +} + + +typedef enum { big_endian, little_endian} endianess; + +static int BMK_hash(const char* fileName, +                    const algoType hashType, +                    const endianess displayEndianess) +{ +    FILE*  inFile; +    size_t const blockSize = 64 KB; +    void*  buffer; +    U32    h32 = 0; +    U64    h64 = 0; + +    /* Open input : the stdinName pointer selects stdin, any other name is opened with fopen() in "rb" mode */ +    if (fileName == stdinName) { +        inFile = stdin; +        SET_BINARY_MODE(stdin); +    } +    else +        inFile = fopen( fileName, "rb" ); +    if (inFile==NULL) { +        DISPLAY( "Pb opening %s\n", fileName); +        return 1; +    } + +    /* Memory allocation & restrictions */ +    buffer = malloc(blockSize); +    if(!buffer) { +        DISPLAY("\nError: not enough memory!\n"); +        fclose(inFile); +        return 1; +    } + +    /* loading notification : shows at most the last 30 characters of fileName, shortened so that it starts right after a '/' or '\\' when one is met while scanning backwards */ +    {   const size_t fileNameSize = strlen(fileName); +        const char* const fileNameEnd = fileName + fileNameSize; +        const size_t maxInfoFilenameSize = fileNameSize > 30 ? 
30 : fileNameSize; +        size_t infoFilenameSize = 1; +        while ( (infoFilenameSize < maxInfoFilenameSize) +              &&(fileNameEnd[-1-infoFilenameSize] != '/') +              &&(fileNameEnd[-1-infoFilenameSize] != '\\') ) +              infoFilenameSize++; +        DISPLAY("\rLoading %s...                        \r", fileNameEnd - infoFilenameSize); +    } + +    /* Load file & update hash */ +    switch(hashType) +    { +    case algo_xxh32: +        BMK_hashStream(&h32, algo_xxh32, inFile, buffer, blockSize); +        break; +    case algo_xxh64: +        BMK_hashStream(&h64, algo_xxh64, inFile, buffer, blockSize); +        break; +    default: +        break; +    } + +    fclose(inFile); +    free(buffer); + +    /* display Hash */ +    switch(hashType) +    { +    case algo_xxh32: +        {   GF_XXH32_canonical_t hcbe32; +            GF_XXH32_canonicalFromHash(&hcbe32, h32); +            displayEndianess==big_endian ? +                BMK_display_BigEndian(&hcbe32, sizeof(hcbe32)) : BMK_display_LittleEndian(&hcbe32, sizeof(hcbe32)); +            DISPLAYRESULT("  %s\n", fileName); +            break; +        } +    case algo_xxh64: +        {   GF_XXH64_canonical_t hcbe64; +            GF_XXH64_canonicalFromHash(&hcbe64, h64); +            displayEndianess==big_endian ? 
+                BMK_display_BigEndian(&hcbe64, sizeof(hcbe64)) : BMK_display_LittleEndian(&hcbe64, sizeof(hcbe64)); +            DISPLAYRESULT("  %s\n", fileName); +            break; +        } +    default: +            break; +    } + +    return 0; +} + + +static int BMK_hashFiles(const char** fnList, int fnTotal, +                         algoType hashType, endianess displayEndianess) +{ +    int fnNb; +    int result = 0; + +    if (fnTotal==0) +        return BMK_hash(stdinName, hashType, displayEndianess); + +    for (fnNb=0; fnNb<fnTotal; fnNb++) +        result += BMK_hash(fnList[fnNb], hashType, displayEndianess); +    DISPLAY("\r%70s\r", ""); +    return result; +} + + +typedef enum { +    GetLine_ok, +    GetLine_eof, +    GetLine_exceedMaxLineLength, +    GetLine_outOfMemory, +} GetLineResult; + +typedef enum { +    CanonicalFromString_ok, +    CanonicalFromString_invalidFormat, +} CanonicalFromStringResult; + +typedef enum { +    ParseLine_ok, +    ParseLine_invalidFormat, +} ParseLineResult; + +typedef enum { +    LineStatus_hashOk, +    LineStatus_hashFailed, +    LineStatus_failedToOpen, +} LineStatus; + +typedef union { +    GF_XXH32_canonical_t xxh32; +    GF_XXH64_canonical_t xxh64; +} Canonical; + +typedef struct { +    Canonical   canonical; +    const char* filename; +    int         xxhBits;    /* canonical type : 32:xxh32, 64:xxh64 */ +} ParsedLine; + +typedef struct { +    unsigned long   nProperlyFormattedLines; +    unsigned long   nImproperlyFormattedLines; +    unsigned long   nMismatchedChecksums; +    unsigned long   nOpenOrReadFailures; +    unsigned long   nMixedFormatLines; +    int             xxhBits; +    int             quit; +} ParseFileReport; + +typedef struct { +    const char*     inFileName; +    FILE*           inFile; +    int             lineMax; +    char*           lineBuf; +    size_t          blockSize; +    char*           blockBuf; +    int             strictMode; +    int             statusOnly; +    int      
       warn; +    int             quiet; +    ParseFileReport report; +} ParseFileArg; + + +/*  Read line from stream. +    Returns GetLine_ok, if it reads line successfully. +    Returns GetLine_eof, if stream reaches EOF. +    Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH. +    Returns GetLine_outOfMemory, if line buffer memory allocation failed. + */ +static GetLineResult getLine(char** lineBuf, int* lineMax, FILE* inFile) +{ +    GetLineResult result = GetLine_ok; +    int len = 0; + +    if (*lineBuf == NULL || *lineMax < 1) { +        *lineMax = DEFAULT_LINE_LENGTH; +        *lineBuf = (char*) realloc(*lineBuf, *lineMax); +        if(*lineBuf == NULL) return GetLine_outOfMemory; +    } + +    for (;;) { +        const int c = fgetc(inFile); +        if (c == EOF) { +            /* If we meet EOF before first character, returns GetLine_eof, +             * otherwise GetLine_ok. +             */ +            if (len == 0) result = GetLine_eof; +            break; +        } + +        /* Make enough space for len+1 (for final NUL) bytes. */ +        if (len+1 >= *lineMax) { +            char* newLineBuf = NULL; +            int newBufSize = *lineMax; + +            newBufSize += (newBufSize/2) + 1; /* x 1.5 */ +            if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH; +            if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength; + +            newLineBuf = (char*) realloc(*lineBuf, newBufSize); +            if (newLineBuf == NULL) return GetLine_outOfMemory; + +            *lineBuf = newLineBuf; +            *lineMax = newBufSize; +        } + +        if (c == '\n') break; +        (*lineBuf)[len++] = (char) c; +    } + +    (*lineBuf)[len] = '\0'; +    return result; +} + + +/*  Converts one hexadecimal character to integer. + *  Returns -1, if given character is not hexadecimal. 
+ */ +static int charToHex(char c) +{ +    int result = -1; +    if (c >= '0' && c <= '9') { +        result = (int) (c - '0'); +    } else if (c >= 'A' && c <= 'F') { +        result = (int) (c - 'A') + 0x0a; +    } else if (c >= 'a' && c <= 'f') { +        result = (int) (c - 'a') + 0x0a; +    } +    return result; +} + + +/*  Converts XXH32 canonical hexadecimal string hashStr to big endian unsigned char array dst. + *  Returns CANONICAL_FROM_STRING_INVALID_FORMAT, if hashStr is not well formatted. + *  Returns CANONICAL_FROM_STRING_OK, if hashStr is parsed successfully. + */ +static CanonicalFromStringResult canonicalFromString(unsigned char* dst, +                                                     size_t dstSize, +                                                     const char* hashStr) +{ +    size_t i; +    for (i = 0; i < dstSize; ++i) { +        int h0, h1; + +        h0 = charToHex(hashStr[i*2 + 0]); +        if (h0 < 0) return CanonicalFromString_invalidFormat; + +        h1 = charToHex(hashStr[i*2 + 1]); +        if (h1 < 0) return CanonicalFromString_invalidFormat; + +        dst[i] = (unsigned char) ((h0 << 4) | h1); +    } +    return CanonicalFromString_ok; +} + + +/*  Parse single line of xxHash checksum file. + *  Returns PARSE_LINE_ERROR_INVALID_FORMAT, if line is not well formatted. + *  Returns PARSE_LINE_OK if line is parsed successfully. + *  And members of parseLine will be filled by parsed values. + * + *  - line must be ended with '\0'. + *  - Since parsedLine.filename will point within given argument `line`, + *    users must keep `line`s content during they are using parsedLine. 
+ * + *  Given xxHash checksum line should have the following format: + * + *      <8 or 16 hexadecimal char> <space> <space> <filename...> <'\0'> + */ +static ParseLineResult parseLine(ParsedLine* parsedLine, const char* line) +{ +    const char* const firstSpace = strchr(line, ' '); +    const char* const secondSpace = firstSpace + 1; + +    parsedLine->filename = NULL; +    parsedLine->xxhBits = 0; + +    if (firstSpace == NULL || *secondSpace != ' ') return ParseLine_invalidFormat; + +    switch (firstSpace - line) +    { +    case 8: +        {   GF_XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32; +            if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), line) +                != CanonicalFromString_ok) { +                return ParseLine_invalidFormat; +            } +            parsedLine->xxhBits = 32; +            break; +        } + +    case 16: +        {   GF_XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64; +            if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), line) +                != CanonicalFromString_ok) { +                return ParseLine_invalidFormat; +            } +            parsedLine->xxhBits = 64; +            break; +        } + +    default: +            return ParseLine_invalidFormat; +            break; +    } + +    parsedLine->filename = secondSpace + 1; +    return ParseLine_ok; +} + + +/*!  Parse xxHash checksum file. 
+ */ +static void parseFile1(ParseFileArg* parseFileArg) +{ +    const char* const inFileName = parseFileArg->inFileName; +    ParseFileReport* const report = &parseFileArg->report; + +    unsigned long lineNumber = 0; +    memset(report, 0, sizeof(*report)); + +    while (!report->quit) { +        FILE* fp = NULL; +        LineStatus lineStatus = LineStatus_hashFailed; +        GetLineResult getLineResult; +        ParsedLine parsedLine; +        memset(&parsedLine, 0, sizeof(parsedLine)); + +        lineNumber++; +        if (lineNumber == 0) { +            /* This is unlikely happen, but md5sum.c has this +             * error check. */ +            DISPLAY("%s : too many checksum lines\n", inFileName); +            report->quit = 1; +            break; +        } + +        getLineResult = getLine(&parseFileArg->lineBuf, &parseFileArg->lineMax, +                                parseFileArg->inFile); +        if (getLineResult != GetLine_ok) { +            if (getLineResult == GetLine_eof) break; + +            switch (getLineResult) +            { +            case GetLine_ok: +            case GetLine_eof: +                /* These cases never happen.  See above getLineResult related "if"s. +                   They exist just for make gcc's -Wswitch-enum happy. 
*/ +                break; + +            default: +                DISPLAY("%s : %lu: unknown error\n", inFileName, lineNumber); +                break; + +            case GetLine_exceedMaxLineLength: +                DISPLAY("%s : %lu: too long line\n", inFileName, lineNumber); +                break; + +            case GetLine_outOfMemory: +                DISPLAY("%s : %lu: out of memory\n", inFileName, lineNumber); +                break; +            } +            report->quit = 1; +            break; +        } + +        if (parseLine(&parsedLine, parseFileArg->lineBuf) != ParseLine_ok) { +            report->nImproperlyFormattedLines++; +            if (parseFileArg->warn) { +                DISPLAY("%s : %lu: improperly formatted XXHASH checksum line\n" +                    , inFileName, lineNumber); +            } +            continue; +        } + +        if (report->xxhBits != 0 && report->xxhBits != parsedLine.xxhBits) { +            /* Don't accept xxh32/xxh64 mixed file */ +            report->nImproperlyFormattedLines++; +            report->nMixedFormatLines++; +            if (parseFileArg->warn) { +                DISPLAY("%s : %lu: improperly formatted XXHASH checksum line (XXH32/64)\n" +                    , inFileName, lineNumber); +            } +            continue; +        } + +        report->nProperlyFormattedLines++; +        if (report->xxhBits == 0) { +            report->xxhBits = parsedLine.xxhBits; +        } + +        fp = fopen(parsedLine.filename, "rb"); +        if (fp == NULL) { +            lineStatus = LineStatus_failedToOpen; +        } else { +            lineStatus = LineStatus_hashFailed; +            switch (parsedLine.xxhBits) +            { +            case 32: +                {   GF_XXH32_hash_t xxh; +                    BMK_hashStream(&xxh, algo_xxh32, fp, parseFileArg->blockBuf, parseFileArg->blockSize); +                    if (xxh == GF_XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) { +           
             lineStatus = LineStatus_hashOk; +                }   } +                break; + +            case 64: +                {   GF_XXH64_hash_t xxh; +                    BMK_hashStream(&xxh, algo_xxh64, fp, parseFileArg->blockBuf, parseFileArg->blockSize); +                    if (xxh == GF_XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) { +                        lineStatus = LineStatus_hashOk; +                }   } +                break; + +            default: +                break; +            } +            fclose(fp); +        } + +        switch (lineStatus) +        { +        default: +            DISPLAY("%s : unknown error\n", inFileName); +            report->quit = 1; +            break; + +        case LineStatus_failedToOpen: +            report->nOpenOrReadFailures++; +            if (!parseFileArg->statusOnly) { +                DISPLAYRESULT("%s : %lu: FAILED open or read %s\n" +                    , inFileName, lineNumber, parsedLine.filename); +            } +            break; + +        case LineStatus_hashOk: +        case LineStatus_hashFailed: +            {   int b = 1; +                if (lineStatus == LineStatus_hashOk) { +                    /* If --quiet is specified, don't display "OK" */ +                    if (parseFileArg->quiet) b = 0; +                } else { +                    report->nMismatchedChecksums++; +                } + +                if (b && !parseFileArg->statusOnly) { +                    DISPLAYRESULT("%s: %s\n", parsedLine.filename +                        , lineStatus == LineStatus_hashOk ? "OK" : "FAILED"); +            }   } +            break; +        } +    }   /* while (!report->quit) */ +} + + +/*  Parse xxHash checksum file. + *  Returns 1, if all procedures were succeeded. + *  Returns 0, if any procedures was failed. + * + *  If strictMode != 0, return error code if any line is invalid. + *  If statusOnly != 0, don't generate any output. 
+ *  If warn != 0, print a warning message to stderr. + *  If quiet != 0, suppress "OK" line. + * + *  "All procedures are succeeded" means: + *    - Checksum file contains at least one line and less than SIZE_T_MAX lines. + *    - All files are properly opened and read. + *    - All hash values match with its content. + *    - (strict mode) All lines in checksum file are consistent and well formatted. + * + */ +static int checkFile(const char* inFileName, +                     const endianess displayEndianess, +                     U32 strictMode, +                     U32 statusOnly, +                     U32 warn, +                     U32 quiet) +{ +    int result = 0; +    FILE* inFile = NULL; +    ParseFileArg parseFileArgBody; +    ParseFileArg* const parseFileArg = &parseFileArgBody; +    ParseFileReport* const report = &parseFileArg->report; + +    if (displayEndianess != big_endian) { +        /* Don't accept little endian */ +        DISPLAY( "Check file mode doesn't support little endian\n" ); +        return 0; +    } + +    /* note : stdinName is special constant pointer.  It is not a string. 
*/ +    if (inFileName == stdinName) { +        /* note : Since we expect text input for xxhash -c mode, +         * Don't set binary mode for stdin */ +        inFile = stdin; +    } else { +        inFile = fopen( inFileName, "rt" ); +    } + +    if (inFile == NULL) { +        DISPLAY( "Pb opening %s\n", inFileName); +        return 0; +    } + +    parseFileArg->inFileName    = inFileName; +    parseFileArg->inFile        = inFile; +    parseFileArg->lineMax       = DEFAULT_LINE_LENGTH; +    parseFileArg->lineBuf       = (char*) malloc((size_t) parseFileArg->lineMax); +    parseFileArg->blockSize     = 64 * 1024; +    parseFileArg->blockBuf      = (char*) malloc(parseFileArg->blockSize); +    parseFileArg->strictMode    = strictMode; +    parseFileArg->statusOnly    = statusOnly; +    parseFileArg->warn          = warn; +    parseFileArg->quiet         = quiet; + +    parseFile1(parseFileArg); + +    free(parseFileArg->blockBuf); +    free(parseFileArg->lineBuf); + +    if (inFile != stdin) fclose(inFile); + +    /* Show error/warning messages.  
All messages are copied from md5sum.c +     */ +    if (report->nProperlyFormattedLines == 0) { +        DISPLAY("%s: no properly formatted XXHASH checksum lines found\n", inFileName); +    } else if (!statusOnly) { +        if (report->nImproperlyFormattedLines) { +            DISPLAYRESULT("%lu lines are improperly formatted\n" +                , report->nImproperlyFormattedLines); +        } +        if (report->nOpenOrReadFailures) { +            DISPLAYRESULT("%lu listed files could not be read\n" +                , report->nOpenOrReadFailures); +        } +        if (report->nMismatchedChecksums) { +            DISPLAYRESULT("%lu computed checksums did NOT match\n" +                , report->nMismatchedChecksums); +    }   } + +    /* Result (exit) code logic is copied from +     * gnu coreutils/src/md5sum.c digest_check() */ +    result =   report->nProperlyFormattedLines != 0 +            && report->nMismatchedChecksums == 0 +            && report->nOpenOrReadFailures == 0 +            && (!strictMode || report->nImproperlyFormattedLines == 0) +            && report->quit == 0; +    return result; +} + + +static int checkFiles(const char** fnList, int fnTotal, +                      const endianess displayEndianess, +                      U32 strictMode, +                      U32 statusOnly, +                      U32 warn, +                      U32 quiet) +{ +    int ok = 1; + +    /* Special case for stdinName "-", +     * note: stdinName is not a string.  It's special pointer. */ +    if (fnTotal==0) { +        ok &= checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet); +    } else { +        int fnNb; +        for (fnNb=0; fnNb<fnTotal; fnNb++) +            ok &= checkFile(fnList[fnNb], displayEndianess, strictMode, statusOnly, warn, quiet); +    } +    return ok ? 
0 : 1; +} + + +/* ******************************************************** +*  Main +**********************************************************/ + +static int usage(const char* exename) +{ +    DISPLAY( WELCOME_MESSAGE(exename) ); +    DISPLAY( "Usage :\n"); +    DISPLAY( "      %s [arg] [filenames]\n", exename); +    DISPLAY( "When no filename provided, or - provided : use stdin as input\n"); +    DISPLAY( "Arguments :\n"); +    DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits (default: %i)\n", (int)g_defaultAlgo); +    DISPLAY( " -c  : read xxHash sums from the [filenames] and check them\n"); +    DISPLAY( " -h  : help \n"); +    return 0; +} + + +static int usage_advanced(const char* exename) +{ +    usage(exename); +    DISPLAY( "Advanced :\n"); +    DISPLAY( " --little-endian : hash printed using little endian convention (default: big endian)\n"); +    DISPLAY( " -V, --version   : display version\n"); +    DISPLAY( " -h, --help      : display long help and exit\n"); +    DISPLAY( " -b  : benchmark mode \n"); +    DISPLAY( " -i# : number of iterations (benchmark mode; default %i)\n", g_nbIterations); +    DISPLAY( "\n"); +    DISPLAY( "The following four options are useful only when verifying checksums (-c):\n"); +    DISPLAY( "--strict : don't print OK for each successfully verified file\n"); +    DISPLAY( "--status : don't output anything, status code shows success\n"); +    DISPLAY( "--quiet  : exit non-zero for improperly formatted checksum lines\n"); +    DISPLAY( "--warn   : warn about improperly formatted checksum lines\n"); +    return 0; +} + +static int badusage(const char* exename) +{ +    DISPLAY("Wrong parameters\n"); +    usage(exename); +    return 1; +} + + +int main(int argc, const char** argv) +{ +    int i, filenamesStart=0; +    const char* const exename = argv[0]; +    U32 benchmarkMode = 0; +    U32 fileCheckMode = 0; +    U32 strictMode    = 0; +    U32 statusOnly    = 0; +    U32 warn          = 0; +    U32 quiet         = 0; +    
algoType algo = g_defaultAlgo; +    endianess displayEndianess = big_endian; + +    /* special case : xxh32sum default to 32 bits checksum */ +    if (strstr(exename, "xxh32sum") != NULL) algo = algo_xxh32; + +    for(i=1; i<argc; i++) { +        const char* argument = argv[i]; + +        if(!argument) continue;   /* Protection, if argument empty */ + +        if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; } +        if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; } +        if (!strcmp(argument, "--strict")) { strictMode = 1; continue; } +        if (!strcmp(argument, "--status")) { statusOnly = 1; continue; } +        if (!strcmp(argument, "--quiet")) { quiet = 1; continue; } +        if (!strcmp(argument, "--warn")) { warn = 1; continue; } +        if (!strcmp(argument, "--help")) { return usage_advanced(exename); } +        if (!strcmp(argument, "--version")) { DISPLAY(WELCOME_MESSAGE(exename)); return 0; } + +        if (*argument!='-') { +            if (filenamesStart==0) filenamesStart=i;   /* only supports a continuous list of filenames */ +            continue; +        } + +        /* command selection */ +        argument++;   /* note : *argument=='-' */ + +        while (*argument!=0) { +            switch(*argument) +            { +            /* Display version */ +            case 'V': +                DISPLAY(WELCOME_MESSAGE(exename)); return 0; + +            /* Display help on usage */ +            case 'h': +                return usage_advanced(exename); + +            /* select hash algorithm */ +            case 'H': +                algo = (algoType)(argument[1] - '0'); +                argument+=2; +                break; + +            /* File check mode */ +            case 'c': +                fileCheckMode=1; +                argument++; +                break; + +            /* Warning mode (file check mode only, alias of "--warning") */ +            case 'w': +                
warn=1; +                argument++; +                break; + +            /* Trigger benchmark mode */ +            case 'b': +                argument++; +                benchmarkMode=1; +                break; + +            /* Modify Nb Iterations (benchmark only) */ +            case 'i': +                g_nbIterations = argument[1] - '0'; +                argument+=2; +                break; + +            /* Modify Block size (benchmark only) */ +            case 'B': +                argument++; +                g_sampleSize = 0; +                while (argument[0]>='0' && argument[0]<='9') +                    g_sampleSize *= 10, g_sampleSize += argument[0]-'0', argument++; +                break; + +            default: +                return badusage(exename); +            } +        } +    }   /* for(i=1; i<argc; i++) */ + +    /* Check benchmark mode */ +    if (benchmarkMode) { +        DISPLAY( WELCOME_MESSAGE(exename) ); +        BMK_sanityCheck(); +        if (filenamesStart==0) return BMK_benchInternal(); +        return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart); +    } + +    /* Check if input is defined as console; trigger an error in this case */ +    if ( (filenamesStart==0) && IS_CONSOLE(stdin) ) return badusage(exename); + +    if (filenamesStart==0) filenamesStart = argc; +    if (fileCheckMode) { +        return checkFiles(argv+filenamesStart, argc-filenamesStart, displayEndianess, strictMode, statusOnly, warn, quiet); +    } else { +        return BMK_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess); +    } +} + +#endif /* XXHASH_C_2097394837 */ diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 52b73166ddb..768fe042a10 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -9,7 +9,8 @@ libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \  	-I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \  	
-I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree \  	-I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \ -	-DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel +	-DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \ +	-I$(CONTRIBDIR)/xxhash  libglusterfs_la_LIBADD = @LEXLIB@ $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS)  libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) @@ -33,6 +34,7 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \  	$(CONTRIBDIR)/libexecinfo/execinfo.c quota-common-utils.c rot-buffs.c \  	$(CONTRIBDIR)/timer-wheel/timer-wheel.c \  	$(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \ +	$(CONTRIBDIR)/xxhash/xxhash.c \  	compound-fop-utils.c throttle-tbf.c  nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c defaults.c @@ -62,6 +64,7 @@ noinst_HEADERS = unittest/unittest.h \  	$(CONTRIBDIR)/mount/mntent_compat.h \  	$(CONTRIBDIR)/libexecinfo/execinfo_compat.h \  	$(CONTRIBDIR)/timer-wheel/timer-wheel.h \ +	$(CONTRIBDIR)/xxhash/xxhash.h \  	tier-ctr-interface.h  if !HAVE_LIBUUID diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index 65557fefba2..5015d9666b6 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -49,6 +49,7 @@  #include "lkowner.h"  #include "syscall.h"  #include "cli1-xdr.h" +#include "xxhash.h"  #include <ifaddrs.h>  #include "libglusterfs-messages.h" @@ -82,6 +83,23 @@ md5_wrapper(const unsigned char *data, size_t len, char *md5)                  snprintf(md5 + i * 2, lim-i*2, "%02x", scratch[i]);  } +/* Hashes data[0..len) with GF_XXH64() using the given seed, converts the result with GF_XXH64_canonicalFromHash() and writes the canonical bytes to xxh64 as GF_XXH64_DIGEST_LENGTH*2 lower-case hexadecimal characters followed by a NUL; the snprintf() bound assumes xxh64 has room for GF_XXH64_DIGEST_LENGTH*2+1 bytes */ void +gf_xxh64_wrapper(const unsigned char *data, size_t len, unsigned long long seed, +                 char *xxh64) +{ +        unsigned short         i      = 0; +        unsigned short         lim    = GF_XXH64_DIGEST_LENGTH*2+1; +        GF_XXH64_hash_t        hash   = 0; +        GF_XXH64_canonical_t   c_hash = {{0,},}; +        const uint8_t         *p      = (const uint8_t *) &c_hash; + +        hash = GF_XXH64(data, 
len, seed); +        GF_XXH64_canonicalFromHash(&c_hash, hash); + +        for (i = 0; i < GF_XXH64_DIGEST_LENGTH; i++) +                snprintf(xxh64 + i * 2, lim-i*2, "%02x", p[i]); +} +  /* works similar to mkdir(1) -p.   */  int diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h index 6bd24179592..c3c5ec77350 100644 --- a/libglusterfs/src/common-utils.h +++ b/libglusterfs/src/common-utils.h @@ -113,6 +113,9 @@ void trap (void);  /* Default value of signing waiting time to sign a file for bitrot */  #define SIGNING_TIMEOUT "120" +/* xxhash : number of canonical XXH64 digest bytes that gf_xxh64_wrapper() hex-encodes */ +#define GF_XXH64_DIGEST_LENGTH 8 +  /* Shard */  #define GF_XATTR_SHARD_FILE_SIZE  "trusted.glusterfs.shard.file-size"  #define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806" @@ -823,6 +826,8 @@ int gf_get_hostname_from_ip (char *client_ip, char **hostname);  gf_boolean_t gf_is_local_addr (char *hostname);  gf_boolean_t gf_is_same_address (char *host1, char *host2);  void md5_wrapper(const unsigned char *data, size_t len, char *md5); +void gf_xxh64_wrapper(const unsigned char *data, size_t len, +                      unsigned long long seed, char *xxh64);  int gf_set_timestamp  (const char *src, const char* dest);  int gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,  | 
