Correct hash behavior for floating point numbers

This fixes HashMap where a key or part of a key is a floating point
number. To fix this the following has been done:

* HashMap now takes an extra template argument Comparator. This class
gets used to compare keys. The default Comperator now works correctly
for common types and floating point numbets.

* Variant implements ::hash_compare() now. This function implements
nan-safe comparison for all types with components that contain floating
point numbers.

* Variant now has a VariantComparator which uses Variant::hash_compare()
safely compare floating point components of variant's types.

* The hash functions for floating point numbers will now normalize NaN
values so that all floating point numbers that are NaN hash to the same
value.

C++ module writers that want to use HashMap internally in their modules
can now also safeguard against this crash by defining their on
Comperator class that safely compares their types.

GDScript users, or writers of modules that don't use HashMap internally
in their modules don't need to do anything.

This fixes #7354 and fixes #6947.
This commit is contained in:
Hein-Pieter van Braam 2017-02-15 14:41:16 +01:00
parent 903a3aa5f0
commit b696beea65
14 changed files with 273 additions and 51 deletions

View file

@ -295,7 +295,7 @@ uint64_t ClassDB::get_api_hash(APIType p_api) {
OBJTYPE_RLOCK;
#ifdef DEBUG_METHODS_ENABLED
uint64_t hash = hash_djb2_one_64(HashMapHahserDefault::hash(VERSION_FULL_NAME));
uint64_t hash = hash_djb2_one_64(HashMapHasherDefault::hash(VERSION_FULL_NAME));
List<StringName> names;

View file

@ -30,30 +30,20 @@
#define HASH_MAP_H
#include "hashfuncs.h"
#include "math_funcs.h"
#include "error_macros.h"
#include "ustring.h"
#include "os/memory.h"
#include "list.h"
class HashMapHahserDefault {
public:
struct HashMapHasherDefault {
static _FORCE_INLINE_ uint32_t hash(const String &p_string) { return p_string.hash(); }
static _FORCE_INLINE_ uint32_t hash(const char *p_cstr) { return hash_djb2(p_cstr); }
static _FORCE_INLINE_ uint32_t hash(const uint64_t p_int) {
uint64_t v=p_int;
v = (~v) + (v << 18); // v = (v << 18) - v - 1;
v = v ^ (v >> 31);
v = v * 21; // v = (v + (v << 2)) + (v << 4);
v = v ^ (v >> 11);
v = v + (v << 6);
v = v ^ (v >> 22);
return (int) v;
}
static _FORCE_INLINE_ uint32_t hash(const uint64_t p_int) { return hash_one_uint64(p_int); }
static _FORCE_INLINE_ uint32_t hash(const int64_t p_int) { return hash(uint64_t(p_int)); }
static _FORCE_INLINE_ uint32_t hash(const float p_float) { return hash_djb2_one_float(p_float); }
static _FORCE_INLINE_ uint32_t hash(const double p_double){ return hash_djb2_one_float(p_double); }
static _FORCE_INLINE_ uint32_t hash(const uint32_t p_int) { return p_int; }
static _FORCE_INLINE_ uint32_t hash(const int32_t p_int) { return (uint32_t)p_int; }
static _FORCE_INLINE_ uint32_t hash(const uint16_t p_int) { return p_int; }
@ -64,6 +54,21 @@ public:
//static _FORCE_INLINE_ uint32_t hash(const void* p_ptr) { return uint32_t(uint64_t(p_ptr))*(0x9e3779b1L); }
};
template <typename T>
struct HashMapComparatorDefault {
static bool compare(const T& p_lhs, const T& p_rhs) {
return p_lhs == p_rhs;
}
bool compare(const float& p_lhs, const float& p_rhs) {
return (p_lhs == p_rhs) || (Math::is_nan(p_lhs) && Math::is_nan(p_rhs));
}
bool compare(const double& p_lhs, const double& p_rhs) {
return (p_lhs == p_rhs) || (Math::is_nan(p_lhs) && Math::is_nan(p_rhs));
}
};
/**
* @class HashMap
* @author Juan Linietsky <reduzio@gmail.com>
@ -74,13 +79,14 @@ public:
* @param TKey Key, search is based on it, needs to be hasheable. It is unique in this container.
* @param TData Data, data associated with the key
* @param Hasher Hasher object, needs to provide a valid static hash function for TKey
* @param Comparator comparator object, needs to be able to safely compare two TKey values. It needs to ensure that x == x for any items inserted in the map. Bear in mind that nan != nan when implementing an equality check.
* @param MIN_HASH_TABLE_POWER Miminum size of the hash table, as a power of two. You rarely need to change this parameter.
* @param RELATIONSHIP Relationship at which the hash table is resized. if amount of elements is RELATIONSHIP
* times bigger than the hash table, table is resized to solve this condition. if RELATIONSHIP is zero, table is always MIN_HASH_TABLE_POWER.
*
*/
template<class TKey, class TData, class Hasher=HashMapHahserDefault,uint8_t MIN_HASH_TABLE_POWER=3,uint8_t RELATIONSHIP=8>
template<class TKey, class TData, class Hasher=HashMapHasherDefault, class Comparator=HashMapComparatorDefault<TKey>, uint8_t MIN_HASH_TABLE_POWER=3,uint8_t RELATIONSHIP=8>
class HashMap {
public:
@ -194,7 +200,6 @@ private:
}
/* I want to have only one function.. */
_FORCE_INLINE_ const Entry * get_entry( const TKey& p_key ) const {
@ -206,7 +211,7 @@ private:
while (e) {
/* checking hash first avoids comparing key, which may take longer */
if (e->hash == hash && e->pair.key == p_key ) {
if (e->hash == hash && Comparator::compare(e->pair.key,p_key) ) {
/* the pair exists in this hashtable, so just update data */
return e;
@ -253,7 +258,6 @@ private:
for (int i=0;i<( 1<<p_t.hash_table_power );i++) {
hash_table[i]=NULL;
/* elements will be in the reverse order, but it doesn't matter */
const Entry *e = p_t.hash_table[i];
@ -385,7 +389,7 @@ public:
while (e) {
/* checking hash first avoids comparing key, which may take longer */
if (e->hash == hash && e->pair.key == p_custom_key ) {
if (e->hash == hash && Comparator::compare(e->pair.key,p_custom_key) ) {
/* the pair exists in this hashtable, so just update data */
return &e->pair.data;
@ -411,7 +415,7 @@ public:
while (e) {
/* checking hash first avoids comparing key, which may take longer */
if (e->hash == hash && e->pair.key == p_custom_key ) {
if (e->hash == hash && Comparator::compare(e->pair.key,p_custom_key) ) {
/* the pair exists in this hashtable, so just update data */
return &e->pair.data;
@ -442,7 +446,7 @@ public:
while (e) {
/* checking hash first avoids comparing key, which may take longer */
if (e->hash == hash && e->pair.key == p_key ) {
if (e->hash == hash && Comparator::compare(e->pair.key,p_key) ) {
if (p) {
@ -637,7 +641,8 @@ public:
clear();
}
};
#endif

View file

@ -29,7 +29,8 @@
#ifndef HASHFUNCS_H
#define HASHFUNCS_H
#include "math_funcs.h"
#include "math_defs.h"
#include "typedefs.h"
/**
@ -69,19 +70,52 @@ static inline uint32_t hash_djb2_one_32(uint32_t p_in,uint32_t p_prev=5381) {
return ((p_prev<<5)+p_prev)+p_in;
}
static inline uint32_t hash_one_uint64(const uint64_t p_int) {
uint64_t v=p_int;
v = (~v) + (v << 18); // v = (v << 18) - v - 1;
v = v ^ (v >> 31);
v = v * 21; // v = (v + (v << 2)) + (v << 4);
v = v ^ (v >> 11);
v = v + (v << 6);
v = v ^ (v >> 22);
return (int) v;
}
static inline uint32_t hash_djb2_one_float(float p_in,uint32_t p_prev=5381) {
union {
float f;
uint32_t i;
} u;
// handle -0 case
if (p_in==0.0f) u.f=0.0f;
else u.f=p_in;
// Normalize +/- 0.0 and NaN values so they hash the same.
if (p_in==0.0f)
u.f=0.0;
else if (Math::is_nan(p_in))
u.f=Math_NAN;
else
u.f=p_in;
return ((p_prev<<5)+p_prev)+u.i;
}
// Overload for real_t size changes
static inline uint32_t hash_djb2_one_float(double p_in,uint32_t p_prev=5381) {
union {
double d;
uint64_t i;
} u;
// Normalize +/- 0.0 and NaN values so they hash the same.
if (p_in==0.0f)
u.d=0.0;
else if (Math::is_nan(p_in))
u.d=Math_NAN;
else
u.d=p_in;
return ((p_prev<<5)+p_prev) + hash_one_uint64(u.i);
}
template<class T>
static inline uint32_t make_uint32_t(T p_in) {

View file

@ -39,6 +39,7 @@
#define Math_PI 3.14159265358979323846
#define Math_SQRT12 0.7071067811865475244008443621048490
#define Math_LN2 0.693147180559945309417
#define Math_NAN NAN
class Math {

View file

@ -680,7 +680,7 @@ class ObjectDB {
unsigned long i;
} u;
u.p=p_obj;
return HashMapHahserDefault::hash((uint64_t)u.i);
return HashMapHasherDefault::hash((uint64_t)u.i);
}
};

View file

@ -28,6 +28,7 @@
/*************************************************************************/
#include "variant.h"
#include "math_funcs.h"
#include "resource.h"
#include "print_string.h"
#include "scene/main/node.h"
@ -2674,14 +2675,10 @@ uint32_t Variant::hash() const {
case INT: {
return _data._int;
} break;
case REAL: {
MarshallFloat mf;
mf.f=_data._real;
return mf.i;
return hash_djb2_one_float(_data._real);
} break;
case STRING: {
@ -2921,6 +2918,186 @@ uint32_t Variant::hash() const {
}
#define hash_compare_scalar(p_lhs, p_rhs)\
((p_lhs) == (p_rhs)) || (Math::is_nan(p_lhs) == Math::is_nan(p_rhs))
#define hash_compare_vector2(p_lhs, p_rhs)\
(hash_compare_scalar((p_lhs).x, (p_rhs).x)) && \
(hash_compare_scalar((p_lhs).y, (p_rhs).y))
#define hash_compare_vector3(p_lhs, p_rhs)\
(hash_compare_scalar((p_lhs).x, (p_rhs).x)) && \
(hash_compare_scalar((p_lhs).y, (p_rhs).y)) && \
(hash_compare_scalar((p_lhs).z, (p_rhs).z))
#define hash_compare_quat(p_lhs, p_rhs)\
(hash_compare_scalar((p_lhs).x, (p_rhs).x)) && \
(hash_compare_scalar((p_lhs).y, (p_rhs).y)) && \
(hash_compare_scalar((p_lhs).z, (p_rhs).z)) && \
(hash_compare_scalar((p_lhs).w, (p_rhs).w))
#define hash_compare_color(p_lhs, p_rhs)\
(hash_compare_scalar((p_lhs).r, (p_rhs).r)) && \
(hash_compare_scalar((p_lhs).g, (p_rhs).g)) && \
(hash_compare_scalar((p_lhs).b, (p_rhs).b)) && \
(hash_compare_scalar((p_lhs).a, (p_rhs).a))
#define hash_compare_pool_array(p_lhs, p_rhs, p_type, p_compare_func)\
const PoolVector<p_type>& l = *reinterpret_cast<const PoolVector<p_type>*>(p_lhs);\
const PoolVector<p_type>& r = *reinterpret_cast<const PoolVector<p_type>*>(p_rhs);\
\
if(l.size() != r.size()) \
return false; \
\
PoolVector<p_type>::Read lr = l.read(); \
PoolVector<p_type>::Read rr = r.read(); \
\
for(int i = 0; i < l.size(); ++i) { \
if(! p_compare_func((lr[0]), (rr[0]))) \
return false; \
}\
\
return true
bool Variant::hash_compare(const Variant& p_variant) const {
if (type != p_variant.type)
return false;
switch( type ) {
case REAL: {
return hash_compare_scalar(_data._real, p_variant._data._real);
} break;
case VECTOR2: {
const Vector2* l = reinterpret_cast<const Vector2*>(_data._mem);
const Vector2* r = reinterpret_cast<const Vector2*>(p_variant._data._mem);
return hash_compare_vector2(*l, *r);
} break;
case RECT2: {
const Rect2* l = reinterpret_cast<const Rect2*>(_data._mem);
const Rect2* r = reinterpret_cast<const Rect2*>(p_variant._data._mem);
return (hash_compare_vector2(l->pos, r->pos)) &&
(hash_compare_vector2(l->size, r->size));
} break;
case TRANSFORM2D: {
Transform2D* l = _data._transform2d;
Transform2D* r = p_variant._data._transform2d;
for(int i=0;i<3;i++) {
if (! (hash_compare_vector2(l->elements[i], r->elements[i])))
return false;
}
return true;
} break;
case VECTOR3: {
const Vector3* l = reinterpret_cast<const Vector3*>(_data._mem);
const Vector3* r = reinterpret_cast<const Vector3*>(p_variant._data._mem);
return hash_compare_vector3(*l, *r);
} break;
case PLANE: {
const Plane* l = reinterpret_cast<const Plane*>(_data._mem);
const Plane* r = reinterpret_cast<const Plane*>(p_variant._data._mem);
return (hash_compare_vector3(l->normal, r->normal)) &&
(hash_compare_scalar(l->d, r->d));
} break;
case RECT3: {
const Rect3* l = _data._rect3;
const Rect3* r = p_variant._data._rect3;
return (hash_compare_vector3(l->pos, r->pos) &&
(hash_compare_vector3(l->size, r->size)));
} break;
case QUAT: {
const Quat* l = reinterpret_cast<const Quat*>(_data._mem);
const Quat* r = reinterpret_cast<const Quat*>(p_variant._data._mem);
return hash_compare_quat(*l, *r);
} break;
case BASIS: {
const Basis* l = _data._basis;
const Basis* r = p_variant._data._basis;
for(int i=0;i<3;i++) {
if (! (hash_compare_vector3(l->elements[i], r->elements[i])))
return false;
}
return true;
} break;
case TRANSFORM: {
const Transform* l = _data._transform;
const Transform* r = p_variant._data._transform;
for(int i=0;i<3;i++) {
if (! (hash_compare_vector3(l->basis.elements[i], r->basis.elements[i])))
return false;
}
return hash_compare_vector3(l->origin, r->origin);
} break;
case COLOR: {
const Color* l = reinterpret_cast<const Color*>(_data._mem);
const Color* r = reinterpret_cast<const Color*>(p_variant._data._mem);
return hash_compare_color(*l, *r);
} break;
case ARRAY: {
const Array& l = *(reinterpret_cast<const Array*>(_data._mem));
const Array& r = *(reinterpret_cast<const Array*>(p_variant._data._mem));
if(l.size() != r.size())
return false;
for(int i = 0; i < l.size(); ++i) {
if(! l[0].hash_compare(r[0]))
return false;
}
return true;
} break;
case POOL_REAL_ARRAY: {
hash_compare_pool_array(_data._mem, p_variant._data._mem, real_t, hash_compare_scalar);
} break;
case POOL_VECTOR2_ARRAY: {
hash_compare_pool_array(_data._mem, p_variant._data._mem, Vector2, hash_compare_vector2);
} break;
case POOL_VECTOR3_ARRAY: {
hash_compare_pool_array(_data._mem, p_variant._data._mem, Vector3, hash_compare_vector3);
} break;
case POOL_COLOR_ARRAY: {
hash_compare_pool_array(_data._mem, p_variant._data._mem, Color, hash_compare_color);
} break;
default:
bool v;
Variant r;
evaluate(OP_EQUAL,*this,p_variant,r,v);
return r;
}
return false;
}
bool Variant::is_ref() const {

View file

@ -421,6 +421,7 @@ public:
bool operator<(const Variant& p_variant) const;
uint32_t hash() const;
bool hash_compare(const Variant& p_variant) const;
bool booleanize(bool &valid) const;
void static_assign(const Variant& p_variant);
@ -459,6 +460,10 @@ struct VariantHasher {
static _FORCE_INLINE_ uint32_t hash(const Variant &p_variant) { return p_variant.hash(); }
};
struct VariantComparator {
static _FORCE_INLINE_ bool compare(const Variant &p_lhs, const Variant &p_rhs) { return p_lhs.hash_compare(p_rhs); }
};
Variant::ObjData& Variant::_get_obj() {

View file

@ -134,7 +134,7 @@ private:
struct VersionKeyHash {
static _FORCE_INLINE_ uint32_t hash( const VersionKey& p_key) { return HashMapHahserDefault::hash(p_key.key); };
static _FORCE_INLINE_ uint32_t hash( const VersionKey& p_key) { return HashMapHasherDefault::hash(p_key.key); };
};
//this should use a way more cachefriendly version..

View file

@ -149,7 +149,7 @@ private:
struct VersionKeyHash {
static _FORCE_INLINE_ uint32_t hash( const VersionKey& p_key) { return HashMapHahserDefault::hash(p_key.key); };
static _FORCE_INLINE_ uint32_t hash( const VersionKey& p_key) { return HashMapHasherDefault::hash(p_key.key); };
};
//this should use a way more cachefriendly version..

View file

@ -93,7 +93,7 @@ class GDCompiler {
//int get_identifier_pos(const StringName& p_dentifier) const;
HashMap<Variant,int,VariantHasher> constant_map;
HashMap<Variant,int,VariantHasher,VariantComparator> constant_map;
Map<StringName,int> name_map;
int get_name_map_pos(const StringName& p_identifier) {

View file

@ -1169,7 +1169,7 @@ Vector<uint8_t> GDTokenizerBuffer::parse_code_string(const String& p_code) {
Map<StringName,int> identifier_map;
HashMap<Variant,int,VariantHasher> constant_map;
HashMap<Variant,int,VariantHasher,VariantComparator> constant_map;
Map<uint32_t,int> line_map;
Vector<uint32_t> token_array;

View file

@ -361,7 +361,7 @@ static int _nm_get_string(const String& p_string, Map<StringName,int> &name_map)
return idx;
}
static int _vm_get_variant(const Variant& p_variant, HashMap<Variant,int,VariantHasher> &variant_map) {
static int _vm_get_variant(const Variant& p_variant, HashMap<Variant,int,VariantHasher,VariantComparator> &variant_map) {
if (variant_map.has(p_variant))
return variant_map[p_variant];
@ -371,7 +371,7 @@ static int _vm_get_variant(const Variant& p_variant, HashMap<Variant,int,Variant
return idx;
}
Error SceneState::_parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map<StringName,int> &name_map,HashMap<Variant,int,VariantHasher> &variant_map,Map<Node*,int> &node_map,Map<Node*,int> &nodepath_map) {
Error SceneState::_parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map<StringName,int> &name_map,HashMap<Variant,int,VariantHasher,VariantComparator> &variant_map,Map<Node*,int> &node_map,Map<Node*,int> &nodepath_map) {
// this function handles all the work related to properly packing scenes, be it
@ -747,7 +747,7 @@ Error SceneState::_parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map<S
}
Error SceneState::_parse_connections(Node *p_owner,Node *p_node, Map<StringName,int> &name_map,HashMap<Variant,int,VariantHasher> &variant_map,Map<Node*,int> &node_map,Map<Node*,int> &nodepath_map) {
Error SceneState::_parse_connections(Node *p_owner,Node *p_node, Map<StringName,int> &name_map,HashMap<Variant,int,VariantHasher,VariantComparator> &variant_map,Map<Node*,int> &node_map,Map<Node*,int> &nodepath_map) {
if (p_node!=p_owner && p_node->get_owner() && p_node->get_owner()!=p_owner && !p_owner->is_editable_instance(p_node->get_owner()))
return OK;
@ -952,7 +952,7 @@ Error SceneState::pack(Node *p_scene) {
Node *scene = p_scene;
Map<StringName,int> name_map;
HashMap<Variant,int,VariantHasher> variant_map;
HashMap<Variant,int,VariantHasher,VariantComparator> variant_map;
Map<Node*,int> node_map;
Map<Node*,int> nodepath_map;

View file

@ -91,8 +91,8 @@ class SceneState : public Reference {
Vector<ConnectionData> connections;
Error _parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map<StringName,int> &name_map,HashMap<Variant,int,VariantHasher> &variant_map,Map<Node*,int> &node_map,Map<Node*,int> &nodepath_map);
Error _parse_connections(Node *p_owner,Node *p_node, Map<StringName,int> &name_map,HashMap<Variant,int,VariantHasher> &variant_map,Map<Node*,int> &node_map,Map<Node*,int> &nodepath_map);
Error _parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map<StringName,int> &name_map,HashMap<Variant,int,VariantHasher,VariantComparator> &variant_map,Map<Node*,int> &node_map,Map<Node*,int> &nodepath_map);
Error _parse_connections(Node *p_owner,Node *p_node, Map<StringName,int> &name_map,HashMap<Variant,int,VariantHasher,VariantComparator> &variant_map,Map<Node*,int> &node_map,Map<Node*,int> &nodepath_map);
String path;

View file

@ -1334,7 +1334,7 @@ void BakedLightBaker::_make_octree_texture() {
base<<=16;
base|=int((pos.z+cell_size*0.5)/cell_size);
uint32_t hash = HashMapHahserDefault::hash(base);
uint32_t hash = HashMapHasherDefault::hash(base);
uint32_t idx = hash % hash_table_size;
octhashptr[oct_idx].next=hashptr[idx];
octhashptr[oct_idx].hash=hash;
@ -1362,7 +1362,7 @@ void BakedLightBaker::_make_octree_texture() {
base<<=16;
base|=int((pos.z+cell_size*0.5)/cell_size);
uint32_t hash = HashMapHahserDefault::hash(base);
uint32_t hash = HashMapHasherDefault::hash(base);
uint32_t idx = hash % hash_table_size;
uint32_t bucket = hashptr[idx];