diff --git a/core/local_vector.h b/core/local_vector.h
index 62ba4f19690..65582dd1457 100644
--- a/core/local_vector.h
+++ b/core/local_vector.h
@@ -82,6 +82,19 @@ public:
 		}
 	}
 
+	// Unordered removal: the slot at `p_index` is overwritten with the last
+	// element and the vector shrinks by one. Element order is NOT preserved.
+	// Fails (via ERR_FAIL_INDEX) without modifying anything when `p_index`
+	// is out of range.
+	void remove_unordered(U p_index) {
+		ERR_FAIL_INDEX(p_index, count);
+		// after the decrement `last` is the index of the former final element
+		const U last = --count;
+		if (p_index < last) {
+			data[p_index] = data[last];
+		}
+		// the vacated tail slot still holds a live object, destroy it if needed
+		if (!(__has_trivial_destructor(T) || force_trivial)) {
+			data[last].~T();
+		}
+	}
+
 	void erase(const T &p_val) {
 		int64_t idx = find(p_val);
 		if (idx >= 0) {
diff --git a/core/math/bvh.h b/core/math/bvh.h
new file mode 100644
index 00000000000..f5f6f0b27b4
--- /dev/null
+++ b/core/math/bvh.h
@@ -0,0 +1,510 @@
+/*************************************************************************/
+/*  bvh.h                                                                */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef BVH_H
+#define BVH_H
+
+// BVH
+// This class provides a wrapper around BVH tree, which contains most of the functionality
+// for a dynamic BVH with templated leaf size.
+// However BVH also adds facilities for pairing, to maintain compatibility with Godot 3.2.
+// Pairing is a collision pairing system, on top of the basic BVH.
+
+#include "bvh_tree.h"
+
+#define BVHTREE_CLASS BVH_Tree<T, 2, MAX_ITEMS, USE_PAIRS>
+
+// BVH_Manager
+// Thin front end over BVH_Tree that exposes an octree-like interface (create /
+// move / erase / cull_*) and, when USE_PAIRS is true, keeps track of which items
+// changed during the current tick so that pair / unpair callbacks can be sent
+// from update().
+//
+// T          - user type stored (by pointer) with each item.
+// USE_PAIRS  - enables the collision pairing bookkeeping.
+// MAX_ITEMS  - forwarded to BVH_Tree as the leaf size template argument.
+template <class T, bool USE_PAIRS = false, int MAX_ITEMS = 32>
+class BVH_Manager {
+
+public:
+	// note we are using uint32_t instead of BVHHandle, losing type safety, but this
+	// is for compatibility with octree
+	//
+	// The first argument of each callback is the userdata pointer registered together
+	// with that callback. The value returned by the pair callback is stored with the
+	// pair and is handed back as the last argument of the unpair callback.
+	typedef void *(*PairCallback)(void *, uint32_t, T *, int, uint32_t, T *, int);
+	typedef void (*UnpairCallback)(void *, uint32_t, T *, int, uint32_t, T *, int, void *);
+
+	// these 2 are crucial for fine tuning, and can be applied manually
+	// see the variable declarations for more info.
+	// A value >= 0 sets the expansion explicitly and switches the automatic mode off,
+	// a negative value switches the automatic mode back on.
+	void params_set_node_expansion(real_t p_value) {
+		if (p_value >= 0.0) {
+			tree._node_expansion = p_value;
+			tree._auto_node_expansion = false;
+		} else {
+			tree._auto_node_expansion = true;
+		}
+	}
+
+	// Same convention as params_set_node_expansion(), but for the margin used when
+	// building the expanded AABB that pairing tests are run against.
+	void params_set_pairing_expansion(real_t p_value) {
+		if (p_value >= 0.0) {
+			tree._pairing_expansion = p_value;
+			tree._auto_pairing_expansion = false;
+		} else {
+			tree._auto_pairing_expansion = true;
+		}
+	}
+
+	// Registers the callback (and its userdata) invoked when a new pair is found.
+	void set_pair_callback(PairCallback p_callback, void *p_userdata) {
+		pair_callback = p_callback;
+		pair_callback_userdata = p_userdata;
+	}
+	// Registers the callback (and its userdata) invoked when a pair is dissolved.
+	void set_unpair_callback(UnpairCallback p_callback, void *p_userdata) {
+		unpair_callback = p_callback;
+		unpair_callback_userdata = p_userdata;
+	}
+
+	// Adds an item to the tree and returns its handle. With USE_PAIRS the new item
+	// is also queued for the next collision check.
+	BVHHandle create(T *p_userdata, const AABB &p_aabb = AABB(), int p_subindex = 0, bool p_pairable = false, uint32_t p_pairable_type = 0, uint32_t p_pairable_mask = 1) {
+
+#ifdef TOOLS_ENABLED
+		if (!USE_PAIRS) {
+			if (p_pairable) {
+				WARN_PRINT_ONCE("creating pairable item in BVH with USE_PAIRS set to false");
+			}
+		}
+#endif
+
+		BVHHandle h = tree.item_add(p_userdata, p_aabb, p_subindex, p_pairable, p_pairable_type, p_pairable_mask);
+
+		if (USE_PAIRS) {
+			_add_changed_item(h, p_aabb);
+		}
+
+		return h;
+	}
+
+	////////////////////////////////////////////////////
+	// wrapper versions that use uint32_t instead of handle
+	// for backward compatibility. Less type safe
+	void move(uint32_t p_handle, const AABB &p_aabb) {
+		BVHHandle h;
+		h.set(p_handle);
+		move(h, p_aabb);
+	}
+
+	void erase(uint32_t p_handle) {
+		BVHHandle h;
+		h.set(p_handle);
+		erase(h);
+	}
+
+	void set_pairable(uint32_t p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) {
+		BVHHandle h;
+		h.set(p_handle);
+		set_pairable(h, p_pairable, p_pairable_type, p_pairable_mask);
+	}
+
+	bool is_pairable(uint32_t p_handle) const {
+		BVHHandle h;
+		h.set(p_handle);
+		return item_is_pairable(h);
+	}
+	int get_subindex(uint32_t p_handle) const {
+		BVHHandle h;
+		h.set(p_handle);
+		return item_get_subindex(h);
+	}
+
+	T *get(uint32_t p_handle) const {
+		BVHHandle h;
+		h.set(p_handle);
+		return item_get_userdata(h);
+	}
+
+	////////////////////////////////////////////////////
+
+	// Updates the item's AABB. The item is only queued for a collision check when
+	// the tree reports (by returning true from item_move) that the move took effect.
+	void move(BVHHandle p_handle, const AABB &p_aabb) {
+
+		if (tree.item_move(p_handle, p_aabb)) {
+			if (USE_PAIRS) {
+				_add_changed_item(p_handle, p_aabb);
+			}
+		}
+	}
+
+	// Removes the item from the tree.
+	void erase(BVHHandle p_handle) {
+		// call unpair and remove all references to the item
+		// before deleting from the tree
+		if (USE_PAIRS) {
+			_remove_changed_item(p_handle);
+		}
+
+		tree.item_remove(p_handle);
+	}
+
+	// call e.g. once per frame (this does a trickle optimize)
+	void update() {
+		tree.update();
+		_check_for_collisions();
+#ifdef BVH_INTEGRITY_CHECKS
+		tree.integrity_check_all();
+#endif
+	}
+
+	// prefer calling this directly as type safe
+	void set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) {
+		// unpair callback if already paired? NYI
+		tree.item_set_pairable(p_handle, p_pairable, p_pairable_type, p_pairable_mask);
+	}
+
+	// cull tests
+	// Each cull_* function fills p_result_array (and p_subindex_array when given)
+	// with at most p_result_max entries and returns the number of entries written.
+	int cull_aabb(const AABB &p_aabb, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) {
+		typename BVHTREE_CLASS::CullParams params;
+
+		params.result_count_overall = 0;
+		params.result_max = p_result_max;
+		params.result_array = p_result_array;
+		params.subindex_array = p_subindex_array;
+		params.mask = p_mask;
+		// a user query must look at every tree, not only the pairable one
+		params.test_pairable_only = false;
+		params.abb.from(p_aabb);
+
+		tree.cull_aabb(params);
+
+		return params.result_count_overall;
+	}
+
+	int cull_segment(const Vector3 &p_from, const Vector3 &p_to, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) {
+		typename BVHTREE_CLASS::CullParams params;
+
+		params.result_count_overall = 0;
+		params.result_max = p_result_max;
+		params.result_array = p_result_array;
+		params.subindex_array = p_subindex_array;
+		params.mask = p_mask;
+
+		params.segment.from = p_from;
+		params.segment.to = p_to;
+
+		tree.cull_segment(params);
+
+		return params.result_count_overall;
+	}
+
+	int cull_point(const Vector3 &p_point, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) {
+		typename BVHTREE_CLASS::CullParams params;
+
+		params.result_count_overall = 0;
+		params.result_max = p_result_max;
+		params.result_array = p_result_array;
+		params.subindex_array = p_subindex_array;
+		params.mask = p_mask;
+
+		params.point = p_point;
+
+		tree.cull_point(params);
+		return params.result_count_overall;
+	}
+
+	// Returns 0 without touching the tree when the plane list is empty or when no
+	// hull points can be computed from it.
+	int cull_convex(const Vector<Plane> &p_convex, T **p_result_array, int p_result_max, uint32_t p_mask = 0xFFFFFFFF) {
+		if (!p_convex.size())
+			return 0;
+
+		Vector<Vector3> convex_points = Geometry::compute_convex_mesh_points(&p_convex[0], p_convex.size());
+		if (convex_points.size() == 0)
+			return 0;
+
+		typename BVHTREE_CLASS::CullParams params;
+		params.result_count_overall = 0;
+		params.result_max = p_result_max;
+		params.result_array = p_result_array;
+		params.subindex_array = nullptr;
+		params.mask = p_mask;
+
+		// the hull only borrows the plane / point storage, both vectors outlive the call
+		params.hull.planes = &p_convex[0];
+		params.hull.num_planes = p_convex.size();
+		params.hull.points = &convex_points[0];
+		params.hull.num_points = convex_points.size();
+
+		tree.cull_convex(params);
+
+		return params.result_count_overall;
+	}
+
+private:
+	// do this after moving etc.
+	// For every item on the changed list: first dissolve pairs that no longer
+	// overlap, then cull the tree with the item's expanded AABB and create pairs
+	// for hits that are not paired yet. Finally the changed list is reset.
+	void _check_for_collisions() {
+		typename BVHTREE_CLASS::CullParams params;
+
+		params.result_count_overall = 0;
+		params.result_max = INT_MAX;
+		params.result_array = nullptr;
+		params.subindex_array = nullptr;
+		params.mask = 0xFFFFFFFF;
+
+		for (unsigned int n = 0; n < changed_items.size(); n++) {
+			const BVHHandle &h = changed_items[n];
+
+			// use the expanded aabb for pairing
+			const AABB &expanded_aabb = tree._pairs[h.id()].expanded_aabb;
+			BVH_ABB abb;
+			abb.from(expanded_aabb);
+
+			// find all the existing paired aabbs that are no longer
+			// paired, and send callbacks
+			_find_leavers(h, abb);
+
+			uint32_t changed_item_ref_id = h.id();
+
+			// set up the test from this item.
+			// this includes whether to test the non pairable tree,
+			// and the item mask.
+			tree.item_fill_cullparams(h, params);
+
+			params.abb = abb;
+
+			params.result_count_overall = 0; // might not be needed
+			// hits are read straight from tree._cull_hits, so no translation
+			// into result arrays is requested
+			tree.cull_aabb(params, false);
+
+			for (unsigned int i = 0; i < tree._cull_hits.size(); i++) {
+				uint32_t ref_id = tree._cull_hits[i];
+
+				// don't collide against ourself
+				if (ref_id == changed_item_ref_id)
+					continue;
+
+#ifdef BVH_CHECKS
+				// if neither are pairable, they should ignore each other
+				// THIS SHOULD NEVER HAPPEN .. now we only test the pairable tree
+				// if the changed item is not pairable
+				CRASH_COND(params.test_pairable_only && !tree._extra[ref_id].pairable);
+#endif
+
+				// checkmasks is already done in the cull routine.
+				BVHHandle h_collidee;
+				h_collidee.set_id(ref_id);
+
+				// find NEW enterers, and send callbacks for them only
+				_collide(h, h_collidee);
+			}
+		}
+		_reset();
+	}
+
+public:
+	// Writes the item's bounds, converted back to a standard AABB, into r_aabb.
+	void item_get_AABB(BVHHandle p_handle, AABB &r_aabb) {
+		BVH_ABB abb;
+		tree.item_get_ABB(p_handle, abb);
+		abb.to(r_aabb);
+	}
+
+private:
+	// supplemental funcs
+	bool item_is_pairable(BVHHandle p_handle) const { return _get_extra(p_handle).pairable; }
+	T *item_get_userdata(BVHHandle p_handle) const { return _get_extra(p_handle).userdata; }
+	int item_get_subindex(BVHHandle p_handle) const { return _get_extra(p_handle).subindex; }
+
+	// Removes the pair from both items' pair lists and notifies the unpair
+	// callback (if one is registered).
+	void _unpair(BVHHandle p_from, BVHHandle p_to) {
+		tree._handle_sort(p_from, p_to);
+
+		typename BVHTREE_CLASS::ItemPairs &pairs_from = tree._pairs[p_from.id()];
+		typename BVHTREE_CLASS::ItemPairs &pairs_to = tree._pairs[p_to.id()];
+
+		// value stored with the pair when it was created (see _collide)
+		void *ud_from = pairs_from.remove_pair_to(p_to);
+		pairs_to.remove_pair_to(p_from);
+
+		// callback
+		if (unpair_callback) {
+
+			typename BVHTREE_CLASS::ItemExtra &exa = tree._extra[p_from.id()];
+			typename BVHTREE_CLASS::ItemExtra &exb = tree._extra[p_to.id()];
+
+			// pass the userdata that was registered with the unpair callback,
+			// not the one belonging to the pair callback
+			unpair_callback(unpair_callback_userdata, p_from, exa.userdata, exa.subindex, p_to, exb.userdata, exb.subindex, ud_from);
+		}
+	}
+
+	// returns true if unpair
+	bool _find_leavers_process_pair(typename BVHTREE_CLASS::ItemPairs &p_pairs_from, const BVH_ABB &p_abb_from, BVHHandle p_from, BVHHandle p_to) {
+		BVH_ABB abb_to;
+		tree.item_get_ABB(p_to, abb_to);
+
+		// do they overlap?
+		if (p_abb_from.intersects(abb_to))
+			return false;
+
+		_unpair(p_from, p_to);
+		return true;
+	}
+
+	// find all the existing paired aabbs that are no longer
+	// paired, and send callbacks
+	void _find_leavers(BVHHandle p_handle, const BVH_ABB &expanded_abb_from) {
+		typename BVHTREE_CLASS::ItemPairs &p_from = tree._pairs[p_handle.id()];
+
+		// opportunity to de-extend pairs, before removing leavers
+		p_from.update();
+
+		BVH_ABB abb_from = expanded_abb_from;
+
+		// remove from pairing list for every partner.
+		// when a pair is removed the list shrinks by one, so the same index has to
+		// be examined again; only advance when nothing was removed
+		unsigned int n = 0;
+		while (n < p_from.extended_pairs.size()) {
+			BVHHandle h_to = p_from.extended_pairs[n].handle;
+			if (!_find_leavers_process_pair(p_from, abb_from, p_handle, h_to)) {
+				n++;
+			}
+		}
+	}
+
+	// find NEW enterers, and send callbacks for them only
+	// handle a and b
+	void _collide(BVHHandle p_ha, BVHHandle p_hb) {
+		// only have to do this oneway, lower ID then higher ID
+		tree._handle_sort(p_ha, p_hb);
+
+		typename BVHTREE_CLASS::ItemPairs &p_from = tree._pairs[p_ha.id()];
+		typename BVHTREE_CLASS::ItemPairs &p_to = tree._pairs[p_hb.id()];
+
+		// does this pair exist already?
+		// or only check the one with lower number of pairs for greater speed
+		if (p_from.num_pairs <= p_to.num_pairs) {
+			if (p_from.contains_pair_to(p_hb))
+				return;
+		} else {
+			if (p_to.contains_pair_to(p_ha))
+				return;
+		}
+
+		// callback
+		void *callback_userdata = nullptr;
+
+		if (pair_callback) {
+			const typename BVHTREE_CLASS::ItemExtra &exa = _get_extra(p_ha);
+			const typename BVHTREE_CLASS::ItemExtra &exb = _get_extra(p_hb);
+
+			callback_userdata = pair_callback(pair_callback_userdata, p_ha, exa.userdata, exa.subindex, p_hb, exb.userdata, exb.subindex);
+		}
+
+		// new pair! .. only really need to store the userdata on the lower handle, but both have storage so...
+		p_from.add_pair_to(p_hb, callback_userdata);
+		p_to.add_pair_to(p_ha, callback_userdata);
+	}
+
+	// if we remove an item, we need to immediately remove the pairs, to prevent reading the pair after deletion
+	void _remove_pairs_containing(BVHHandle p_handle) {
+
+		typename BVHTREE_CLASS::ItemPairs &p_from = tree._pairs[p_handle.id()];
+
+		// remove from pairing list for every partner.
+		// can't easily use a for loop here, because removing changes the size of the list
+		while (p_from.extended_pairs.size()) {
+			BVHHandle h_to = p_from.extended_pairs[0].handle;
+			_unpair(p_handle, h_to);
+		}
+	}
+
+private:
+	const typename BVHTREE_CLASS::ItemExtra &_get_extra(BVHHandle p_handle) const {
+		return tree._extra[p_handle.id()];
+	}
+	const typename BVHTREE_CLASS::ItemRef &_get_ref(BVHHandle p_handle) const {
+		return tree._refs[p_handle.id()];
+	}
+
+	// Empties the changed list and starts a new tick, which invalidates every
+	// "already on the changed list" marker stored in the items.
+	void _reset() {
+		changed_items.clear();
+		_tick++;
+	}
+
+	// Queues an item for the next collision check, unless its new AABB is still
+	// inside the expanded AABB used for pairing or it is already queued this tick.
+	void _add_changed_item(BVHHandle p_handle, const AABB &aabb) {
+
+		// only if uses pairing
+		// no .. non pairable items seem to be able to pair with pairable
+
+		// aabb check with expanded aabb. This greatly decreases processing
+		// at the cost of slightly less accurate pairing checks
+		AABB &expanded_aabb = tree._pairs[p_handle.id()].expanded_aabb;
+		if (expanded_aabb.encloses(aabb))
+			return;
+
+		uint32_t &last_updated_tick = tree._extra[p_handle.id()].last_updated_tick;
+
+		if (last_updated_tick == _tick)
+			return; // already on changed list
+
+		// mark as on list
+		last_updated_tick = _tick;
+
+		// opportunity to de-extend pairs (before collision detection, which will delete then recreate pairs)
+
+		// new expanded aabb
+		expanded_aabb = aabb;
+		expanded_aabb.grow_by(tree._pairing_expansion);
+
+		changed_items.push_back(p_handle);
+	}
+
+	// Dissolves every pair the item takes part in and drops it from the changed list.
+	void _remove_changed_item(BVHHandle p_handle) {
+
+		// Care has to be taken here for items that are deleted. The ref ID
+		// could be reused on the same tick for new items. This is probably
+		// rare but should be taken into consideration
+
+		// callbacks
+		_remove_pairs_containing(p_handle);
+
+		// remove from changed items (not very efficient yet).
+		// remove_unordered() moves the last element into slot n, so that slot has
+		// to be tested again; only advance when nothing was removed
+		unsigned int n = 0;
+		while (n < changed_items.size()) {
+			if (changed_items[n] == p_handle) {
+				changed_items.remove_unordered(n);
+			} else {
+				n++;
+			}
+		}
+
+		// reset the last updated tick (may not be necessary but just in case)
+		tree._extra[p_handle.id()].last_updated_tick = 0;
+	}
+
+	PairCallback pair_callback;
+	UnpairCallback unpair_callback;
+	void *pair_callback_userdata;
+	void *unpair_callback_userdata;
+
+	BVHTREE_CLASS tree;
+
+	// for collision pairing,
+	// maintain a list of all items moved etc on each frame / tick
+	LocalVector<BVHHandle, uint32_t, true> changed_items;
+	uint32_t _tick;
+
+public:
+	BVH_Manager() {
+		_tick = 1; // start from 1 so items with 0 indicate never updated
+		pair_callback = nullptr;
+		unpair_callback = nullptr;
+		pair_callback_userdata = nullptr;
+		unpair_callback_userdata = nullptr;
+	}
+};
+
+#undef BVHTREE_CLASS
+
+#endif // BVH_H
diff --git a/core/math/bvh_abb.h b/core/math/bvh_abb.h
new file mode 100644
index 00000000000..724caa0d366
--- /dev/null
+++ b/core/math/bvh_abb.h
@@ -0,0 +1,252 @@
+/*************************************************************************/
+/*  bvh_abb.h                                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef BVH_ABB_H
+#define BVH_ABB_H
+
+// Axis aligned bounding box in a layout tuned for the BVH.
+// The box is kept as its minimum corner plus the NEGATED maximum corner.
+struct BVH_ABB {
+	// Convex hull described by borrowed plane and point arrays (optional).
+	struct ConvexHull {
+		const Plane *planes;
+		int num_planes;
+		const Vector3 *points;
+		int num_points;
+	};
+
+	// Line segment between two points.
+	struct Segment {
+		Vector3 from;
+		Vector3 to;
+	};
+
+	// Outcome of a box versus hull test.
+	enum IntersectResult {
+		IR_MISS = 0,
+		IR_PARTIAL,
+		IR_FULL,
+	};
+
+	// the max corner is stored negated so that both corners can be
+	// tested the same way with SIMD
+	Vector3 min;
+	Vector3 neg_max;
+
+	bool operator==(const BVH_ABB &o) const {
+		return (min == o.min) && (neg_max == o.neg_max);
+	}
+	bool operator!=(const BVH_ABB &o) const {
+		return !(*this == o);
+	}
+
+	// Sets the box from its two corners.
+	void set(const Vector3 &_min, const Vector3 &_max) {
+		neg_max = -_max;
+		min = _min;
+	}
+
+	// conversion from a standard AABB (position + size)
+	void from(const AABB &p_aabb) {
+		const Vector3 max_corner = p_aabb.position + p_aabb.size;
+		min = p_aabb.position;
+		neg_max = -max_corner;
+	}
+
+	// conversion to a standard AABB (position + size)
+	void to(AABB &r_aabb) const {
+		r_aabb.size = calculate_size();
+		r_aabb.position = min;
+	}
+
+	// Grows this box so that it also covers p_o. Because the max corner is
+	// negated, both corners are combined with a component-wise MIN.
+	void merge(const BVH_ABB &p_o) {
+		min.x = MIN(min.x, p_o.min.x);
+		min.y = MIN(min.y, p_o.min.y);
+		min.z = MIN(min.z, p_o.min.z);
+
+		neg_max.x = MIN(neg_max.x, p_o.neg_max.x);
+		neg_max.y = MIN(neg_max.y, p_o.neg_max.y);
+		neg_max.z = MIN(neg_max.z, p_o.neg_max.z);
+	}
+
+	// max corner minus min corner
+	Vector3 calculate_size() const {
+		const Vector3 max_corner = -neg_max;
+		return max_corner - min;
+	}
+
+	Vector3 calculate_centre() const {
+		const Vector3 half_size = calculate_size() * 0.5;
+		return half_size + min;
+	}
+
+	// Sum of the absolute per-axis differences between (min + max) of this box
+	// and (min + max) of p_b.
+	real_t get_proximity_to(const BVH_ABB &p_b) const {
+		const Vector3 own_sum = min - neg_max;
+		const Vector3 other_sum = p_b.min - p_b.neg_max;
+		const Vector3 d = own_sum - other_sum;
+		return (Math::abs(d.x) + Math::abs(d.y) + Math::abs(d.z));
+	}
+
+	// Returns 0 when p_a is strictly closer (see get_proximity_to), otherwise 1.
+	int select_by_proximity(const BVH_ABB &p_a, const BVH_ABB &p_b) const {
+		if (get_proximity_to(p_a) < get_proximity_to(p_b)) {
+			return 0;
+		}
+		return 1;
+	}
+
+	// Writes the indices of all hull planes that cut through this box into
+	// p_plane_ids and returns how many were written.
+	uint32_t find_cutting_planes(const BVH_ABB::ConvexHull &p_hull, uint32_t *p_plane_ids) const {
+		uint32_t num_found = 0;
+
+		for (int plane_id = 0; plane_id < p_hull.num_planes; plane_id++) {
+			if (!intersects_plane(p_hull.planes[plane_id])) {
+				continue;
+			}
+			p_plane_ids[num_found++] = plane_id;
+		}
+
+		return num_found;
+	}
+
+	// True when the box corner furthest along the plane normal is over the
+	// plane while the opposite corner is not.
+	bool intersects_plane(const Plane &p_p) const {
+		Vector3 size = calculate_size();
+		Vector3 half_extents = size * 0.5;
+		Vector3 ofs = min + half_extents;
+
+		// corner offset pointing along the plane normal
+		Vector3 point_offset(
+				(p_p.normal.x < 0) ? -half_extents.x : half_extents.x,
+				(p_p.normal.y < 0) ? -half_extents.y : half_extents.y,
+				(p_p.normal.z < 0) ? -half_extents.z : half_extents.z);
+
+		Vector3 corner = point_offset + ofs;
+		if (!p_p.is_point_over(corner)) {
+			return false;
+		}
+
+		// the opposite corner must NOT be over the plane
+		corner = -point_offset + ofs;
+		return !p_p.is_point_over(corner);
+	}
+
+	// Tests the box against the listed subset of hull planes only. Returns false
+	// as soon as the corner lying furthest against a plane normal is over it.
+	bool intersects_convex_optimized(const ConvexHull &p_hull, const uint32_t *p_plane_ids, uint32_t p_num_planes) const {
+		Vector3 size = calculate_size();
+		Vector3 half_extents = size * 0.5;
+		Vector3 ofs = min + half_extents;
+
+		for (unsigned int i = 0; i < p_num_planes; i++) {
+			const Plane &plane = p_hull.planes[p_plane_ids[i]];
+
+			Vector3 corner(
+					(plane.normal.x > 0) ? -half_extents.x : half_extents.x,
+					(plane.normal.y > 0) ? -half_extents.y : half_extents.y,
+					(plane.normal.z > 0) ? -half_extents.z : half_extents.z);
+			corner += ofs;
+
+			if (plane.is_point_over(corner)) {
+				return false;
+			}
+		}
+
+		return true;
+	}
+
+	// Delegates to AABB::intersects_convex_shape.
+	bool intersects_convex_partial(const ConvexHull &p_hull) const {
+		AABB box;
+		to(box);
+		return box.intersects_convex_shape(p_hull.planes, p_hull.num_planes, p_hull.points, p_hull.num_points);
+	}
+
+	// Classifies the box against the hull as miss, partial or full.
+	IntersectResult intersects_convex(const ConvexHull &p_hull) const {
+		if (!intersects_convex_partial(p_hull)) {
+			return IR_MISS;
+		}
+
+		// fully within? very important for tree checks
+		return is_within_convex(p_hull) ? IR_FULL : IR_PARTIAL;
+	}
+
+	// Delegates to AABB::inside_convex_shape.
+	bool is_within_convex(const ConvexHull &p_hull) const {
+		AABB box;
+		to(box);
+		return box.inside_convex_shape(p_hull.planes, p_hull.num_planes);
+	}
+
+	// A point is within the hull when it has no positive distance to any plane.
+	bool is_point_within_hull(const ConvexHull &p_hull, const Vector3 &p_pt) const {
+		for (int plane_id = 0; plane_id < p_hull.num_planes; plane_id++) {
+			const Plane &plane = p_hull.planes[plane_id];
+			if (plane.distance_to(p_pt) > 0.0f) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	// Delegates to AABB::intersects_segment.
+	bool intersects_segment(const Segment &p_s) const {
+		AABB box;
+		to(box);
+		return box.intersects_segment(p_s.from, p_s.to);
+	}
+
+	// The point misses when it lies beyond the max or before the min corner
+	// on any axis.
+	bool intersects_point(const Vector3 &p_pt) const {
+		return !(_vector3_any_lessthan(-p_pt, neg_max) || _vector3_any_lessthan(p_pt, min));
+	}
+
+	// Two boxes miss when, on any axis, one starts after the other one ends.
+	bool intersects(const BVH_ABB &p_o) const {
+		return !(_vector3_any_morethan(p_o.min, -neg_max) || _vector3_any_morethan(min, -p_o.neg_max));
+	}
+
+	// True when p_o does not stick out of this box on any axis.
+	bool is_other_within(const BVH_ABB &p_o) const {
+		return !(_vector3_any_lessthan(p_o.neg_max, neg_max) || _vector3_any_lessthan(p_o.min, min));
+	}
+
+	// Pushes both corners outwards by p_change (subtracting from the negated
+	// max moves the max corner up).
+	void grow(const Vector3 &p_change) {
+		min -= p_change;
+		neg_max -= p_change;
+	}
+
+	// Uniform version of grow().
+	void expand(real_t p_change) {
+		const Vector3 change(p_change, p_change, p_change);
+		grow(change);
+	}
+
+	// actually surface area metric
+	float get_area() const {
+		Vector3 d = calculate_size();
+		return 2.0f * (d.x * d.y + d.y * d.z + d.z * d.x);
+	}
+
+	// Sets both stored vectors to FLT_MAX, i.e. min = +FLT_MAX and max = -FLT_MAX.
+	void set_to_max_opposite_extents() {
+		const Vector3 extreme(FLT_MAX, FLT_MAX, FLT_MAX);
+		neg_max = extreme;
+		min = extreme;
+	}
+
+	// true when at least one component of p_a is greater than that of p_b
+	bool _vector3_any_morethan(const Vector3 &p_a, const Vector3 &p_b) const {
+		return (p_a.x > p_b.x) || (p_a.y > p_b.y) || (p_a.z > p_b.z);
+	}
+
+	// true when at least one component of p_a is less than that of p_b
+	bool _vector3_any_lessthan(const Vector3 &p_a, const Vector3 &p_b) const {
+		return (p_a.x < p_b.x) || (p_a.y < p_b.y) || (p_a.z < p_b.z);
+	}
+};
+
+#endif // BVH_ABB_H
diff --git a/core/math/bvh_cull.inc b/core/math/bvh_cull.inc
new file mode 100644
index 00000000000..212969c3a80
--- /dev/null
+++ b/core/math/bvh_cull.inc
@@ -0,0 +1,524 @@
+public:
+// cull parameters is a convenient way of passing a bunch
+// of arguments through the culling functions without
+// writing loads of code. Not all members are used for some cull checks
+struct CullParams {
+	// running total of results written so far, used as the write offset
+	// into the output arrays
+	int result_count_overall; // both trees
+	// number of results written by the most recent translate step
+	int result_count; // this tree only
+	// capacity limit for the number of results
+	int result_max;
+	// output: userdata pointer of each hit
+	T **result_array;
+	// output (may be null): subindex of each hit
+	int *subindex_array;
+	// when USE_PAIRS is set, a hit is only kept if (mask & item pairable_type) is non zero
+	uint32_t mask;
+
+	// optional components for different tests
+	Vector3 point;
+	BVH_ABB abb;
+	typename BVH_ABB::ConvexHull hull;
+	typename BVH_ABB::Segment segment;
+
+	// when collision testing, non pairable moving items
+	// only need to be tested against the pairable tree.
+	// collisions with other non pairable items are irrelevant.
+	// (only read by cull_aabb, which skips tree 0 when this is set)
+	bool test_pairable_only;
+};
+
+private:
+// Converts the ref ids collected in _cull_hits into userdata pointers (and
+// subindices, when p.subindex_array is set), appending them to the output
+// arrays starting at p.result_count_overall.
+// At most (p.result_max - p.result_count_overall) hits are written; the number
+// written is stored in p.result_count and added to p.result_count_overall.
+void _cull_translate_hits(CullParams &p) {
+	int num_hits = _cull_hits.size();
+	int left = p.result_max - p.result_count_overall;
+
+	// no capacity left (or a negative maximum was requested): never let the
+	// counters go backwards
+	if (left < 0)
+		left = 0;
+
+	if (num_hits > left)
+		num_hits = left;
+
+	int out_n = p.result_count_overall;
+
+	for (int n = 0; n < num_hits; n++) {
+		uint32_t ref_id = _cull_hits[n];
+
+		const ItemExtra &ex = _extra[ref_id];
+		p.result_array[out_n] = ex.userdata;
+
+		if (p.subindex_array)
+			p.subindex_array[out_n] = ex.subindex;
+
+		out_n++;
+	}
+
+	p.result_count = num_hits;
+	p.result_count_overall += num_hits;
+}
+
+public:
+// Runs the convex hull cull over every tree that has a root. Hits are gathered
+// in _cull_hits and, unless p_translate_hits is false, copied to the result
+// arrays. Returns r_params.result_count.
+int cull_convex(CullParams &r_params, bool p_translate_hits = true) {
+	r_params.result_count = 0;
+	_cull_hits.clear();
+
+	for (int tree_id = 0; tree_id < NUM_TREES; tree_id++) {
+		if (_root_node_id[tree_id] != BVHCommon::INVALID) {
+			_cull_convex_iterative(_root_node_id[tree_id], r_params);
+		}
+	}
+
+	if (p_translate_hits) {
+		_cull_translate_hits(r_params);
+	}
+
+	return r_params.result_count;
+}
+
+// Runs the segment cull over every tree that has a root. Hits are gathered
+// in _cull_hits and, unless p_translate_hits is false, copied to the result
+// arrays. Returns r_params.result_count.
+int cull_segment(CullParams &r_params, bool p_translate_hits = true) {
+	r_params.result_count = 0;
+	_cull_hits.clear();
+
+	for (int tree_id = 0; tree_id < NUM_TREES; tree_id++) {
+		if (_root_node_id[tree_id] != BVHCommon::INVALID) {
+			_cull_segment_iterative(_root_node_id[tree_id], r_params);
+		}
+	}
+
+	if (p_translate_hits) {
+		_cull_translate_hits(r_params);
+	}
+
+	return r_params.result_count;
+}
+
+// Runs the point cull over every tree that has a root. Hits are gathered
+// in _cull_hits and, unless p_translate_hits is false, copied to the result
+// arrays. Returns r_params.result_count.
+int cull_point(CullParams &r_params, bool p_translate_hits = true) {
+	r_params.result_count = 0;
+	_cull_hits.clear();
+
+	for (int tree_id = 0; tree_id < NUM_TREES; tree_id++) {
+		if (_root_node_id[tree_id] != BVHCommon::INVALID) {
+			_cull_point_iterative(_root_node_id[tree_id], r_params);
+		}
+	}
+
+	if (p_translate_hits) {
+		_cull_translate_hits(r_params);
+	}
+
+	return r_params.result_count;
+}
+
+// Runs the box cull over every tree that has a root; tree 0 is left out when
+// r_params.test_pairable_only is set. Hits are gathered in _cull_hits and,
+// unless p_translate_hits is false, copied to the result arrays.
+// Returns r_params.result_count.
+int cull_aabb(CullParams &r_params, bool p_translate_hits = true) {
+	r_params.result_count = 0;
+	_cull_hits.clear();
+
+	for (int tree_id = 0; tree_id < NUM_TREES; tree_id++) {
+		const bool skip_tree = (_root_node_id[tree_id] == BVHCommon::INVALID) ||
+							   ((tree_id == 0) && r_params.test_pairable_only);
+		if (!skip_tree) {
+			_cull_aabb_iterative(_root_node_id[tree_id], r_params);
+		}
+	}
+
+	if (p_translate_hits) {
+		_cull_translate_hits(r_params);
+	}
+
+	return r_params.result_count;
+}
+
+// Lazy "results are full" test. Hits are not checked one by one: writing a few
+// too many entries into _cull_hits is harmless, because only result_max of them
+// get translated and output. Once the maximum has been reached there is simply
+// no point in carrying on with the cull checks.
+bool _cull_hits_full(const CullParams &p) {
+	const int num_hits = (int)_cull_hits.size();
+	return num_hits >= p.result_max;
+}
+
+// Records a hit for the given item ref id. With USE_PAIRS the hit is dropped
+// when the cull mask shares no bit with the item's pairable type.
+void _cull_hit(uint32_t p_ref_id, CullParams &p) {
+
+	// mask filtering would be more efficient before the plane checks,
+	// it is done here to keep things simple for now
+	if (USE_PAIRS) {
+		const bool mask_matches = (p.mask & _extra[p_ref_id].pairable_type) != 0;
+		if (!mask_matches) {
+			return;
+		}
+	}
+
+	_cull_hits.push_back(p_ref_id);
+}
+
+// Walks the tree below p_node_id without recursion and registers every leaf
+// item whose box is hit by r_params.segment.
+// Returns false when the walk was cut short because the results are full,
+// true otherwise.
+bool _cull_segment_iterative(uint32_t p_node_id, CullParams &r_params) {
+
+	// one stack entry per node that still has to be visited
+	struct PendingNode {
+		uint32_t node_id;
+	};
+
+	// the helper class carries most of the iteration logic
+	BVH_IterativeInfo<PendingNode> pending;
+
+	// the stack memory has to come from alloca in THIS function,
+	// the helper class cannot allocate it
+	pending.stack = (PendingNode *)alloca(pending.get_alloca_stacksize());
+
+	// start the walk at the requested node
+	pending.get_first()->node_id = p_node_id;
+
+	PendingNode current;
+
+	while (pending.pop(current)) {
+
+		TNode &node = _nodes[current.node_id];
+
+		if (!node.is_leaf()) {
+			// internal node: queue every child whose box the segment touches
+			for (int c = 0; c < node.num_children; c++) {
+				uint32_t child_id = node.children[c];
+
+				if (!_nodes[child_id].aabb.intersects_segment(r_params.segment)) {
+					continue;
+				}
+
+				pending.request()->node_id = child_id;
+			}
+			continue;
+		}
+
+		// leaf: stop early when the results are (lazily) detected to be full
+		if (_cull_hits_full(r_params)) {
+			return false;
+		}
+
+		TLeaf &leaf = _node_get_leaf(node);
+
+		// test the items one by one
+		for (int i = 0; i < leaf.num_items; i++) {
+			if (leaf.get_aabb(i).intersects_segment(r_params.segment)) {
+				uint32_t item_ref_id = leaf.get_item_ref_id(i);
+				_cull_hit(item_ref_id, r_params);
+			}
+		}
+	}
+
+	// results are not full
+	return true;
+}
+
+// Walks the tree below p_node_id without recursion and registers every leaf
+// item whose box contains r_params.point.
+// Returns false when the walk was cut short because the results are full,
+// true otherwise.
+bool _cull_point_iterative(uint32_t p_node_id, CullParams &r_params) {
+
+	// one stack entry per node that still has to be visited
+	struct PendingNode {
+		uint32_t node_id;
+	};
+
+	// the helper class carries most of the iteration logic
+	BVH_IterativeInfo<PendingNode> pending;
+
+	// the stack memory has to come from alloca in THIS function,
+	// the helper class cannot allocate it
+	pending.stack = (PendingNode *)alloca(pending.get_alloca_stacksize());
+
+	// start the walk at the requested node
+	pending.get_first()->node_id = p_node_id;
+
+	PendingNode current;
+
+	while (pending.pop(current)) {
+
+		TNode &node = _nodes[current.node_id];
+
+		// the node box is tested on pop; a miss prunes the whole subtree
+		if (!node.aabb.intersects_point(r_params.point)) {
+			continue;
+		}
+
+		if (!node.is_leaf()) {
+			// internal node: queue all children, they get tested when popped
+			for (int c = 0; c < node.num_children; c++) {
+				uint32_t child_id = node.children[c];
+				pending.request()->node_id = child_id;
+			}
+			continue;
+		}
+
+		// leaf: stop early when the results are (lazily) detected to be full
+		if (_cull_hits_full(r_params)) {
+			return false;
+		}
+
+		TLeaf &leaf = _node_get_leaf(node);
+
+		// test the items one by one
+		for (int i = 0; i < leaf.num_items; i++) {
+			if (leaf.get_aabb(i).intersects_point(r_params.point)) {
+				uint32_t item_ref_id = leaf.get_item_ref_id(i);
+				_cull_hit(item_ref_id, r_params);
+			}
+		}
+	}
+
+	// results are not full
+	return true;
+}
+
+// Iterative AABB query. Walks the tree from p_node_id using an explicit stack
+// and registers (via _cull_hit) the leaf items that intersect r_params.abb.
+// Once a node is known to be fully inside the query box, everything below it
+// is accepted without further bound tests.
+// p_fully_within seeds that state for the start node.
+// Returns false if the walk was abandoned because the hit list was full,
+// true otherwise.
+bool _cull_aabb_iterative(uint32_t p_node_id, CullParams &r_params, bool p_fully_within = false) {
+
+	// state stored for each pending node
+	struct CullAABBParams {
+		uint32_t node_id;
+		bool fully_within;
+	};
+
+	// the helper does the stack bookkeeping, but the storage itself must be
+	// alloca'd from this function so that it lives in this stack frame
+	BVH_IterativeInfo<CullAABBParams> ii;
+	ii.stack = (CullAABBParams *)alloca(ii.get_alloca_stacksize());
+
+	// start from the requested node
+	ii.get_first()->node_id = p_node_id;
+	ii.get_first()->fully_within = p_fully_within;
+
+	CullAABBParams current;
+
+	while (ii.pop(current)) {
+		TNode &tnode = _nodes[current.node_id];
+
+		if (tnode.is_leaf()) {
+			// lazy check .. give up once the results are full
+			if (_cull_hits_full(r_params)) {
+				return false;
+			}
+
+			TLeaf &leaf = _node_get_leaf(tnode);
+
+			for (int i = 0; i < leaf.num_items; i++) {
+				// items of a fully enclosed leaf skip the bound test
+				// (mask checks are left to _cull_hit)
+				if (!current.fully_within && !leaf.get_aabb(i).intersects(r_params.abb)) {
+					continue;
+				}
+
+				uint32_t item_ref_id = leaf.get_item_ref_id(i);
+				_cull_hit(item_ref_id, r_params);
+			}
+			continue;
+		}
+
+		// internal node: decide for each child whether it goes on the stack
+		for (int c = 0; c < tnode.num_children; c++) {
+			uint32_t child_id = tnode.children[c];
+
+			// children of a fully enclosed node are themselves fully enclosed
+			bool child_within = true;
+
+			if (!current.fully_within) {
+				const BVH_ABB &child_abb = _nodes[child_id].aabb;
+
+				// a child that misses the query box is dropped here
+				if (!child_abb.intersects(r_params.abb)) {
+					continue;
+				}
+
+				// is the child totally within the query box?
+				child_within = r_params.abb.is_other_within(child_abb);
+			}
+
+			// request() is expected to always hand back a valid entry
+			CullAABBParams *entry = ii.request();
+			entry->node_id = child_id;
+			entry->fully_within = child_within;
+		}
+	} // while more nodes to pop
+
+	// true indicates results are not full
+	return true;
+}
+
+// Iterative convex hull query. Walks the tree from p_node_id using an explicit
+// stack and registers (via _cull_hit) the leaf items that intersect
+// r_params.hull.
+// p_fully_within seeds the "already known to be entirely inside the hull"
+// state for the start node; once a node is fully within, its whole subtree is
+// accepted without further hull tests.
+// Returns false if the walk was abandoned because the hit list was full,
+// true otherwise.
+bool _cull_convex_iterative(uint32_t p_node_id, CullParams &r_params, bool p_fully_within = false) {
+
+	// our function parameters to keep on a stack
+	struct CullConvexParams {
+		uint32_t node_id;
+		bool fully_within;
+	};
+
+	// most of the iterative functionality is contained in this helper class
+	BVH_IterativeInfo<CullConvexParams> ii;
+
+	// alloca must allocate the stack from this function, it cannot be allocated in the
+	// helper class
+	ii.stack = (CullConvexParams *)alloca(ii.get_alloca_stacksize());
+
+	// seed the stack
+	ii.get_first()->node_id = p_node_id;
+	ii.get_first()->fully_within = p_fully_within;
+
+	// preallocate these as a once off to be reused
+	// (one slot per hull plane, so it can hold the ids of every plane)
+	uint32_t max_planes = r_params.hull.num_planes;
+	uint32_t *plane_ids = (uint32_t *)alloca(sizeof(uint32_t) * max_planes);
+
+	CullConvexParams ccp;
+
+	// while there are still more nodes on the stack
+	while (ii.pop(ccp)) {
+		const TNode &tnode = _nodes[ccp.node_id];
+
+		// the node bound only needs testing against the hull while containment
+		// has not been established further up the tree
+		if (!ccp.fully_within) {
+
+			typename BVH_ABB::IntersectResult res = tnode.aabb.intersects_convex(r_params.hull);
+
+			switch (res) {
+				default: {
+					continue; // miss, just move on to the next node in the stack
+				} break;
+				case BVH_ABB::IR_PARTIAL: {
+					// partial overlap, carry on testing below this node
+				} break;
+				case BVH_ABB::IR_FULL: {
+					// everything below this node can be accepted untested
+					ccp.fully_within = true;
+				} break;
+			}
+
+		} // if not fully within already
+
+		if (tnode.is_leaf()) {
+
+			// lazy check for hits full up condition
+			if (_cull_hits_full(r_params)) {
+				return false;
+			}
+
+			const TLeaf &leaf = _node_get_leaf(tnode);
+
+			// if fully within, simply add all items to the result
+			// (taking into account masks)
+			if (ccp.fully_within) {
+
+				for (int n = 0; n < leaf.num_items; n++) {
+
+					uint32_t child_id = leaf.get_item_ref_id(n);
+
+					// register hit
+					_cull_hit(child_id, r_params);
+				}
+
+			} else {
+
+				// we can either use a naive check of all the planes against the AABB,
+				// or an optimized check, which finds in advance which of the planes can possibly
+				// cut the AABB, and only tests those. This can be much faster.
+#define BVH_CONVEX_CULL_OPTIMIZED
+#ifdef BVH_CONVEX_CULL_OPTIMIZED
+				// first find which planes cut the aabb
+				// (presumably fills plane_ids with the ids of those planes and
+				// returns how many were written -- confirm against BVH_ABB)
+				uint32_t num_planes = tnode.aabb.find_cutting_planes(r_params.hull, plane_ids);
+				BVH_ASSERT(num_planes <= max_planes);
+
+//#define BVH_CONVEX_CULL_OPTIMIZED_RIGOR_CHECK
+#ifdef BVH_CONVEX_CULL_OPTIMIZED_RIGOR_CHECK
+				// rigorous check
+				// (records the hits of the optimized path so they can be compared
+				// against the naive test below)
+				uint32_t results[MAX_ITEMS];
+				uint32_t num_results = 0;
+#endif
+
+				// test children individually
+				for (int n = 0; n < leaf.num_items; n++) {
+					//const Item &item = leaf.get_item(n);
+					const BVH_ABB &aabb = leaf.get_aabb(n);
+
+					if (aabb.intersects_convex_optimized(r_params.hull, plane_ids, num_planes)) {
+						uint32_t child_id = leaf.get_item_ref_id(n);
+
+#ifdef BVH_CONVEX_CULL_OPTIMIZED_RIGOR_CHECK
+						results[num_results++] = child_id;
+#endif
+
+						// register hit
+						_cull_hit(child_id, r_params);
+					}
+				}
+
+#ifdef BVH_CONVEX_CULL_OPTIMIZED_RIGOR_CHECK
+				// repeat with the naive test, the hits must match one for one
+				// NOTE(review): results[] is read before test_count is range
+				// checked on the following line -- confirm this is acceptable
+				// for debug-only code.
+				uint32_t test_count = 0;
+
+				for (int n = 0; n < leaf.num_items; n++) {
+					const BVH_ABB &aabb = leaf.get_aabb(n);
+
+					if (aabb.intersects_convex_partial(r_params.hull)) {
+						uint32_t child_id = leaf.get_item_ref_id(n);
+
+						CRASH_COND(child_id != results[test_count++]);
+						CRASH_COND(test_count > num_results);
+					}
+				}
+#endif
+
+#else
+				// not BVH_CONVEX_CULL_OPTIMIZED
+				// test children individually
+				for (int n = 0; n < leaf.num_items; n++) {
+					const BVH_ABB &aabb = leaf.get_aabb(n);
+
+					if (aabb.intersects_convex_partial(r_params.hull)) {
+						uint32_t child_id = leaf.get_item_ref_id(n);
+
+						// full up with results? exit early, no point in further testing
+						if (!_cull_hit(child_id, r_params))
+							return false;
+					}
+				}
+#endif // BVH_CONVEX_CULL_OPTIMIZED
+			} // if not fully within
+		} else {
+
+			// internal node .. every child is pushed untested, the hull test
+			// happens at the top of the loop when the child is popped
+			for (int n = 0; n < tnode.num_children; n++) {
+				uint32_t child_id = tnode.children[n];
+
+				// add to the stack
+				CullConvexParams *child = ii.request();
+
+				// should always return valid child
+				child->node_id = child_id;
+				child->fully_within = ccp.fully_within;
+			}
+		}
+
+	} // while more nodes to pop
+
+	// true indicates results are not full
+	return true;
+}
diff --git a/core/math/bvh_debug.inc b/core/math/bvh_debug.inc
new file mode 100644
index 00000000000..4a5df741aab
--- /dev/null
+++ b/core/math/bvh_debug.inc
@@ -0,0 +1,68 @@
+public:
+#ifdef BVH_VERBOSE
+// Debug helper: prints the whole of the given tree, or nothing when the tree
+// has no root node.
+void _debug_recursive_print_tree(int p_tree_id) const {
+	const uint32_t root_id = _root_node_id[p_tree_id];
+	if (root_id == BVHCommon::INVALID) {
+		return;
+	}
+	_debug_recursive_print_tree_node(root_id);
+}
+
+// Debug helper: formats a bound as "(minx ~ maxx) (miny ~ maxy) (minz ~ maxz) vol V".
+// The minimum is stored negated in the bound, hence the sign flip.
+// All values go through itos(), so they are printed as integers.
+String _debug_aabb_to_string(const BVH_ABB &aabb) const {
+	String sz = "(";
+
+	// x extent
+	sz += itos(-aabb.neg_min.x) + " ~ " + itos(aabb.max.x) + ") (";
+
+	// y extent
+	sz += itos(-aabb.neg_min.y) + " ~ " + itos(aabb.max.y) + ") (";
+
+	// z extent
+	sz += itos(-aabb.neg_min.z) + " ~ " + itos(aabb.max.z) + ") ";
+
+	// volume of the box
+	Vector3 extents = aabb.calculate_size();
+	float vol = extents.x * extents.y * extents.z;
+	sz += "vol " + itos(vol);
+
+	return sz;
+}
+
+// Debug helper: prints one line describing the node, indented by one tab per
+// level of depth, then recurses into the children of internal nodes.
+// Leaves are printed as "<id> L<height> [r<ref>, ...]", internal nodes as
+// "<id> N<height>", both followed by the node bound.
+void _debug_recursive_print_tree_node(uint32_t p_node_id, int depth = 0) const {
+	const TNode &tnode = _nodes[p_node_id];
+
+	String sz = "";
+	for (int n = 0; n < depth; n++) {
+		sz += "\t";
+	}
+	sz += itos(p_node_id);
+
+	if (tnode.is_leaf()) {
+		sz += " L";
+		sz += itos(tnode.height) + " ";
+
+		// use the const leaf accessor, which returns a reference
+		const TLeaf &leaf = _node_get_leaf(tnode);
+
+		// list the item references held by the leaf
+		sz += "[";
+		for (int n = 0; n < leaf.num_items; n++) {
+			if (n)
+				sz += ", ";
+			sz += "r";
+			sz += itos(leaf.get_item_ref_id(n));
+		}
+		sz += "]  ";
+	} else {
+		sz += " N";
+		sz += itos(tnode.height) + " ";
+	}
+
+	sz += _debug_aabb_to_string(tnode.aabb);
+	print_line(sz);
+
+	if (!tnode.is_leaf()) {
+		for (int n = 0; n < tnode.num_children; n++) {
+			_debug_recursive_print_tree_node(tnode.children[n], depth + 1);
+		}
+	}
+}
+#endif
diff --git a/core/math/bvh_integrity.inc b/core/math/bvh_integrity.inc
new file mode 100644
index 00000000000..724fc8e2211
--- /dev/null
+++ b/core/math/bvh_integrity.inc
@@ -0,0 +1,42 @@
+// Runs the integrity checks over every tree that has a root node.
+// Compiles to an empty function unless BVH_INTEGRITY_CHECKS is defined.
+void _integrity_check_all() {
+#ifdef BVH_INTEGRITY_CHECKS
+	for (int tree = 0; tree < NUM_TREES; tree++) {
+		const uint32_t root_id = _root_node_id[tree];
+		if (root_id == BVHCommon::INVALID) {
+			continue;
+		}
+		_integrity_check_down(root_id);
+	}
+#endif
+}
+
+// Checks that the bound stored on a node still encloses its contents.
+// The bound is recalculated in place (so the node's aabb and height are
+// rewritten), shrunk by the node expansion margin, and must then fit inside
+// the bound that was stored beforehand, otherwise we crash.
+void _integrity_check_up(uint32_t p_node_id) {
+	TNode &node = _nodes[p_node_id];
+
+	// bound as it was before the recalculation
+	BVH_ABB stored = node.aabb;
+
+	node_update_aabb(node);
+
+	// freshly calculated bound, with the expansion margin taken back off
+	BVH_ABB recalculated = node.aabb;
+	recalculated.expand(-_node_expansion);
+
+	CRASH_COND(!stored.is_other_within(recalculated));
+}
+
+// Recursively checks the subtree below p_node_id:
+// leaves have their bound verified, internal nodes must have exactly two
+// children and each child must point back at this node as its parent.
+void _integrity_check_down(uint32_t p_node_id) {
+	const TNode &node = _nodes[p_node_id];
+
+	if (node.is_leaf()) {
+		_integrity_check_up(p_node_id);
+		return;
+	}
+
+	CRASH_COND(node.num_children != 2);
+
+	for (int c = 0; c < node.num_children; c++) {
+		uint32_t child_id = node.children[c];
+
+		// the parent link of the child has to agree with the tree structure
+		TNode &child = _nodes[child_id];
+		CRASH_COND(child.parent_id != p_node_id);
+
+		_integrity_check_down(child_id);
+	}
+}
diff --git a/core/math/bvh_logic.inc b/core/math/bvh_logic.inc
new file mode 100644
index 00000000000..79463957741
--- /dev/null
+++ b/core/math/bvh_logic.inc
@@ -0,0 +1,221 @@
+
+// for slow incremental optimization, we will periodically remove each
+// item from the tree and reinsert, to give it a chance to find a better position
+//
+// p_ref_id is the id of the item reference to process. References whose
+// item_id is INVALID are skipped. As a side effect _current_tree is switched
+// to the tree that holds the item.
+void _logic_item_remove_and_reinsert(uint32_t p_ref_id) {
+	// get the reference
+	ItemRef &ref = _refs[p_ref_id];
+
+	// special case of debug draw
+	if (ref.item_id == BVHCommon::INVALID)
+		return;
+
+	BVH_ASSERT(ref.tnode_id != BVHCommon::INVALID);
+
+	// somewhat overly elaborate way to find out which tree the node is in!
+	// (the lookup takes a handle, so a temporary one is built around the ref id)
+	BVHHandle temp_handle;
+	temp_handle.set_id(p_ref_id);
+	_current_tree = _handle_get_tree_id(temp_handle);
+
+	// remove and reinsert
+	// (abb is passed as an out parameter -- presumably it receives the bound
+	// the item had in its old leaf; confirm against node_remove_item)
+	BVH_ABB abb;
+	node_remove_item(p_ref_id, &abb);
+
+	// we must choose where to add to tree
+	ref.tnode_id = _logic_choose_item_add_node(_root_node_id[_current_tree], abb);
+	_node_add_item(ref.tnode_id, p_ref_id, abb);
+
+	// unlike item_add this refits from the receiving node itself and does so
+	// unconditionally (the return value of _node_add_item is ignored)
+	refit_upward_and_balance(ref.tnode_id);
+}
+
+// from randy gaul balance function
+// Returns the union of two bounds, leaving both inputs untouched.
+BVH_ABB _logic_abb_merge(const BVH_ABB &a, const BVH_ABB &b) {
+	BVH_ABB merged(a);
+	merged.merge(b);
+	return merged;
+}
+
+//--------------------------------------------------------------------------------------------------
+/**
+@file	q3DynamicAABBTree.h
+@author	Randy Gaul
+@date	10/10/2014
+	Copyright (c) 2014 Randy Gaul http://www.randygaul.net
+	This software is provided 'as-is', without any express or implied
+	warranty. In no event will the authors be held liable for any damages
+	arising from the use of this software.
+	Permission is granted to anyone to use this software for any purpose,
+	including commercial applications, and to alter it and redistribute it
+	freely, subject to the following restrictions:
+	  1. The origin of this software must not be misrepresented; you must not
+	     claim that you wrote the original software. If you use this software
+	     in a product, an acknowledgment in the product documentation would be
+	     appreciated but is not required.
+	  2. Altered source versions must be plainly marked as such, and must not
+	     be misrepresented as being the original software.
+	  3. This notice may not be removed or altered from any source distribution.
+*/
+//--------------------------------------------------------------------------------------------------
+
+// This function is based on the 'Balance' function from Randy Gaul's qu3e
+// https://github.com/RandyGaul/qu3e
+// It is MODIFIED from qu3e version.
+// This is the only function used (and _logic_abb_merge helper function).
+//
+// Performs at most one rotation at node iA when the heights of its two
+// children differ by more than one. The taller child is promoted into A's
+// place and A becomes a child of it.
+// Returns the id of the node that now occupies A's position in the tree
+// (iA itself when no rotation took place).
+// Bounds and heights are refreshed for the two nodes involved in the rotation
+// only. The one caller visible here, refit_upward_and_balance, recalculates
+// the returned node and then continues with its parent.
+// NOTE(review): node ids are int32_t here but uint32_t in the rest of the tree
+// code, so the comparisons against children[] and BVHCommon::INVALID mix
+// signedness -- confirm this is intended.
+int32_t _logic_balance(int32_t iA) {
+	//	return iA; // uncomment this to bypass balance
+
+	TNode *A = &_nodes[iA];
+
+	// leaves, and nodes of height 1, cannot be rotated
+	if (A->is_leaf() || A->height == 1)
+		return iA;
+
+	/*      A
+	      /   \
+	     B     C
+	    / \   / \
+	   D   E F   G
+	*/
+
+	CRASH_COND(A->num_children != 2);
+	int32_t iB = A->children[0];
+	int32_t iC = A->children[1];
+	TNode *B = &_nodes[iB];
+	TNode *C = &_nodes[iC];
+
+	// positive when the C side is deeper, negative when the B side is deeper
+	int32_t balance = C->height - B->height;
+
+	// C is higher, promote C
+	if (balance > 1) {
+		int32_t iF = C->children[0];
+		int32_t iG = C->children[1];
+		TNode *F = &_nodes[iF];
+		TNode *G = &_nodes[iG];
+
+		// grandParent point to C
+		if (A->parent_id != BVHCommon::INVALID) {
+			if (_nodes[A->parent_id].children[0] == iA)
+				_nodes[A->parent_id].children[0] = iC;
+
+			else
+				_nodes[A->parent_id].children[1] = iC;
+		} else {
+			// check this .. seems dodgy
+			// (A had no parent, so C is handed over as the new root)
+			change_root_node(iC);
+		}
+
+		// Swap A and C
+		C->children[0] = iA;
+		C->parent_id = A->parent_id;
+		A->parent_id = iC;
+
+		// Finish rotation
+		// the taller of F and G stays under C, the other one moves under A.
+		// A must be recalculated before C, as C's bound and height use A's
+		if (F->height > G->height) {
+			C->children[1] = iF;
+			A->children[1] = iG;
+			G->parent_id = iA;
+			A->aabb = _logic_abb_merge(B->aabb, G->aabb);
+			C->aabb = _logic_abb_merge(A->aabb, F->aabb);
+
+			A->height = 1 + MAX(B->height, G->height);
+			C->height = 1 + MAX(A->height, F->height);
+		}
+
+		else {
+			C->children[1] = iG;
+			A->children[1] = iF;
+			F->parent_id = iA;
+			A->aabb = _logic_abb_merge(B->aabb, F->aabb);
+			C->aabb = _logic_abb_merge(A->aabb, G->aabb);
+
+			A->height = 1 + MAX(B->height, F->height);
+			C->height = 1 + MAX(A->height, G->height);
+		}
+
+		return iC;
+	}
+
+	// B is higher, promote B
+	else if (balance < -1) {
+		int32_t iD = B->children[0];
+		int32_t iE = B->children[1];
+		TNode *D = &_nodes[iD];
+		TNode *E = &_nodes[iE];
+
+		// grandParent point to B
+		if (A->parent_id != BVHCommon::INVALID) {
+			if (_nodes[A->parent_id].children[0] == iA)
+				_nodes[A->parent_id].children[0] = iB;
+			else
+				_nodes[A->parent_id].children[1] = iB;
+		}
+
+		else {
+			// check this .. seems dodgy
+			// (A had no parent, so B is handed over as the new root)
+			change_root_node(iB);
+		}
+
+		// Swap A and B
+		B->children[1] = iA;
+		B->parent_id = A->parent_id;
+		A->parent_id = iB;
+
+		// Finish rotation
+		// the taller of D and E stays under B, the other one moves under A.
+		// A must be recalculated before B, as B's bound and height use A's
+		if (D->height > E->height) {
+			B->children[0] = iD;
+			A->children[0] = iE;
+			E->parent_id = iA;
+			A->aabb = _logic_abb_merge(C->aabb, E->aabb);
+			B->aabb = _logic_abb_merge(A->aabb, D->aabb);
+
+			A->height = 1 + MAX(C->height, E->height);
+			B->height = 1 + MAX(A->height, D->height);
+		}
+
+		else {
+			B->children[0] = iE;
+			A->children[0] = iD;
+			D->parent_id = iA;
+			A->aabb = _logic_abb_merge(C->aabb, D->aabb);
+			B->aabb = _logic_abb_merge(A->aabb, E->aabb);
+
+			A->height = 1 + MAX(C->height, D->height);
+			B->height = 1 + MAX(A->height, E->height);
+		}
+
+		return iB;
+	}
+
+	// heights differ by at most one, nothing to do
+	return iA;
+}
+
+// Descends from p_node_id to find the leaf that should receive an item with
+// bound p_aabb, and returns the id of the node to add the item to.
+// A leaf with spare room is returned as is, a full leaf is split first and
+// split_leaf decides which node is returned.
+uint32_t _logic_choose_item_add_node(uint32_t p_node_id, const BVH_ABB &p_aabb) {
+
+	for (;;) {
+		BVH_ASSERT(p_node_id != BVHCommon::INVALID);
+		TNode &tnode = _nodes[p_node_id];
+
+		if (tnode.is_leaf()) {
+			// full leaves have to be split before they can take another item
+			if (node_is_leaf_full(tnode)) {
+				return split_leaf(p_node_id, p_aabb);
+			}
+
+			// there is still room in this leaf
+			return p_node_id;
+		}
+
+		if (tnode.num_children != 1) {
+			// the normal case, step into whichever child bound is the better
+			// match for the new item
+			BVH_ASSERT(tnode.num_children == 2);
+			TNode &first = _nodes[tnode.children[0]];
+			TNode &second = _nodes[tnode.children[1]];
+			int which = p_aabb.select_by_proximity(first.aabb, second.aabb);
+
+			p_node_id = tnode.children[which];
+			continue;
+		}
+
+		// this should not happen???
+		// is still happening, need to debug and find circumstances. Is not that serious
+		// but would be nice to prevent. I think it only happens with the root node.
+		WARN_PRINT_ONCE("BVH::recursive_choose_item_add_node, node with 1 child, recovering");
+		p_node_id = tnode.children[0];
+	}
+}
diff --git a/core/math/bvh_misc.inc b/core/math/bvh_misc.inc
new file mode 100644
index 00000000000..b973f02cd9f
--- /dev/null
+++ b/core/math/bvh_misc.inc
@@ -0,0 +1,54 @@
+
+// Returns the id of the tree the item belongs to: tree 1 for pairable items
+// when pairing is in use, tree 0 in every other case.
+int _handle_get_tree_id(BVHHandle p_handle) const {
+	if (!USE_PAIRS) {
+		return 0;
+	}
+	return _extra[p_handle.id()].pairable ? 1 : 0;
+}
+
+public:
+// Orders a pair of handles in place so that p_ha ends up with the lower
+// (or equal) id of the two.
+void _handle_sort(BVHHandle &p_ha, BVHHandle &p_hb) const {
+	// already in order?
+	if (!(p_ha.id() > p_hb.id())) {
+		return;
+	}
+
+	BVHHandle lower = p_hb;
+	p_hb = p_ha;
+	p_ha = lower;
+}
+
+private:
+// Makes sure the given tree has a root node. A newly created root starts out
+// as a leaf with no items. Does nothing if the tree already has a root.
+void create_root_node(int p_tree) {
+	if (_root_node_id[p_tree] != BVHCommon::INVALID) {
+		return;
+	}
+
+	// grab a fresh node and register it as the root of the tree
+	uint32_t new_root_id;
+	TNode *root = _nodes.request(new_root_id);
+	root->clear();
+	_root_node_id[p_tree] = new_root_id;
+
+	// attach an empty leaf, the leaf id is stored negated on the node
+	uint32_t new_leaf_id;
+	TLeaf *new_leaf = _leaves.request(new_leaf_id);
+	new_leaf->clear();
+	root->neg_leaf_id = -(int)new_leaf_id;
+}
+
+// Returns true when the leaf attached to the node has no room for more items.
+// The node must be a leaf.
+bool node_is_leaf_full(TNode &tnode) const {
+	return _node_get_leaf(tnode).is_full();
+}
+
+public:
+// Returns the leaf data attached to a leaf node.
+// The node must be a leaf (asserted via BVH_ASSERT).
+TLeaf &_node_get_leaf(TNode &tnode) {
+	BVH_ASSERT(tnode.is_leaf());
+	return _leaves[tnode.get_leaf_id()];
+}
+
+// Read only version of the above, for const nodes / const member functions.
+const TLeaf &_node_get_leaf(const TNode &tnode) const {
+	BVH_ASSERT(tnode.is_leaf());
+	return _leaves[tnode.get_leaf_id()];
+}
+
+private:
diff --git a/core/math/bvh_pair.inc b/core/math/bvh_pair.inc
new file mode 100644
index 00000000000..ce79857f088
--- /dev/null
+++ b/core/math/bvh_pair.inc
@@ -0,0 +1,64 @@
+public:
+// note .. maybe this can be attached to another node structure?
+// depends which works best for cache.
+//
+// Per item record of the pairs the item currently takes part in. Each pair is
+// stored as a Link: the handle of the other item plus the userdata pointer
+// stored for the pair.
+struct ItemPairs {
+	struct Link {
+		void set(BVHHandle h, void *ud) {
+			handle = h;
+			userdata = ud;
+		}
+		BVHHandle handle;
+		void *userdata;
+	};
+
+	// forget every pair
+	void clear() {
+		num_pairs = 0;
+		extended_pairs.reset();
+	}
+
+	AABB expanded_aabb;
+
+	// maybe we can just use the number in the vector TODO
+	int32_t num_pairs;
+	LocalVector<Link> extended_pairs;
+
+	// appends a pair with item h, no check is made for duplicates
+	void add_pair_to(BVHHandle h, void *p_userdata) {
+		Link temp;
+		temp.set(h, p_userdata);
+
+		extended_pairs.push_back(temp);
+		num_pairs++;
+	}
+
+	// returns the index of the pair with item h,
+	// or BVHCommon::INVALID when there is no such pair
+	uint32_t find_pair_to(BVHHandle h) const {
+		for (int n = 0; n < num_pairs; n++) {
+			if (extended_pairs[n].handle == h) {
+				return n;
+			}
+		}
+		// use the same sentinel that contains_pair_to() tests against
+		return BVHCommon::INVALID;
+	}
+
+	bool contains_pair_to(BVHHandle h) const {
+		return find_pair_to(h) != BVHCommon::INVALID;
+	}
+
+	// removes the pair with item h (if any).
+	// returns the userdata that was stored for the pair, or nullptr when
+	// there was no pair with h.
+	// the order of the remaining pairs is not preserved.
+	void *remove_pair_to(BVHHandle h) {
+		uint32_t index = find_pair_to(h);
+		if (index == BVHCommon::INVALID) {
+			return nullptr;
+		}
+
+		// read the userdata before the slot is overwritten by the removal
+		void *userdata = extended_pairs[index].userdata;
+		extended_pairs.remove_unordered(index);
+		num_pairs--;
+
+		return userdata;
+	}
+
+	void update() {
+	}
+};
diff --git a/core/math/bvh_public.inc b/core/math/bvh_public.inc
new file mode 100644
index 00000000000..bb293fa0e3e
--- /dev/null
+++ b/core/math/bvh_public.inc
@@ -0,0 +1,338 @@
+public:
+// Adds a new item to the BVH and returns the handle that identifies it.
+//   p_userdata / p_subindex : stored untouched in the item's extra data
+//   p_aabb                  : bound of the item
+//   p_pairable              : selects the tree, pairable items go in tree 1,
+//                             everything else in tree 0 (forced to false when
+//                             USE_PAIRS is off)
+//   p_pairable_type / p_pairable_mask : only stored when USE_PAIRS is on
+//   p_invisible             : not used by the code in this function
+// The refs, extra and pairs pools are expected to hand out matching ids, so
+// one id indexes all three (asserted below).
+BVHHandle item_add(T *p_userdata, const AABB &p_aabb, int32_t p_subindex, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask, bool p_invisible = false) {
+#ifdef BVH_VERBOSE_TREE
+	// NOTE(review): no _recursive_print_tree() is defined in the code visible
+	// here (the debug printer is _debug_recursive_print_tree(int)), so this
+	// may not compile when BVH_VERBOSE_TREE is defined -- confirm.
+	VERBOSE_PRINT("\nitem_add BEFORE");
+	_recursive_print_tree();
+	VERBOSE_PRINT("\n");
+#endif
+
+	// convert to the bound type used internally by the tree
+	BVH_ABB abb;
+	abb.from(p_aabb);
+
+	// handle to be filled with the new item ref
+	BVHHandle handle;
+
+	// ref id easier to pass around than handle
+	uint32_t ref_id;
+
+	// this should never fail
+	ItemRef *ref = _refs.request(ref_id);
+
+	// the extra data should be parallel list to the references
+	uint32_t extra_id;
+	ItemExtra *extra = _extra.request(extra_id);
+	BVH_ASSERT(extra_id == ref_id);
+
+	// pairs info
+	if (USE_PAIRS) {
+		uint32_t pairs_id;
+		ItemPairs *pairs = _pairs.request(pairs_id);
+		pairs->clear();
+		BVH_ASSERT(pairs_id == ref_id);
+	}
+
+	extra->subindex = p_subindex;
+	extra->userdata = p_userdata;
+	extra->last_updated_tick = 0;
+
+	// add an active reference to the list for slow incremental optimize
+	// this list must be kept in sync with the references as they are added or removed.
+	// (the item remembers its own slot in the list so it can be removed
+	// without a search)
+	extra->active_ref_id = _active_refs.size();
+	_active_refs.push_back(ref_id);
+
+	if (USE_PAIRS) {
+		extra->pairable_mask = p_pairable_mask;
+		extra->pairable_type = p_pairable_type;
+		extra->pairable = p_pairable;
+	} else {
+		// just for safety, in case this gets queried etc
+		extra->pairable = 0;
+		p_pairable = false;
+	}
+
+	// assign to handle to return
+	handle.set_id(ref_id);
+
+	// choose the tree, and make sure it has a root to insert under
+	_current_tree = 0;
+	if (p_pairable)
+		_current_tree = 1;
+
+	create_root_node(_current_tree);
+
+	// we must choose where to add to tree
+	ref->tnode_id = _logic_choose_item_add_node(_root_node_id[_current_tree], abb);
+
+	// the return value reports whether the bounds above the node need updating
+	bool refit = _node_add_item(ref->tnode_id, ref_id, abb);
+
+	if (refit) {
+		// only need to refit from the parent
+		const TNode &add_node = _nodes[ref->tnode_id];
+		if (add_node.parent_id != BVHCommon::INVALID)
+			refit_upward_and_balance(add_node.parent_id);
+	}
+
+#ifdef BVH_VERBOSE
+	// memory use
+	int mem = _refs.estimate_memory_use();
+	mem += _nodes.estimate_memory_use();
+
+	String sz = _debug_aabb_to_string(abb);
+	VERBOSE_PRINT("\titem_add [" + itos(ref_id) + "] " + itos(_refs.size()) + " refs,\t" + itos(_nodes.size()) + " nodes " + sz);
+	VERBOSE_PRINT("mem use : " + itos(mem) + ", num nodes : " + itos(_nodes.size()));
+
+#endif
+
+	return handle;
+}
+
+// Debug helper: prints the node id and item id held by every item reference.
+// Compiles to an empty function unless BVH_VERBOSE_TREE is defined.
+void _debug_print_refs() {
+#ifdef BVH_VERBOSE_TREE
+	print_line("refs.....");
+	for (int i = 0; i < _refs.size(); i++) {
+		const ItemRef &item_ref = _refs[i];
+
+		String line = "tnode_id " + itos(item_ref.tnode_id);
+		line += ", item_id " + itos(item_ref.item_id);
+		print_line(line);
+	}
+
+#endif
+}
+
+// Updates the bound of an existing item.
+// Fast path: when the new bound still fits inside the bound of the node that
+// holds the item, only the exact bound stored in the leaf is overwritten.
+// Otherwise the item is removed and reinserted at the best position for the
+// new bound.
+// NOTE(review): the original comment said "returns false if noop", but every
+// path visible here returns true.
+bool item_move(BVHHandle p_handle, const AABB &p_aabb) {
+	uint32_t ref_id = p_handle.id();
+
+	// convert to the bound type used internally by the tree
+	BVH_ABB abb;
+	abb.from(p_aabb);
+
+	// get the reference
+	ItemRef &ref = _refs[ref_id];
+
+	BVH_ASSERT(ref.tnode_id != BVHCommon::INVALID);
+	TNode &tnode = _nodes[ref.tnode_id];
+
+	// does it fit within the current aabb?
+	if (tnode.aabb.is_other_within(abb)) {
+		// do nothing .. fast path .. not moved enough to need refit
+
+		// however we WILL update the exact aabb in the leaf, as this will be needed
+		// for accurate collision detection
+		TLeaf &leaf = _node_get_leaf(tnode);
+
+		leaf.get_aabb(ref.item_id) = abb;
+		_integrity_check_all();
+
+		return true;
+	}
+
+	// slow path .. make sure we operate on the tree that holds the item
+	_current_tree = _handle_get_tree_id(p_handle);
+
+	// remove and reinsert
+	node_remove_item(ref_id);
+
+	// we must choose where to add to tree
+	ref.tnode_id = _logic_choose_item_add_node(_root_node_id[_current_tree], abb);
+
+	// add to the tree
+	bool needs_refit = _node_add_item(ref.tnode_id, ref_id, abb);
+
+	// only need to refit from the PARENT
+	if (needs_refit) {
+		// only need to refit from the parent
+		const TNode &add_node = _nodes[ref.tnode_id];
+		if (add_node.parent_id != BVHCommon::INVALID)
+			// not sure we need to rebalance all the time, this can be done less often
+			refit_upward(add_node.parent_id);
+		//refit_upward_and_balance(add_node.parent_id);
+	}
+
+	return true;
+}
+
+// Removes an item from the BVH and releases its reference, extra data and
+// (when USE_PAIRS is on) pairs data. The handle must not be used afterwards.
+// As a side effect _current_tree is switched to the tree that held the item.
+void item_remove(BVHHandle p_handle) {
+	uint32_t ref_id = p_handle.id();
+
+	_current_tree = _handle_get_tree_id(p_handle);
+
+	VERBOSE_PRINT("item_remove [" + itos(ref_id) + "] ");
+
+	////////////////////////////////////////
+	// remove the active reference from the list for slow incremental optimize
+	// this list must be kept in sync with the references as they are added or removed.
+	uint32_t active_ref_id = _extra[ref_id].active_ref_id;
+	uint32_t ref_id_moved_back = _active_refs[_active_refs.size() - 1];
+
+	// swap back and decrement for fast unordered remove
+	_active_refs[active_ref_id] = ref_id_moved_back;
+	_active_refs.resize(_active_refs.size() - 1);
+
+	// keep the moved active reference up to date
+	// (harmless when the removed item was itself the last entry, as its extra
+	// data is freed below)
+	_extra[ref_id_moved_back].active_ref_id = active_ref_id;
+	////////////////////////////////////////
+
+	// remove the item from the node
+	node_remove_item(ref_id);
+
+	// remove the item reference
+	_refs.free(ref_id);
+	_extra.free(ref_id);
+	if (USE_PAIRS) {
+		_pairs.free(ref_id);
+	}
+
+	// don't think refit_all is necessary?
+	//refit_all(_current_tree);
+
+#ifdef BVH_VERBOSE_TREE
+	// print the tree the item was removed from
+	// (the printer itself is only compiled when BVH_VERBOSE is defined)
+	_debug_recursive_print_tree(_current_tree);
+#endif
+}
+
+// Prepares the cull parameters for a collision test made on behalf of an
+// existing item: the mask and the pairable filter are taken from that item.
+void item_fill_cullparams(BVHHandle p_handle, CullParams &r_params) const {
+	const ItemExtra &source = _extra[p_handle.id()];
+
+	// the mask of the item we are testing from is the one that applies
+	r_params.mask = source.pairable_mask;
+
+	// a non pairable item is only interested in pairable items
+	r_params.test_pairable_only = (source.pairable == 0);
+}
+
+// Returns whether the item is flagged as pairable.
+bool item_is_pairable(const BVHHandle &p_handle) {
+	return _extra[p_handle.id()].pairable != 0;
+}
+
+// Copies the bound stored in the leaf for the item into r_abb.
+void item_get_ABB(const BVHHandle &p_handle, BVH_ABB &r_abb) {
+	// the reference tells us which node holds the item, and in which slot
+	const ItemRef &item_ref = _refs[p_handle.id()];
+
+	TLeaf &leaf = _node_get_leaf(_nodes[item_ref.tnode_id]);
+	r_abb = leaf.get_aabb(item_ref.item_id);
+}
+
+// Updates the pairable flag, type and mask of an existing item.
+// The type and mask are always stored. When the pairable flag itself changes,
+// the item is taken out of the tree it is in and inserted into the other one,
+// keeping the bound it had in its old leaf.
+void item_set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) {
+	// change tree?
+	uint32_t ref_id = p_handle.id();
+
+	ItemExtra &ex = _extra[ref_id];
+	ItemRef &ref = _refs[ref_id];
+
+	ex.pairable_type = p_pairable_type;
+	ex.pairable_mask = p_pairable_mask;
+
+	// only a change of the flag moves the item between trees
+	if ((ex.pairable != 0) != p_pairable) {
+		// record abb
+		// (must be copied out before the item is removed from its leaf)
+		TNode &tnode = _nodes[ref.tnode_id];
+		TLeaf &leaf = _node_get_leaf(tnode);
+		BVH_ABB abb = leaf.get_aabb(ref.item_id);
+
+		// make sure current tree is correct prior to changing
+		// (the flag still has its old value here, so this is the old tree)
+		_current_tree = _handle_get_tree_id(p_handle);
+
+		// remove from old tree
+		node_remove_item(ref_id);
+
+		ex.pairable = p_pairable;
+
+		// add to new tree
+		// (the flag has just been changed, so the same lookup now gives the new tree)
+		_current_tree = _handle_get_tree_id(p_handle);
+		create_root_node(_current_tree);
+
+		// we must choose where to add to tree
+		ref.tnode_id = _logic_choose_item_add_node(_root_node_id[_current_tree], abb);
+		bool needs_refit = _node_add_item(ref.tnode_id, ref_id, abb);
+
+		// only need to refit from the PARENT
+		if (needs_refit) {
+			// only need to refit from the parent
+			const TNode &add_node = _nodes[ref.tnode_id];
+			if (add_node.parent_id != BVHCommon::INVALID)
+				refit_upward_and_balance(add_node.parent_id);
+		}
+	}
+}
+
+// One step of the slow incremental optimization.
+// Calls refit_branch on the root of every tree that has one, then removes and
+// reinserts a single item. The item is chosen by a cursor that advances
+// through the active reference list, one entry per call.
+void incremental_optimize() {
+	// first update all aabbs as one off step..
+	// this is cheaper than doing it on each move as each leaf may get touched multiple times
+	// in a frame.
+	for (int n = 0; n < NUM_TREES; n++) {
+		if (_root_node_id[n] != BVHCommon::INVALID)
+			refit_branch(_root_node_id[n]);
+	}
+
+	// now do small section reinserting to get things moving
+	// gradually, and keep items in the right leaf
+	// (wrap the cursor when it has run off the end of the list, this also
+	// resets it to zero when the list is empty)
+	if (_current_active_ref >= _active_refs.size()) {
+		_current_active_ref = 0;
+	}
+
+	// special case
+	// (no items at all, nothing to reinsert)
+	if (!_active_refs.size())
+		return;
+
+	uint32_t ref_id = _active_refs[_current_active_ref++];
+
+	_logic_item_remove_and_reinsert(ref_id);
+
+#ifdef BVH_VERBOSE
+	// memory use
+	int mem_refs = _refs.estimate_memory_use();
+	int mem_nodes = _nodes.estimate_memory_use();
+	int mem_leaves = _leaves.estimate_memory_use();
+
+	String sz;
+	sz += "mem_refs : " + itos(mem_refs) + " ";
+	sz += "mem_nodes : " + itos(mem_nodes) + " ";
+	sz += "mem_leaves : " + itos(mem_leaves) + " ";
+	sz += ", num nodes : " + itos(_nodes.size());
+	print_line(sz);
+
+#endif
+}
+
+// Regular maintenance entry point: runs one step of the incremental
+// optimization. The automatic expansion tuning below is compiled out unless
+// BVH_ALLOW_AUTO_EXPANSION is defined (the define is commented out here).
+void update() {
+	incremental_optimize();
+
+	// keep the expansion values up to date with the world bound
+//#define BVH_ALLOW_AUTO_EXPANSION
+#ifdef BVH_ALLOW_AUTO_EXPANSION
+	if (_auto_node_expansion || _auto_pairing_expansion) {
+		// the world bound is the union of the root bounds of all the trees
+		BVH_ABB world_bound;
+		world_bound.set_to_max_opposite_extents();
+
+		bool bound_valid = false;
+
+		for (int n = 0; n < NUM_TREES; n++) {
+			uint32_t node_id = _root_node_id[n];
+			if (node_id != BVHCommon::INVALID) {
+				world_bound.merge(_nodes[node_id].aabb);
+				bound_valid = true;
+			}
+		}
+
+		// if there are no nodes, do nothing, but if there are...
+		if (bound_valid) {
+			AABB bb;
+			world_bound.to(bb);
+			real_t size = bb.get_longest_axis_size();
+
+			// automatic AI decision for best parameters.
+			// These can be overridden in project settings.
+
+			// these magic numbers are determined by experiment
+			// (both margins scale with the longest axis of the world bound)
+			if (_auto_node_expansion) {
+				_node_expansion = size * 0.025;
+			}
+			if (_auto_pairing_expansion) {
+				_pairing_expansion = size * 0.009;
+			}
+		}
+	}
+#endif
+}
diff --git a/core/math/bvh_refit.inc b/core/math/bvh_refit.inc
new file mode 100644
index 00000000000..b16bb0ab85e
--- /dev/null
+++ b/core/math/bvh_refit.inc
@@ -0,0 +1,142 @@
+// Debug check: recalculates the bound of the node in place and crashes if the
+// result differs from the bound that was stored beforehand.
+void _debug_node_verify_bound(uint32_t p_node_id) {
+	TNode &node = _nodes[p_node_id];
+
+	// keep a copy, as the update overwrites the stored bound
+	BVH_ABB stored = node.aabb;
+	node_update_aabb(node);
+	BVH_ABB recalculated = node.aabb;
+
+	CRASH_COND(stored != recalculated);
+}
+
+// Recalculates the bound and height of a single node from what is directly
+// below it.
+// Leaf: the bound is the union of the item bounds, grown by the node
+// expansion margin, and the height is 0.
+// Internal node: the bound is the union of the child bounds, and the height
+// is one more than the tallest child.
+void node_update_aabb(TNode &tnode) {
+	// start from an 'empty' bound so the first merge defines it
+	tnode.aabb.set_to_max_opposite_extents();
+	tnode.height = 0;
+
+	if (tnode.is_leaf()) {
+		const TLeaf &leaf = _node_get_leaf(tnode);
+
+		for (int i = 0; i < leaf.num_items; i++) {
+			tnode.aabb.merge(leaf.get_aabb(i));
+		}
+
+		// the item bounds are stored unexpanded, the margin is applied only
+		// to the node bound
+		tnode.aabb.expand(_node_expansion);
+#ifdef BVH_CHECKS
+		if (!leaf.num_items) {
+			// the 'blank' aabb will screw up parent aabbs
+			WARN_PRINT("BVH_Tree::TLeaf no items, AABB is undefined");
+		}
+#endif
+		return;
+	}
+
+	for (int c = 0; c < tnode.num_children; c++) {
+		const TNode &child = _nodes[tnode.children[c]];
+
+		// grow the bound to take in the child
+		tnode.aabb.merge(child.aabb);
+
+		// track the tallest child as we go
+		if (child.height > tnode.height) {
+			tnode.height = child.height;
+		}
+	}
+
+	// the height of a non leaf is always 1 bigger than the biggest child
+	tnode.height++;
+
+#ifdef BVH_CHECKS
+	if (!tnode.num_children) {
+		// the 'blank' aabb will screw up parent aabbs
+		WARN_PRINT("BVH_Tree::TNode no children, AABB is undefined");
+	}
+#endif
+}
+
+// Recalculates the bound and height of every node in the given tree.
+// Does nothing when the tree has no root node yet.
+void refit_all(int p_tree_id) {
+	uint32_t root_id = _root_node_id[p_tree_id];
+
+	// an empty tree has no nodes to refit, and INVALID must not be used as a node id
+	if (root_id == BVHCommon::INVALID) {
+		return;
+	}
+
+	refit_downward(root_id);
+}
+
+// Recalculates the bound and height of the given node, then of each of its
+// ancestors in turn, finishing with the root.
+void refit_upward(uint32_t p_node_id) {
+	uint32_t node_id = p_node_id;
+
+	while (node_id != BVHCommon::INVALID) {
+		TNode &current = _nodes[node_id];
+		node_update_aabb(current);
+
+		// step up, the root has an INVALID parent which ends the walk
+		node_id = current.parent_id;
+	}
+}
+
+// As refit_upward, but each node on the way up is given the chance to
+// rebalance first. When a rotation takes place the walk carries on from the
+// node that took over the position in the tree.
+void refit_upward_and_balance(uint32_t p_node_id) {
+	while (p_node_id != BVHCommon::INVALID) {
+		// the balance returns the id of the node now occupying this position
+		const uint32_t balanced_id = _logic_balance(p_node_id);
+
+		if (balanced_id != p_node_id) {
+			VERBOSE_PRINT("REBALANCED!");
+		}
+
+		// recalculate the bound and height from the children
+		TNode &current = _nodes[balanced_id];
+		node_update_aabb(current);
+
+		p_node_id = current.parent_id;
+	}
+}
+
+// Recursively recalculates the bounds and heights of a whole subtree.
+// Children are processed before their parent, as the parent is calculated
+// from the (updated) children.
+void refit_downward(uint32_t p_node_id) {
+	TNode &node = _nodes[p_node_id];
+
+	if (!node.is_leaf()) {
+		for (int c = 0; c < node.num_children; c++) {
+			uint32_t child_id = node.children[c];
+			refit_downward(child_id);
+		}
+	}
+
+	node_update_aabb(node);
+}
+
+// go down to the leaves, then refit upward
+//
+// Visits every node below (and including) p_node_id using an explicit stack.
+// Each leaf that is flagged dirty has the flag cleared, and the bounds are
+// then recalculated from that leaf up to the root of the tree.
+// Leaves that are not dirty are left alone.
+void refit_branch(uint32_t p_node_id) {
+	// our function parameters to keep on a stack
+	struct RefitParams {
+		uint32_t node_id;
+	};
+
+	// most of the iterative functionality is contained in this helper class
+	BVH_IterativeInfo<RefitParams> ii;
+
+	// alloca must allocate the stack from this function, it cannot be allocated in the
+	// helper class
+	ii.stack = (RefitParams *)alloca(ii.get_alloca_stacksize());
+
+	// seed the stack
+	ii.get_first()->node_id = p_node_id;
+
+	RefitParams rp;
+
+	// while there are still more nodes on the stack
+	while (ii.pop(rp)) {
+
+		TNode &tnode = _nodes[rp.node_id];
+
+		// do children first
+		if (!tnode.is_leaf()) {
+			for (int n = 0; n < tnode.num_children; n++) {
+
+				uint32_t child_id = tnode.children[n];
+
+				// add to the stack
+				RefitParams *child = ii.request();
+				child->node_id = child_id;
+			}
+		} else {
+			// leaf .. only refit upward if dirty
+			TLeaf &leaf = _node_get_leaf(tnode);
+			if (leaf.is_dirty()) {
+				leaf.set_dirty(false);
+
+				// the refit must start at the dirty leaf itself (the node just
+				// popped), NOT at p_node_id which is the top of the branch.
+				// Starting at the top would leave the bound of the leaf, and of
+				// everything between it and the top, out of date.
+				refit_upward(rp.node_id);
+			}
+		}
+	} // while more nodes to pop
+}
diff --git a/core/math/bvh_split.inc b/core/math/bvh_split.inc
new file mode 100644
index 00000000000..e6a63d24d70
--- /dev/null
+++ b/core/math/bvh_split.inc
@@ -0,0 +1,293 @@
+void _split_inform_references(uint32_t p_node_id) {
+	// Point the reference of every item held in this node's leaf back at
+	// the node, and at the slot the item occupies within the leaf.
+	TLeaf &leaf = _node_get_leaf(_nodes[p_node_id]);
+
+	for (uint32_t slot = 0; slot < leaf.num_items; slot++) {
+		// the leaf stores, per slot, the id of the reference that owns the item
+		ItemRef &item_ref = _refs[leaf.get_item_ref_id(slot)];
+		item_ref.tnode_id = p_node_id;
+		item_ref.item_id = slot;
+	}
+}
+// Divides the indices held in group_a between group_a and group_b, testing the min corner of each bound against the centre of full_bound.
+void _split_leaf_sort_groups_simple(int &num_a, int &num_b, uint16_t *group_a, uint16_t *group_b, const BVH_ABB *temp_bounds, const BVH_ABB full_bound) {
+	// special case for low leaf sizes (MAX_ITEMS is a template constant, so this should static compile out)
+	if (MAX_ITEMS < 4) {
+		uint32_t ind = group_a[0];
+
+		// add to b
+		group_b[num_b++] = ind;
+
+		// remove from a (the last entry of a is moved into the vacated slot)
+		group_a[0] = group_a[num_a - 1];
+		num_a--;
+		return;
+	}
+
+	Vector3 centre = full_bound.calculate_centre();
+	Vector3 size = full_bound.calculate_size();
+
+	int order[3];
+	// order[] holds the three axis indices: order[0] is tried first, the other two are fallbacks
+	order[0] = size.min_axis();
+	order[2] = size.max_axis();
+	order[1] = 3 - (order[0] + order[2]);
+	// NOTE(review): order[0] comes from min_axis() although the comment below says "longest axis" - confirm what min_axis() returns
+	// simplest case, split on the longest axis
+	int split_axis = order[0];
+	for (int a = 0; a < num_a; a++) {
+		uint32_t ind = group_a[a];
+
+		if (temp_bounds[ind].min.coord[split_axis] > centre.coord[split_axis]) {
+			// add to b
+			group_b[num_b++] = ind;
+
+			// remove from a (the last entry of a is moved into the vacated slot)
+			group_a[a] = group_a[num_a - 1];
+			num_a--;
+
+			// test this slot again, as it now holds the entry moved in from the end
+			a--;
+		}
+	}
+
+	// detect when the first split failed, i.e. the smaller group has fewer than MAX_ITEMS / 4 entries
+	int min_threshold = MAX_ITEMS / 4;
+	int min_group_size[3];
+	min_group_size[0] = MIN(num_a, num_b);
+	if (min_group_size[0] < min_threshold) {
+		// slow but sure .. first move everything back into a
+		for (int b = 0; b < num_b; b++) {
+			group_a[num_a++] = group_b[b];
+		}
+		num_b = 0;
+
+		// for the two remaining axes, count how a split would divide the items (nothing is moved yet)
+		for (int axis = 1; axis < 3; axis++) {
+			split_axis = order[axis];
+			int count = 0;
+
+			for (int a = 0; a < num_a; a++) {
+				uint32_t ind = group_a[a];
+
+				if (temp_bounds[ind].min.coord[split_axis] > centre.coord[split_axis]) {
+					count++;
+				}
+			}
+
+			min_group_size[axis] = MIN(count, num_a - count);
+		} // for axis
+
+		// best axis is the one whose smaller group is largest (entry 0 is the result of the first attempt)
+		int best_axis = 0;
+		int best_min = min_group_size[0];
+		for (int axis = 1; axis < 3; axis++) {
+			if (min_group_size[axis] > best_min) {
+				best_min = min_group_size[axis];
+				best_axis = axis;
+			}
+		}
+
+		// now finally do the split, unless even the best axis would leave one group empty
+		if (best_min > 0) {
+			split_axis = order[best_axis];
+
+			for (int a = 0; a < num_a; a++) {
+				uint32_t ind = group_a[a];
+
+				if (temp_bounds[ind].min.coord[split_axis] > centre.coord[split_axis]) {
+					// add to b
+					group_b[num_b++] = ind;
+
+					// remove from a (the last entry of a is moved into the vacated slot)
+					group_a[a] = group_a[num_a - 1];
+					num_a--;
+
+					// test this slot again, as it now holds the entry moved in from the end
+					a--;
+				}
+			}
+		} // if there was a split!
+	} // if the longest axis wasn't a good split
+
+	// special case, nothing ended up in b .. move the first entry of a across
+	if (!num_b) {
+		uint32_t ind = group_a[0];
+
+		// add to b
+		group_b[num_b++] = ind;
+
+		// remove from a
+		group_a[0] = group_a[num_a - 1];
+		num_a--;
+	}
+	// opposite problem, a is empty .. move the first entry of b back into a
+	if (!num_a) {
+		uint32_t ind = group_b[0];
+
+		// add to a
+		group_a[num_a++] = ind;
+
+		// remove from b
+		group_b[0] = group_b[num_b - 1];
+		num_b--;
+	}
+}
+// Moves one entry from group a to group b: the one that gives the smallest sum of areas for (b plus the entry) and (the rest of a).
+void _split_leaf_sort_groups(int &num_a, int &num_b, uint16_t *group_a, uint16_t *group_b, const BVH_ABB *temp_bounds) {
+	BVH_ABB groupb_aabb;
+	groupb_aabb.set_to_max_opposite_extents();
+	for (int n = 0; n < num_b; n++) {
+		int which = group_b[n];
+		groupb_aabb.merge(temp_bounds[which]);
+	}
+	BVH_ABB groupb_aabb_new;
+	// bound of everything in a except the candidate being tested, rebuilt for every candidate
+	BVH_ABB rest_aabb;
+
+	float best_size = FLT_MAX;
+	int best_candidate = -1;
+
+	// find most likely from a to move into b
+	for (int check = 0; check < num_a; check++) {
+		rest_aabb.set_to_max_opposite_extents();
+		groupb_aabb_new = groupb_aabb;
+
+		// find aabb of all the rest
+		for (int rest = 0; rest < num_a; rest++) {
+			if (rest == check)
+				continue;
+
+			int which = group_a[rest];
+			rest_aabb.merge(temp_bounds[which]);
+		}
+
+		groupb_aabb_new.merge(temp_bounds[group_a[check]]);
+
+		// now compare the sizes (the candidate with the smallest combined area wins)
+		float size = groupb_aabb_new.get_area() + rest_aabb.get_area();
+		if (size < best_size) {
+			best_size = size;
+			best_candidate = check;
+		}
+	}
+	// NOTE(review): best_candidate is still -1 if num_a is 0 or no size was below FLT_MAX, and is then used as an index below
+	// we should now have the best, move it from group a to group b
+	group_b[num_b++] = group_a[best_candidate];
+
+	// remove best candidate from group a (the last entry of a is moved into its slot)
+	num_a--;
+	group_a[best_candidate] = group_a[num_a];
+}
+// Entry point for splitting a leaf; currently just forwards to split_leaf_complex and returns its result.
+uint32_t split_leaf(uint32_t p_node_id, const BVH_ABB &p_added_item_aabb) {
+	return split_leaf_complex(p_node_id, p_added_item_aabb);
+}
+// Turns the leaf node p_node_id into an internal node with MAX_CHILDREN new leaf children and moves its items into the first two of them.
+// p_added_item_aabb is the bound of the item about to be inserted; the item is not added here, the id of the child it belongs in is returned.
+uint32_t split_leaf_complex(uint32_t p_node_id, const BVH_ABB &p_added_item_aabb) {
+	VERBOSE_PRINT("split_leaf");
+
+	// note the tnode before and AFTER splitting may be a different address
+	// in memory because the vector could get relocated. So we need to reget
+	// the tnode after the split
+	BVH_ASSERT(_nodes[p_node_id].is_leaf());
+
+	// first create child leaf nodes
+	uint32_t *child_ids = (uint32_t *)alloca(sizeof(uint32_t) * MAX_CHILDREN);
+
+	for (int n = 0; n < MAX_CHILDREN; n++) {
+		// create node children
+		TNode *child_node = _nodes.request(child_ids[n]);
+
+		child_node->clear();
+
+		// back link to parent
+		child_node->parent_id = p_node_id;
+
+		// make each child a leaf node
+		node_make_leaf(child_ids[n]);
+	}
+
+	// only take references to the node and its leaf now that all the requests above are done
+	TNode &tnode = _nodes[p_node_id];
+	uint32_t orig_leaf_id = tnode.get_leaf_id();
+	const TLeaf &orig_leaf = _node_get_leaf(tnode);
+
+	// store the final child ids
+	for (int n = 0; n < MAX_CHILDREN; n++) {
+		tnode.children[n] = child_ids[n];
+	}
+
+	// mark as no longer a leaf node (this overwrites the leaf id, which was saved above)
+	tnode.num_children = MAX_CHILDREN;
+
+	// 2 groups, A and B, holding item indices; despite its name max_children is a number of items
+	int max_children = orig_leaf.num_items + 1; // plus 1 for the wildcard .. the item being added
+	//CRASH_COND(max_children > MAX_CHILDREN);
+
+	uint16_t *group_a = (uint16_t *)alloca(sizeof(uint16_t) * max_children);
+	uint16_t *group_b = (uint16_t *)alloca(sizeof(uint16_t) * max_children);
+
+	// we are copying the ABBs. This is ugly, but we need one extra for the inserted item...
+	BVH_ABB *temp_bounds = (BVH_ABB *)alloca(sizeof(BVH_ABB) * max_children);
+
+	int num_a = max_children;
+	int num_b = 0;
+
+	// setup - start with all in group a
+	for (int n = 0; n < orig_leaf.num_items; n++) {
+		group_a[n] = n;
+		temp_bounds[n] = orig_leaf.get_aabb(n);
+	}
+	// wildcard .. the item being added takes the index after the last existing item
+	int wildcard = orig_leaf.num_items;
+
+	group_a[wildcard] = wildcard;
+	temp_bounds[wildcard] = p_added_item_aabb;
+
+	// we can choose here either an equal split, or just 1 in the new leaf
+	_split_leaf_sort_groups_simple(num_a, num_b, group_a, group_b, temp_bounds, tnode.aabb);
+
+	uint32_t wildcard_node = BVHCommon::INVALID;
+
+	// group a goes to child 0 (the groups are not necessarily equal in size); the wildcard is only noted, not added
+	for (int n = 0; n < num_a; n++) {
+		int which = group_a[n];
+
+		if (which != wildcard) {
+			const BVH_ABB &source_item_aabb = orig_leaf.get_aabb(which);
+			uint32_t source_item_ref_id = orig_leaf.get_item_ref_id(which);
+			//const Item &source_item = orig_leaf.get_item(which);
+			_node_add_item(tnode.children[0], source_item_ref_id, source_item_aabb);
+		} else {
+			wildcard_node = tnode.children[0];
+		}
+	}
+	for (int n = 0; n < num_b; n++) {
+		int which = group_b[n];
+
+		if (which != wildcard) {
+			const BVH_ABB &source_item_aabb = orig_leaf.get_aabb(which);
+			uint32_t source_item_ref_id = orig_leaf.get_item_ref_id(which);
+			//const Item &source_item = orig_leaf.get_item(which);
+			_node_add_item(tnode.children[1], source_item_ref_id, source_item_aabb);
+		} else {
+			wildcard_node = tnode.children[1];
+		}
+	}
+
+	// the items now live in the children, so the original leaf can go back to the pool
+	_leaves.free(orig_leaf_id);
+
+	// we should keep the references up to date!
+	for (int n = 0; n < MAX_CHILDREN; n++) {
+		_split_inform_references(tnode.children[n]);
+	}
+
+	refit_upward(p_node_id);
+
+	BVH_ASSERT(wildcard_node != BVHCommon::INVALID);
+	return wildcard_node;
+}
diff --git a/core/math/bvh_structs.inc b/core/math/bvh_structs.inc
new file mode 100644
index 00000000000..8dc8e5b3a88
--- /dev/null
+++ b/core/math/bvh_structs.inc
@@ -0,0 +1,174 @@
+
+public:
+struct ItemRef { // records where an item is stored
+	uint32_t tnode_id; // id of the node whose leaf holds the item, -1 (BVHCommon::INVALID) is invalid
+	uint32_t item_id; // slot of the item in the leaf
+};
+
+// extra info, kept in a separate list parallel to the references:
+// it is used less often, so keeping it apart is better for the cache
+struct ItemExtra {
+	uint32_t last_updated_tick;
+	uint32_t pairable;
+	uint32_t pairable_mask;
+	uint32_t pairable_type;
+
+	int32_t subindex;
+
+	// the active reference is a separate list of which references
+	// are active so that we can slowly iterate through it over many frames for
+	// slow optimize.
+	uint32_t active_ref_id;
+
+	T *userdata;
+};
+
+// a bound together with a reference id (original note: an item OR a child node, depending on whether the owner is a leaf node)
+struct Item {
+	BVH_ABB aabb;
+	uint32_t item_ref_id;
+};
+
+// tree leaf
+struct TLeaf {
+	uint16_t num_items;
+
+private:
+	uint16_t dirty;
+	// separate data orientated lists for faster SIMD traversal
+	uint32_t item_ref_ids[MAX_ITEMS];
+	BVH_ABB aabbs[MAX_ITEMS];
+
+public:
+	// per slot access to the bound and to the reference id of an item
+	BVH_ABB &get_aabb(uint32_t p_id) { return aabbs[p_id]; }
+	const BVH_ABB &get_aabb(uint32_t p_id) const { return aabbs[p_id]; }
+
+	uint32_t &get_item_ref_id(uint32_t p_id) { return item_ref_ids[p_id]; }
+	const uint32_t &get_item_ref_id(uint32_t p_id) const { return item_ref_ids[p_id]; }
+
+	bool is_dirty() const { return dirty != 0; }
+	void set_dirty(bool p) { dirty = p ? 1 : 0; }
+
+	// an emptied leaf is flagged dirty
+	void clear() {
+		dirty = 1;
+		num_items = 0;
+	}
+	bool is_full() const { return !(num_items < MAX_ITEMS); }
+
+	// removal without preserving order: the last item is copied over the removed one
+	void remove_item_unordered(uint32_t p_id) {
+		BVH_ASSERT(p_id < num_items);
+		const uint32_t last = --num_items;
+		item_ref_ids[p_id] = item_ref_ids[last];
+		aabbs[p_id] = aabbs[last];
+	}
+	// hands out the next free slot, or -1 (as unsigned) when the leaf is full
+	uint32_t request_item() {
+		if (is_full()) {
+			return -1;
+		}
+		return num_items++;
+	}
+};
+
+// tree node
+struct TNode {
+	BVH_ABB aabb;
+	// either number of children if positive
+	// or leaf id if negative (leaf id 0 is disallowed)
+	union {
+		int32_t num_children;
+		int32_t neg_leaf_id;
+	};
+	uint32_t parent_id; // or -1
+	uint32_t children[MAX_CHILDREN]; // node ids are 32 bit, a narrower type would silently truncate them
+
+	// height in the tree, where leaves are 0, and all above are 1+
+	// (or the highest where there is a tie off)
+	int32_t height;
+
+	// a negative value in the union means that it holds a (negated) leaf id
+	bool is_leaf() const { return num_children < 0; }
+	void set_leaf_id(int id) { neg_leaf_id = -id; }
+	int get_leaf_id() const { return -neg_leaf_id; }
+
+	void clear() {
+		num_children = 0;
+		parent_id = BVHCommon::INVALID;
+		height = 0; // or -1 for testing
+
+		// for safety set to improbable value
+		aabb.set_to_max_opposite_extents();
+
+		// other members are not blanked for speed .. they may be uninitialized
+	}
+
+	bool is_full_of_children() const { return num_children >= MAX_CHILDREN; }
+
+	// unordered removal: the last child is moved into the vacated slot
+	void remove_child_internal(uint32_t child_num) {
+		children[child_num] = children[num_children - 1];
+		num_children--;
+	}
+
+	// returns the slot that holds the given child node id, or -1
+	int find_child(uint32_t p_child_node_id) {
+		BVH_ASSERT(!is_leaf());
+		for (int n = 0; n < num_children; n++) {
+			if (children[n] == p_child_node_id)
+				return n;
+		}
+		// not found
+		return -1;
+	}
+};
+
+// instead of using linked list we maintain
+// item references (for quick lookup)
+PooledList<ItemRef, true> _refs;
+PooledList<ItemExtra, true> _extra;
+PooledList<ItemPairs> _pairs;
+
+// these 2 are not in sync .. nodes != leaves!
+PooledList<TNode, true> _nodes;
+PooledList<TLeaf, true> _leaves;
+
+// we can maintain an un-ordered list of which references are active,
+// in order to do a slow incremental optimize of the tree over each frame.
+// This will work best if dynamic objects and static objects are in a different tree.
+LocalVector<uint32_t, uint32_t, true> _active_refs;
+uint32_t _current_active_ref = 0;
+
+// instead of translating directly to the userdata output,
+// we keep an intermediate list of hits as reference IDs, which can be used
+// for pairing collision detection
+LocalVector<uint32_t, uint32_t, true> _cull_hits;
+
+// we now have multiple root nodes, allowing us to store
+// more than 1 tree. This can be more efficient, while sharing the same
+// common lists
+enum { NUM_TREES = 2,
+};
+
+// Tree 0 - Non pairable
+// Tree 1 - Pairable
+// This is more efficient because in physics we only need check non pairable against the pairable tree.
+uint32_t _root_node_id[NUM_TREES];
+int _current_tree = 0;
+
+// these values may need tweaking according to the project
+// the bound of the world, and the average velocities of the objects
+
+// node expansion is important in the rendering tree
+// larger values give less re-insertion as items move...
+// but on the other hand over estimates the bounding box of nodes.
+// we can either use auto mode, where the expansion is based on the root node size, or specify manually
+real_t _node_expansion = 0.5;
+bool _auto_node_expansion = true;
+
+// pairing expansion important for physics pairing
+// larger values gives more 'sticky' pairing, and is less likely to exhibit tunneling
+// we can either use auto mode, where the expansion is based on the root node size, or specify manually
+real_t _pairing_expansion = 0.1;
+bool _auto_pairing_expansion = true;
diff --git a/core/math/bvh_tree.h b/core/math/bvh_tree.h
new file mode 100644
index 00000000000..61766b6fbdc
--- /dev/null
+++ b/core/math/bvh_tree.h
@@ -0,0 +1,414 @@
+/*************************************************************************/
+/*  bvh_tree.h                                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef BVH_TREE_H
+#define BVH_TREE_H
+
+// BVH Tree
+// This is an implementation of a dynamic BVH with templated leaf size.
+// This differs from most dynamic BVH in that it can handle more than 1 object
+// in leaf nodes. This can make it far more efficient in certain circumstances.
+// It also means that the splitting logic etc have to be completely different
+// to a simpler tree.
+// Note that MAX_CHILDREN should be fixed at 2 for now.
+
+#include "core/local_vector.h"
+#include "core/math/aabb.h"
+#include "core/math/bvh_abb.h"
+#include "core/math/geometry.h"
+#include "core/math/vector3.h"
+#include "core/pooled_list.h"
+#include "core/print_string.h"
+#include <limits.h>
+
+// never do these checks in release
+#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
+//#define BVH_VERBOSE
+//#define BVH_VERBOSE_TREE
+
+//#define BVH_VERBOSE_FRAME
+//#define BVH_CHECKS
+//#define BVH_INTEGRITY_CHECKS
+#endif
+
+// debug only assert
+#ifdef BVH_CHECKS
+#define BVH_ASSERT(a) CRASH_COND((a) == false)
+#else
+#define BVH_ASSERT(a)
+#endif
+
+#ifdef BVH_VERBOSE
+#define VERBOSE_PRINT print_line
+#else
+#define VERBOSE_PRINT(a)
+#endif
+
+// really just a namespace
+struct BVHCommon {
+	static const uint32_t INVALID = (0xffffffff); // sentinel id meaning "none" (same bits as -1 in 32 bit)
+};
+
+// A thin wrapper around a 32 bit id.
+// Zero is a valid id for the BVH, the invalid value is BVHCommon::INVALID. Clients
+// that expect 0 to be invalid may need to use a plus one based ID.
+struct BVHHandle {
+	// implicit conversion to the raw id
+	operator uint32_t() const { return id(); }
+	void set(uint32_t p_value) { _data = p_value; }
+
+	uint32_t _data;
+
+	void set_invalid() { set(BVHCommon::INVALID); }
+	bool is_invalid() const { return !(_data != BVHCommon::INVALID); }
+	uint32_t id() const { return _data; }
+	void set_id(uint32_t p_id) { set(p_id); }
+
+	bool operator==(const BVHHandle &p_h) const { return p_h._data == _data; }
+	bool operator!=(const BVHHandle &p_h) const { return !(*this == p_h); }
+};
+
+// helper class to make iterative versions of recursive functions
+template <class T>
+class BVH_IterativeInfo {
+public:
+	enum {
+		ALLOCA_STACK_SIZE = 128
+	};
+
+	int32_t depth = 1; // number of entries on the stack; starts at 1 for the entry the caller seeds through get_first()
+	int32_t threshold = ALLOCA_STACK_SIZE - 2; // request() grows the stack once depth exceeds this
+	T *stack; // set by the caller to alloca memory of get_alloca_stacksize() bytes, later redirected to aux_stack
+	//only used in rare occasions when you run out of alloca memory
+	// because tree is too unbalanced.
+	LocalVector<T> aux_stack;
+	int32_t get_alloca_stacksize() const { return ALLOCA_STACK_SIZE * sizeof(T); }
+
+	T *get_first() const {
+		return &stack[0];
+	}
+
+	// pop the last member of the stack, or return false
+	bool pop(T &r_value) {
+		if (!depth) {
+			return false;
+		}
+
+		depth--;
+		r_value = stack[depth];
+		return true;
+	}
+
+	// request new addition to stack (the returned slot is not initialized here, the caller fills it in)
+	T *request() {
+		if (depth > threshold) {
+			if (aux_stack.empty()) {
+				aux_stack.resize(ALLOCA_STACK_SIZE * 2);
+				copymem(aux_stack.ptr(), stack, get_alloca_stacksize());
+			} else {
+				aux_stack.resize(aux_stack.size() * 2);
+			}
+			stack = aux_stack.ptr();
+			threshold = aux_stack.size() - 2;
+		}
+		return &stack[depth++];
+	}
+};
+
+template <class T, int MAX_CHILDREN, int MAX_ITEMS, bool USE_PAIRS = false>
+class BVH_Tree {
+	friend class BVH;
+
+#include "bvh_pair.inc"
+#include "bvh_structs.inc"
+
+public:
+	BVH_Tree() {
+		// every tree starts out without a root node
+		for (int tree = 0; tree < NUM_TREES; tree++) {
+			_root_node_id[tree] = BVHCommon::INVALID;
+		}
+
+		// leaf ids are stored negated in the nodes, so zero is not usable:
+		// take it from the pool here so that it is never handed out
+		uint32_t reserved_leaf_id; _leaves.request(reserved_leaf_id);
+	}
+
+private:
+	bool node_add_child(uint32_t p_node_id, uint32_t p_child_node_id) {
+		TNode &parent = _nodes[p_node_id];
+
+		// refuse when every child slot is already taken
+		if (parent.is_full_of_children()) {
+			return false;
+		}
+
+		// append the child, then point its back link at the parent
+		parent.children[parent.num_children++] = p_child_node_id;
+		_nodes[p_child_node_id].parent_id = p_node_id;
+
+		return true;
+	}
+
+	void node_replace_child(uint32_t p_parent_id, uint32_t p_old_child_id, uint32_t p_new_child_id) {
+		TNode &parent = _nodes[p_parent_id];
+		BVH_ASSERT(!parent.is_leaf());
+
+		// the new child takes over the slot of the old one ...
+		int child_num = parent.find_child(p_old_child_id);
+		BVH_ASSERT(child_num != BVHCommon::INVALID);
+		parent.children[child_num] = p_new_child_id;
+		// ... and is linked back to the parent
+		_nodes[p_new_child_id].parent_id = p_parent_id;
+	}
+
+	void node_remove_child(uint32_t p_parent_id, uint32_t p_child_id, bool p_prevent_sibling = false) {
+		TNode &parent = _nodes[p_parent_id];
+		BVH_ASSERT(!parent.is_leaf());
+
+		int child_num = parent.find_child(p_child_id);
+		BVH_ASSERT(child_num != BVHCommon::INVALID);
+
+		parent.remove_child_internal(child_num);
+
+		// no need to keep back references for children at the moment
+		// NOTE(review): p_prevent_sibling is not read anywhere in this function
+		uint32_t sibling_id; // always a node id, as tnode is never a leaf
+		bool sibling_present = false;
+
+		// if there are still two or more children the parent stays as it is
+		if (parent.num_children > 1) {
+			return;
+		}
+
+		// if there is 1 sibling left, it can take the place of this parent (see below)
+		if (parent.num_children == 1) {
+			// else there is now a redundant node with one child, which can be removed
+			sibling_id = parent.children[0];
+			sibling_present = true;
+		}
+
+		// now there may be no children in this node .. in which case it can be deleted
+		// remove node if empty
+		// remove link from parent
+		uint32_t grandparent_id = parent.parent_id;
+
+		// special case for root node
+		if (grandparent_id == BVHCommon::INVALID) {
+			if (sibling_present) {
+				// change the root node
+				change_root_node(sibling_id);
+
+				// delete the old root node as no longer needed
+				_nodes.free(p_parent_id);
+			}
+			// a root left without any children is kept
+			return;
+		}
+
+		if (sibling_present) {
+			node_replace_child(grandparent_id, p_parent_id, sibling_id);
+		} else {
+			node_remove_child(grandparent_id, p_parent_id, true);
+		}
+
+		// put the node on the free list to recycle
+		_nodes.free(p_parent_id);
+	}
+
+	// this relies on _current_tree being accurate
+	void change_root_node(uint32_t p_new_root_id) {
+		// a root has no parent
+		_nodes[p_new_root_id].parent_id = BVHCommon::INVALID;
+
+		// register it as the root of the tree selected by _current_tree
+		_root_node_id[_current_tree] = p_new_root_id;
+	}
+
+	void node_make_leaf(uint32_t p_node_id) {
+		// take a leaf from the pool and reset it
+		uint32_t child_leaf_id;
+		_leaves.request(child_leaf_id)->clear();
+
+		// zero is reserved at startup, to prevent this id being used
+		// (as they are stored as negative values in the node, and zero is already taken)
+		BVH_ASSERT(child_leaf_id != 0);
+
+		// the node records its leaf as a negated id
+		_nodes[p_node_id].set_leaf_id(child_leaf_id);
+	}
+	// Removes the item of reference p_ref_id from its leaf and invalidates the reference; optionally reports the bound it had.
+	void node_remove_item(uint32_t p_ref_id, BVH_ABB *r_old_aabb = nullptr) {
+		// get the reference
+		ItemRef &ref = _refs[p_ref_id];
+		uint32_t owner_node_id = ref.tnode_id;
+
+		// debug draw special
+		// This may not be needed
+		if (owner_node_id == BVHCommon::INVALID)
+			return;
+
+		TNode &tnode = _nodes[owner_node_id];
+		CRASH_COND(!tnode.is_leaf());
+
+		TLeaf &leaf = _node_get_leaf(tnode);
+
+		// if the aabb is not determining the corner size, then there is no need to refit!
+		// (optimization, as merging AABBs takes a lot of time)
+		const BVH_ABB &old_aabb = leaf.get_aabb(ref.item_id);
+
+		// shrink a little to prevent using corner aabbs
+		// in order to miss the corners first we shrink by node_expansion
+		// (which is added to the overall bound of the leaf), then we also
+		// shrink by an epsilon, in order to miss out the very corner aabbs
+		// which are important in determining the bound. Any other aabb
+		// within this can be removed and not affect the overall bound.
+		BVH_ABB node_bound = tnode.aabb;
+		node_bound.expand(-_node_expansion - 0.001f);
+		bool refit = true;
+
+		if (node_bound.is_other_within(old_aabb)) {
+			refit = false;
+		}
+
+		// record the old aabb if required (for incremental remove_and_reinsert)
+		if (r_old_aabb) {
+			*r_old_aabb = old_aabb;
+		}
+
+		leaf.remove_item_unordered(ref.item_id);
+
+		if (leaf.num_items) {
+			// the swapped item has to have its reference changed to, to point to the new item id
+			uint32_t swapped_ref_id = leaf.get_item_ref_id(ref.item_id);
+
+			ItemRef &swapped_ref = _refs[swapped_ref_id];
+
+			swapped_ref.item_id = ref.item_id;
+
+			// only have to refit if it is an edge item
+			// This is a VERY EXPENSIVE STEP
+			// we defer the refit updates until the update function is called once per frame
+			if (refit) {
+				leaf.set_dirty(true);
+			}
+		} else {
+			// remove node if empty
+			// remove link from parent
+			if (tnode.parent_id != BVHCommon::INVALID) {
+				// DANGER .. this can potentially end up with root node with 1 child ...
+				// we don't want this and must check for it
+				// NOTE(review): node_remove_child may already have freed parent_id when refit_upward runs on it
+				uint32_t parent_id = tnode.parent_id;
+
+				node_remove_child(parent_id, owner_node_id);
+				refit_upward(parent_id);
+				// recycle the emptied leaf together with its node (freeing only the node would leak the leaf)
+				_leaves.free(tnode.get_leaf_id());
+				_nodes.free(owner_node_id);
+			}
+
+			// else if no parent, it is the root node. Do not delete
+		}
+
+		ref.tnode_id = BVHCommon::INVALID;
+		ref.item_id = BVHCommon::INVALID; // unset
+	}
+
+	// Stores an item in the leaf of the given node and points the item reference at it.
+	// Returns true if the PARENT tree may need a refit; the node's own AABB is updated here.
+	bool _node_add_item(uint32_t p_node_id, uint32_t p_ref_id, const BVH_ABB &p_aabb) {
+		ItemRef &ref = _refs[p_ref_id];
+		ref.tnode_id = p_node_id;
+
+		TNode &node = _nodes[p_node_id];
+		BVH_ASSERT(node.is_leaf());
+		TLeaf &leaf = _node_get_leaf(node);
+
+		// the node bound is built from the item bound grown by the node expansion
+		BVH_ABB grown = p_aabb;
+		grown.expand(_node_expansion);
+
+		// optimization - a refit is only needed when the added item
+		// changes the AABB of the node, which in most cases it won't
+		bool bound_changed = true;
+
+		if (!leaf.num_items) {
+			// the bound of an empty leaf is not valid, so take the new one as it is
+			node.aabb = grown;
+		} else if (node.aabb.is_other_within(grown)) {
+			// already enclosed: no change to node AABBs
+			bound_changed = false;
+		} else {
+			node.aabb.merge(grown);
+		}
+
+		// claim a slot in the leaf and remember it in the reference
+		const uint32_t slot = leaf.request_item();
+		ref.item_id = slot;
+		BVH_ASSERT(ref.item_id != BVHCommon::INVALID);
+
+		// the leaf keeps the unexpanded bound of the item ...
+		leaf.get_aabb(slot) = p_aabb;
+
+		// ... and the id of the reference, as a link back from the slot
+		leaf.get_item_ref_id(slot) = p_ref_id;
+
+		// tell the caller whether the node bound was touched
+
+		return bound_changed;
+	}
+
+	uint32_t _node_create_another_child(uint32_t p_node_id, const BVH_ABB &p_aabb) {
+		uint32_t new_child_id;
+		TNode *new_child = _nodes.request(new_child_id);
+
+		// reset the node, then give it the supplied bound (which may not be necessary)
+		new_child->clear();
+		new_child->aabb = p_aabb;
+
+		// link it below the parent; the result of node_add_child is not checked
+		node_add_child(p_node_id, new_child_id);
+		return new_child_id;
+	}
+
+#include "bvh_cull.inc"
+#include "bvh_debug.inc"
+#include "bvh_integrity.inc"
+#include "bvh_logic.inc"
+#include "bvh_misc.inc"
+#include "bvh_public.inc"
+#include "bvh_refit.inc"
+#include "bvh_split.inc"
+};
+
+#undef VERBOSE_PRINT
+
+#endif // BVH_TREE_H
diff --git a/core/math/octree_definition.inc b/core/math/octree_definition.inc
index 27982666894..8c94052be8b 100644
--- a/core/math/octree_definition.inc
+++ b/core/math/octree_definition.inc
@@ -37,6 +37,7 @@
 #include "core/math/aabb.h"
 #include "core/math/geometry.h"
 #include "core/math/vector3.h"
+#include "core/os/os.h"
 #include "core/print_string.h"
 #include "core/variant.h"
 
diff --git a/core/pooled_list.h b/core/pooled_list.h
new file mode 100644
index 00000000000..a57bfd68fac
--- /dev/null
+++ b/core/pooled_list.h
@@ -0,0 +1,95 @@
+/*************************************************************************/
+/*  pooled_list.h                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#pragma once
+
+// Simple template to provide a pool with O(1) allocate and free.
+// The freelist could alternatively be a linked list placed within the unused elements
+// to use less memory, however a separate freelist is probably more cache friendly.
+
+// NOTE : Take great care when using this with non POD types. The construction and destruction
+// is done in the LocalVector, NOT as part of the pool. So requesting a new item does not guarantee
+// a constructor is run, and free does not guarantee a destructor.
+// You should generally handle clearing
+// an item explicitly after a request, as it may contain 'leftovers'.
+// This is by design for fastest use in the BVH. If you want a more general pool
+// that does call constructors / destructors on request / free, this should probably be
+// a separate template.
+
+#include "core/local_vector.h"
+
+template <class T, bool force_trivial = false>
+class PooledList {
+	LocalVector<T, uint32_t, force_trivial> list;
+	LocalVector<uint32_t, uint32_t, true> freelist;
+
+	// number of list elements currently handed out (the list itself may be larger)
+	int _used_size;
+
+public:
+	PooledList() :
+			_used_size(0) {
+	}
+
+	int estimate_memory_use() const {
+		return (freelist.size() * sizeof(uint32_t)) + (list.size() * sizeof(T));
+	}
+
+	const T &operator[](uint32_t p_index) const {
+		return list[p_index];
+	}
+	T &operator[](uint32_t p_index) {
+		return list[p_index];
+	}
+
+	int size() const { return _used_size; }
+
+	T *request(uint32_t &r_id) {
+		_used_size++;
+
+		if (!freelist.size()) {
+			// nothing to recycle, so grow the list by one element
+			r_id = list.size();
+			list.resize(r_id + 1);
+		} else {
+			// recycle the id most recently pushed onto the freelist
+			const int last = freelist.size() - 1;
+			r_id = freelist[last];
+			freelist.resize(last);
+		}
+		return &list[r_id];
+	}
+	void free(const uint32_t &p_id) {
+		// only the range of the id is checked, a double free is not detected
+		CRASH_COND(p_id >= list.size());
+		_used_size--;
+		freelist.push_back(p_id);
+	}
+};
diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml
index 473210e55e6..af3aa51271a 100644
--- a/doc/classes/ProjectSettings.xml
+++ b/doc/classes/ProjectSettings.xml
@@ -996,6 +996,9 @@
 			The default linear damp in 3D.
 			[b]Note:[/b] Good values are in the range [code]0[/code] to [code]1[/code]. At value [code]0[/code] objects will keep moving with the same velocity. Values greater than [code]1[/code] will aim to reduce the velocity to [code]0[/code] in less than a second e.g. a value of [code]2[/code] will aim to reduce the velocity to [code]0[/code] in half a second. A value equal to or greater than the physics frame rate ([member ProjectSettings.physics/common/physics_fps], [code]60[/code] by default) will bring the object to a stop in one iteration.
 		</member>
+		<member name="physics/3d/godot_physics/use_bvh" type="bool" setter="" getter="" default="true">
+			Enables the use of bounding volume hierarchy instead of octree for physics spatial partitioning. This may give better performance.
+		</member>
 		<member name="physics/3d/physics_engine" type="String" setter="" getter="" default="&quot;DEFAULT&quot;">
 			Sets which physics engine to use for 3D physics.
 			"DEFAULT" is currently the [url=https://bulletphysics.org]Bullet[/url] physics engine. The "GodotPhysics" engine is still supported as an alternative.
@@ -1244,6 +1247,9 @@
 			The rendering octree balance can be changed to favor smaller ([code]0[/code]), or larger ([code]1[/code]) branches.
 			Larger branches can increase performance significantly in some projects.
 		</member>
+		<member name="rendering/quality/spatial_partitioning/use_bvh" type="bool" setter="" getter="" default="true">
+			Enables the use of bounding volume hierarchy instead of octree for rendering spatial partitioning. This may give better performance.
+		</member>
 		<member name="rendering/quality/subsurface_scattering/follow_surface" type="bool" setter="" getter="" default="false">
 			Improves quality of subsurface scattering, but cost significantly increases.
 		</member>
diff --git a/scene/resources/world.cpp b/scene/resources/world.cpp
index 1099852098f..972bbb2485a 100644
--- a/scene/resources/world.cpp
+++ b/scene/resources/world.cpp
@@ -335,6 +335,10 @@ void World::_bind_methods() {
 
 World::World() {
 
+	// These defaults must be created BEFORE creating the scenario, because the BVH reads
+	// the defaults at that point.
+	GLOBAL_DEF("physics/3d/godot_physics/use_bvh", true);
+
 	space = PhysicsServer::get_singleton()->space_create();
 	scenario = VisualServer::get_singleton()->scenario_create();
 
diff --git a/servers/physics/broad_phase_bvh.cpp b/servers/physics/broad_phase_bvh.cpp
new file mode 100644
index 00000000000..46db8b45dab
--- /dev/null
+++ b/servers/physics/broad_phase_bvh.cpp
@@ -0,0 +1,130 @@
+/*************************************************************************/
+/*  broad_phase_bvh.cpp                                                  */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "broad_phase_bvh.h"
+#include "collision_object_sw.h"
+#include "core/project_settings.h"
+
+BroadPhaseSW::ID BroadPhaseBVH::create(CollisionObjectSW *p_object, int p_subindex) {
+	// The broad phase interface reserves 0 for "invalid", but the BVH may hand out id 0, so the
+	// public ID is the BVH id + 1 (as the visual server wrapper does); the methods below undo it.
+	return bvh.create(p_object, AABB(), p_subindex, false, 1 << p_object->get_type(), 0) + 1;
+}
+
+void BroadPhaseBVH::move(ID p_id, const AABB &p_aabb) {
+	// Assigns new bounds to the element behind p_id.
+	bvh.move(p_id - 1, p_aabb);
+}
+
+void BroadPhaseBVH::set_static(ID p_id, bool p_static) {
+	CollisionObjectSW *it = bvh.get(p_id - 1);
+	ERR_FAIL_COND(!it); // same guard as get_object(), instead of dereferencing NULL below
+	bvh.set_pairable(p_id - 1, !p_static, 1 << it->get_type(), p_static ? 0 : 0xFFFFF); // static: not pairable, empty mask
+}
+void BroadPhaseBVH::remove(ID p_id) {
+	// Erases the element behind p_id from the BVH.
+	bvh.erase(p_id - 1);
+}
+
+CollisionObjectSW *BroadPhaseBVH::get_object(ID p_id) const {
+	// Returns the object the BVH holds for p_id, or NULL (with an error) if it has none.
+	CollisionObjectSW *it = bvh.get(p_id - 1);
+	ERR_FAIL_COND_V(!it, NULL);
+	return it;
+}
+bool BroadPhaseBVH::is_static(ID p_id) const {
+	// "Static" is represented as "not pairable", see set_static().
+	return !bvh.is_pairable(p_id - 1);
+}
+int BroadPhaseBVH::get_subindex(ID p_id) const {
+	// Looks up the subindex the BVH reports for this element.
+	return bvh.get_subindex(p_id - 1);
+}
+
+int BroadPhaseBVH::cull_point(const Vector3 &p_point, CollisionObjectSW **p_results, int p_max_results, int *p_result_indices) {
+	// Point query, delegated unchanged to the BVH; its return value is passed straight back.
+	return bvh.cull_point(p_point, p_results, p_max_results, p_result_indices);
+}
+
+int BroadPhaseBVH::cull_segment(const Vector3 &p_from, const Vector3 &p_to, CollisionObjectSW **p_results, int p_max_results, int *p_result_indices) {
+	// Segment query (p_from to p_to), delegated unchanged to the BVH; its return value is passed straight back.
+	return bvh.cull_segment(p_from, p_to, p_results, p_max_results, p_result_indices);
+}
+
+int BroadPhaseBVH::cull_aabb(const AABB &p_aabb, CollisionObjectSW **p_results, int p_max_results, int *p_result_indices) {
+	// Box query, delegated unchanged to the BVH; its return value is passed straight back.
+	return bvh.cull_aabb(p_aabb, p_results, p_max_results, p_result_indices);
+}
+
+void *BroadPhaseBVH::_pair_callback(void *self, uint32_t p_A, CollisionObjectSW *p_object_A, int subindex_A, uint32_t p_B, CollisionObjectSW *p_object_B, int subindex_B) {
+	// Trampoline registered with the BVH in the constructor: `self` is the owning
+	// BroadPhaseBVH. The element ids (p_A / p_B) are not forwarded to the user callback.
+	BroadPhaseBVH *broad_phase = static_cast<BroadPhaseBVH *>(self);
+	if (broad_phase->pair_callback) {
+		return broad_phase->pair_callback(p_object_A, subindex_A, p_object_B, subindex_B, broad_phase->pair_userdata);
+	}
+	return NULL; // no user callback installed, so there is no pair data to hand back
+}
+
+void BroadPhaseBVH::_unpair_callback(void *self, uint32_t p_A, CollisionObjectSW *p_object_A, int subindex_A, uint32_t p_B, CollisionObjectSW *p_object_B, int subindex_B, void *pairdata) {
+	// Trampoline registered with the BVH in the constructor: `self` is the owning
+	// BroadPhaseBVH. The element ids (p_A / p_B) are not forwarded to the user callback.
+	BroadPhaseBVH *broad_phase = static_cast<BroadPhaseBVH *>(self);
+	if (broad_phase->unpair_callback) {
+		broad_phase->unpair_callback(p_object_A, subindex_A, p_object_B, subindex_B, pairdata, broad_phase->unpair_userdata);
+	}
+}
+
+void BroadPhaseBVH::set_pair_callback(PairCallback p_pair_callback, void *p_userdata) {
+	// Stored here and invoked from _pair_callback(), which skips a NULL callback.
+	pair_userdata = p_userdata;
+	pair_callback = p_pair_callback;
+}
+void BroadPhaseBVH::set_unpair_callback(UnpairCallback p_unpair_callback, void *p_userdata) {
+	// Stored here and invoked from _unpair_callback(), which skips a NULL callback.
+	unpair_userdata = p_userdata;
+	unpair_callback = p_unpair_callback;
+}
+
+void BroadPhaseBVH::update() {
+	bvh.update(); // nothing broad-phase specific to do here, the BVH does the work
+}
+
+BroadPhaseSW *BroadPhaseBVH::_create() {
+	// Factory function; PhysicsServerSW assigns it to BroadPhaseSW::create_func when the BVH is enabled.
+	return memnew(BroadPhaseBVH);
+}
+
+BroadPhaseBVH::BroadPhaseBVH() {
+	bvh.set_pair_callback(_pair_callback, this); // the BVH calls back into the static trampolines
+	bvh.set_unpair_callback(_unpair_callback, this);
+	pair_callback = NULL;
+	unpair_callback = NULL; // was left uninitialized although _unpair_callback() tests it
+	pair_userdata = unpair_userdata = NULL;
+}
diff --git a/servers/physics/broad_phase_bvh.h b/servers/physics/broad_phase_bvh.h
new file mode 100644
index 00000000000..72defd714a1
--- /dev/null
+++ b/servers/physics/broad_phase_bvh.h
@@ -0,0 +1,73 @@
+/*************************************************************************/
+/*  broad_phase_bvh.h                                                    */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef BROAD_PHASE_BVH_H
+#define BROAD_PHASE_BVH_H
+
+#include "broad_phase_sw.h"
+#include "core/math/bvh.h"
+
+class BroadPhaseBVH : public BroadPhaseSW {
+	// BroadPhaseSW implementation that stores its elements in a BVH_Manager.
+	BVH_Manager<CollisionObjectSW, true, 128> bvh;
+
+	static void *_pair_callback(void *, uint32_t, CollisionObjectSW *, int, uint32_t, CollisionObjectSW *, int); // registered with bvh, forwards to pair_callback
+	static void _unpair_callback(void *, uint32_t, CollisionObjectSW *, int, uint32_t, CollisionObjectSW *, int, void *); // registered with bvh, forwards to unpair_callback
+
+	PairCallback pair_callback; // user callback, set through set_pair_callback()
+	void *pair_userdata;
+	UnpairCallback unpair_callback; // user callback, set through set_unpair_callback()
+	void *unpair_userdata;
+
+public:
+	// 0 is an invalid ID
+	virtual ID create(CollisionObjectSW *p_object, int p_subindex = 0);
+	virtual void move(ID p_id, const AABB &p_aabb);
+	virtual void set_static(ID p_id, bool p_static);
+	virtual void remove(ID p_id);
+
+	virtual CollisionObjectSW *get_object(ID p_id) const;
+	virtual bool is_static(ID p_id) const;
+	virtual int get_subindex(ID p_id) const;
+
+	virtual int cull_point(const Vector3 &p_point, CollisionObjectSW **p_results, int p_max_results, int *p_result_indices = NULL);
+	virtual int cull_segment(const Vector3 &p_from, const Vector3 &p_to, CollisionObjectSW **p_results, int p_max_results, int *p_result_indices = NULL);
+	virtual int cull_aabb(const AABB &p_aabb, CollisionObjectSW **p_results, int p_max_results, int *p_result_indices = NULL);
+
+	virtual void set_pair_callback(PairCallback p_pair_callback, void *p_userdata);
+	virtual void set_unpair_callback(UnpairCallback p_unpair_callback, void *p_userdata);
+
+	virtual void update();
+
+	static BroadPhaseSW *_create(); // factory assigned to BroadPhaseSW::create_func
+	BroadPhaseBVH();
+};
+
+#endif // BROAD_PHASE_BVH_H
diff --git a/servers/physics/physics_server_sw.cpp b/servers/physics/physics_server_sw.cpp
index bd6463d357c..9c47404627a 100644
--- a/servers/physics/physics_server_sw.cpp
+++ b/servers/physics/physics_server_sw.cpp
@@ -31,8 +31,10 @@
 #include "physics_server_sw.h"
 
 #include "broad_phase_basic.h"
+#include "broad_phase_bvh.h"
 #include "broad_phase_octree.h"
 #include "core/os/os.h"
+#include "core/project_settings.h"
 #include "core/script_language.h"
 #include "joints/cone_twist_joint_sw.h"
 #include "joints/generic_6dof_joint_sw.h"
@@ -1565,7 +1567,15 @@ void PhysicsServerSW::_shape_col_cbk(const Vector3 &p_point_A, const Vector3 &p_
 PhysicsServerSW *PhysicsServerSW::singleton = NULL;
 PhysicsServerSW::PhysicsServerSW() {
 	singleton = this;
-	BroadPhaseSW::create_func = BroadPhaseOctree::_create;
+
+	bool use_bvh_or_octree = GLOBAL_GET("physics/3d/godot_physics/use_bvh");
+
+	if (use_bvh_or_octree) {
+		BroadPhaseSW::create_func = BroadPhaseBVH::_create;
+	} else {
+		BroadPhaseSW::create_func = BroadPhaseOctree::_create;
+	}
+
 	island_count = 0;
 	active_objects = 0;
 	collision_pairs = 0;
diff --git a/servers/visual/visual_server_scene.cpp b/servers/visual/visual_server_scene.cpp
index 0c268a928c2..103797da10d 100644
--- a/servers/visual/visual_server_scene.cpp
+++ b/servers/visual/visual_server_scene.cpp
@@ -103,9 +103,104 @@ void VisualServerScene::camera_set_use_vertical_aspect(RID p_camera, bool p_enab
 	camera->vaspect = p_enable;
 }
 
+/* SPATIAL PARTITIONING */
+VisualServerScene::SpatialPartitionID VisualServerScene::SpatialPartitioningScene_BVH::create(Instance *p_userdata, const AABB &p_aabb, int p_subindex, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) {
+	return _bvh.create(p_userdata, p_aabb, p_subindex, p_pairable, p_pairable_type, p_pairable_mask) + 1; // BVH id + 1, so that 0 can keep meaning "no handle"
+}
+
+void VisualServerScene::SpatialPartitioningScene_BVH::erase(SpatialPartitionID p_handle) {
+	_bvh.erase(p_handle - 1); // undo the +1 applied in create()
+}
+
+void VisualServerScene::SpatialPartitioningScene_BVH::move(SpatialPartitionID p_handle, const AABB &p_aabb) {
+	_bvh.move(p_handle - 1, p_aabb); // undo the +1 applied in create()
+}
+
+void VisualServerScene::SpatialPartitioningScene_BVH::update() {
+	_bvh.update(); // invoked at the end of update_dirty_instances()
+}
+
+void VisualServerScene::SpatialPartitioningScene_BVH::set_pairable(SpatialPartitionID p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) {
+	_bvh.set_pairable(p_handle - 1, p_pairable, p_pairable_type, p_pairable_mask); // undo the +1 applied in create()
+}
+
+int VisualServerScene::SpatialPartitioningScene_BVH::cull_convex(const Vector<Plane> &p_convex, Instance **p_result_array, int p_result_max, uint32_t p_mask) {
+	return _bvh.cull_convex(p_convex, p_result_array, p_result_max, p_mask); // results are Instance pointers, no handle conversion involved
+}
+
+int VisualServerScene::SpatialPartitioningScene_BVH::cull_aabb(const AABB &p_aabb, Instance **p_result_array, int p_result_max, int *p_subindex_array, uint32_t p_mask) {
+	return _bvh.cull_aabb(p_aabb, p_result_array, p_result_max, p_subindex_array, p_mask); // results are Instance pointers, no handle conversion involved
+}
+
+int VisualServerScene::SpatialPartitioningScene_BVH::cull_segment(const Vector3 &p_from, const Vector3 &p_to, Instance **p_result_array, int p_result_max, int *p_subindex_array, uint32_t p_mask) {
+	return _bvh.cull_segment(p_from, p_to, p_result_array, p_result_max, p_subindex_array, p_mask); // results are Instance pointers, no handle conversion involved
+}
+
+void VisualServerScene::SpatialPartitioningScene_BVH::set_pair_callback(PairCallback p_callback, void *p_userdata) {
+	_bvh.set_pair_callback(p_callback, p_userdata); // NOTE(review): ids the BVH passes to the callback are presumably raw BVH ids (no +1) - confirm
+}
+
+void VisualServerScene::SpatialPartitioningScene_BVH::set_unpair_callback(UnpairCallback p_callback, void *p_userdata) {
+	_bvh.set_unpair_callback(p_callback, p_userdata); // NOTE(review): ids the BVH passes to the callback are presumably raw BVH ids (no +1) - confirm
+}
+
+///////////////////////
+
+VisualServerScene::SpatialPartitionID VisualServerScene::SpatialPartitioningScene_Octree::create(Instance *p_userdata, const AABB &p_aabb, int p_subindex, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) {
+	return _octree.create(p_userdata, p_aabb, p_subindex, p_pairable, p_pairable_type, p_pairable_mask); // octree id is returned as-is (no +1 offset, unlike the BVH wrapper)
+}
+
+void VisualServerScene::SpatialPartitioningScene_Octree::erase(SpatialPartitionID p_handle) {
+	_octree.erase(p_handle); // the handle is the octree id itself
+}
+
+void VisualServerScene::SpatialPartitioningScene_Octree::move(SpatialPartitionID p_handle, const AABB &p_aabb) {
+	_octree.move(p_handle, p_aabb); // the handle is the octree id itself
+}
+
+void VisualServerScene::SpatialPartitioningScene_Octree::set_pairable(SpatialPartitionID p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) {
+	_octree.set_pairable(p_handle, p_pairable, p_pairable_type, p_pairable_mask); // the handle is the octree id itself
+}
+
+int VisualServerScene::SpatialPartitioningScene_Octree::cull_convex(const Vector<Plane> &p_convex, Instance **p_result_array, int p_result_max, uint32_t p_mask) {
+	return _octree.cull_convex(p_convex, p_result_array, p_result_max, p_mask); // straight pass-through to the octree
+}
+
+int VisualServerScene::SpatialPartitioningScene_Octree::cull_aabb(const AABB &p_aabb, Instance **p_result_array, int p_result_max, int *p_subindex_array, uint32_t p_mask) {
+	return _octree.cull_aabb(p_aabb, p_result_array, p_result_max, p_subindex_array, p_mask); // straight pass-through to the octree
+}
+
+int VisualServerScene::SpatialPartitioningScene_Octree::cull_segment(const Vector3 &p_from, const Vector3 &p_to, Instance **p_result_array, int p_result_max, int *p_subindex_array, uint32_t p_mask) {
+	return _octree.cull_segment(p_from, p_to, p_result_array, p_result_max, p_subindex_array, p_mask); // straight pass-through to the octree
+}
+
+void VisualServerScene::SpatialPartitioningScene_Octree::set_pair_callback(PairCallback p_callback, void *p_userdata) {
+	_octree.set_pair_callback(p_callback, p_userdata); // straight pass-through to the octree
+}
+
+void VisualServerScene::SpatialPartitioningScene_Octree::set_unpair_callback(UnpairCallback p_callback, void *p_userdata) {
+	_octree.set_unpair_callback(p_callback, p_userdata); // straight pass-through to the octree
+}
+
+void VisualServerScene::SpatialPartitioningScene_Octree::set_balance(float p_balance) {
+	_octree.set_balance(p_balance); // octree-only knob; the base class version is an empty default
+}
+
 /* SCENARIO API */
 
-void *VisualServerScene::_instance_pair(void *p_self, OctreeElementID, Instance *p_A, int, OctreeElementID, Instance *p_B, int) {
+VisualServerScene::Scenario::Scenario() {
+	debug = VS::SCENARIO_DEBUG_DISABLED;
+
+	// Pick the spatial partitioning scheme for this scenario; the project setting defaults to the BVH.
+	const bool use_bvh = GLOBAL_DEF("rendering/quality/spatial_partitioning/use_bvh", true);
+	if (!use_bvh) {
+		sps = memnew(SpatialPartitioningScene_Octree);
+	} else {
+		sps = memnew(SpatialPartitioningScene_BVH);
+	}
+}
+
+void *VisualServerScene::_instance_pair(void *p_self, SpatialPartitionID, Instance *p_A, int, SpatialPartitionID, Instance *p_B, int) {
 
 	//VisualServerScene *self = (VisualServerScene*)p_self;
 	Instance *A = p_A;
@@ -184,7 +279,8 @@ void *VisualServerScene::_instance_pair(void *p_self, OctreeElementID, Instance
 
 	return NULL;
 }
-void VisualServerScene::_instance_unpair(void *p_self, OctreeElementID, Instance *p_A, int, OctreeElementID, Instance *p_B, int, void *udata) {
+
+void VisualServerScene::_instance_unpair(void *p_self, SpatialPartitionID, Instance *p_A, int, SpatialPartitionID, Instance *p_B, int, void *udata) {
 
 	//VisualServerScene *self = (VisualServerScene*)p_self;
 	Instance *A = p_A;
@@ -260,9 +356,10 @@ RID VisualServerScene::scenario_create() {
 	RID scenario_rid = scenario_owner.make_rid(scenario);
 	scenario->self = scenario_rid;
 
-	scenario->octree.set_balance(GLOBAL_GET("rendering/quality/spatial_partitioning/render_tree_balance"));
-	scenario->octree.set_pair_callback(_instance_pair, this);
-	scenario->octree.set_unpair_callback(_instance_unpair, this);
+	scenario->sps->set_balance(GLOBAL_GET("rendering/quality/spatial_partitioning/render_tree_balance"));
+	scenario->sps->set_pair_callback(_instance_pair, this);
+	scenario->sps->set_unpair_callback(_instance_unpair, this);
+
 	scenario->reflection_probe_shadow_atlas = VSG::scene_render->shadow_atlas_create();
 	VSG::scene_render->shadow_atlas_set_size(scenario->reflection_probe_shadow_atlas, 1024); //make enough shadows for close distance, don't bother with rest
 	VSG::scene_render->shadow_atlas_set_quadrant_subdivision(scenario->reflection_probe_shadow_atlas, 0, 4);
@@ -358,9 +455,9 @@ void VisualServerScene::instance_set_base(RID p_instance, RID p_base) {
 			}
 		}
 
-		if (scenario && instance->octree_id) {
-			scenario->octree.erase(instance->octree_id); //make dependencies generated by the octree go away
-			instance->octree_id = 0;
+		if (scenario && instance->spatial_partition_id) {
+			scenario->sps->erase(instance->spatial_partition_id);
+			instance->spatial_partition_id = 0;
 		}
 
 		switch (instance->base_type) {
@@ -511,9 +608,9 @@ void VisualServerScene::instance_set_scenario(RID p_instance, RID p_scenario) {
 
 		instance->scenario->instances.remove(&instance->scenario_item);
 
-		if (instance->octree_id) {
-			instance->scenario->octree.erase(instance->octree_id); //make dependencies generated by the octree go away
-			instance->octree_id = 0;
+		if (instance->spatial_partition_id) {
+			instance->scenario->sps->erase(instance->spatial_partition_id);
+			instance->spatial_partition_id = 0;
 		}
 
 		switch (instance->base_type) {
@@ -677,26 +774,26 @@ void VisualServerScene::instance_set_visible(RID p_instance, bool p_visible) {
 
 	switch (instance->base_type) {
 		case VS::INSTANCE_LIGHT: {
-			if (VSG::storage->light_get_type(instance->base) != VS::LIGHT_DIRECTIONAL && instance->octree_id && instance->scenario) {
-				instance->scenario->octree.set_pairable(instance->octree_id, p_visible, 1 << VS::INSTANCE_LIGHT, p_visible ? VS::INSTANCE_GEOMETRY_MASK : 0);
+			if (VSG::storage->light_get_type(instance->base) != VS::LIGHT_DIRECTIONAL && instance->spatial_partition_id && instance->scenario) {
+				instance->scenario->sps->set_pairable(instance->spatial_partition_id, p_visible, 1 << VS::INSTANCE_LIGHT, p_visible ? VS::INSTANCE_GEOMETRY_MASK : 0);
 			}
 
 		} break;
 		case VS::INSTANCE_REFLECTION_PROBE: {
-			if (instance->octree_id && instance->scenario) {
-				instance->scenario->octree.set_pairable(instance->octree_id, p_visible, 1 << VS::INSTANCE_REFLECTION_PROBE, p_visible ? VS::INSTANCE_GEOMETRY_MASK : 0);
+			if (instance->spatial_partition_id && instance->scenario) {
+				instance->scenario->sps->set_pairable(instance->spatial_partition_id, p_visible, 1 << VS::INSTANCE_REFLECTION_PROBE, p_visible ? VS::INSTANCE_GEOMETRY_MASK : 0);
 			}
 
 		} break;
 		case VS::INSTANCE_LIGHTMAP_CAPTURE: {
-			if (instance->octree_id && instance->scenario) {
-				instance->scenario->octree.set_pairable(instance->octree_id, p_visible, 1 << VS::INSTANCE_LIGHTMAP_CAPTURE, p_visible ? VS::INSTANCE_GEOMETRY_MASK : 0);
+			if (instance->spatial_partition_id && instance->scenario) {
+				instance->scenario->sps->set_pairable(instance->spatial_partition_id, p_visible, 1 << VS::INSTANCE_LIGHTMAP_CAPTURE, p_visible ? VS::INSTANCE_GEOMETRY_MASK : 0);
 			}
 
 		} break;
 		case VS::INSTANCE_GI_PROBE: {
-			if (instance->octree_id && instance->scenario) {
-				instance->scenario->octree.set_pairable(instance->octree_id, p_visible, 1 << VS::INSTANCE_GI_PROBE, p_visible ? (VS::INSTANCE_GEOMETRY_MASK | (1 << VS::INSTANCE_LIGHT)) : 0);
+			if (instance->spatial_partition_id && instance->scenario) {
+				instance->scenario->sps->set_pairable(instance->spatial_partition_id, p_visible, 1 << VS::INSTANCE_GI_PROBE, p_visible ? (VS::INSTANCE_GEOMETRY_MASK | (1 << VS::INSTANCE_LIGHT)) : 0);
 			}
 
 		} break;
@@ -800,7 +897,7 @@ Vector<ObjectID> VisualServerScene::instances_cull_aabb(const AABB &p_aabb, RID
 
 	int culled = 0;
 	Instance *cull[1024];
-	culled = scenario->octree.cull_aabb(p_aabb, cull, 1024);
+	culled = scenario->sps->cull_aabb(p_aabb, cull, 1024);
 
 	for (int i = 0; i < culled; i++) {
 
@@ -823,7 +920,7 @@ Vector<ObjectID> VisualServerScene::instances_cull_ray(const Vector3 &p_from, co
 
 	int culled = 0;
 	Instance *cull[1024];
-	culled = scenario->octree.cull_segment(p_from, p_from + p_to * 10000, cull, 1024);
+	culled = scenario->sps->cull_segment(p_from, p_from + p_to * 10000, cull, 1024);
 
 	for (int i = 0; i < culled; i++) {
 		Instance *instance = cull[i];
@@ -846,7 +943,7 @@ Vector<ObjectID> VisualServerScene::instances_cull_convex(const Vector<Plane> &p
 	int culled = 0;
 	Instance *cull[1024];
 
-	culled = scenario->octree.cull_convex(p_convex, cull, 1024);
+	culled = scenario->sps->cull_convex(p_convex, cull, 1024);
 
 	for (int i = 0; i < culled; i++) {
 
@@ -975,7 +1072,7 @@ void VisualServerScene::_update_instance(Instance *p_instance) {
 		return;
 	}
 
-	if (p_instance->octree_id == 0) {
+	if (p_instance->spatial_partition_id == 0) {
 
 		uint32_t base_type = 1 << p_instance->base_type;
 		uint32_t pairable_mask = 0;
@@ -994,7 +1091,7 @@ void VisualServerScene::_update_instance(Instance *p_instance) {
 		}
 
 		// not inside octree
-		p_instance->octree_id = p_instance->scenario->octree.create(p_instance, new_aabb, 0, pairable, base_type, pairable_mask);
+		p_instance->spatial_partition_id = p_instance->scenario->sps->create(p_instance, new_aabb, 0, pairable, base_type, pairable_mask);
 
 	} else {
 
@@ -1003,7 +1100,7 @@ void VisualServerScene::_update_instance(Instance *p_instance) {
 			return;
 		*/
 
-		p_instance->scenario->octree.move(p_instance->octree_id, new_aabb);
+		p_instance->scenario->sps->move(p_instance->spatial_partition_id, new_aabb);
 	}
 }
 
@@ -1346,7 +1443,7 @@ bool VisualServerScene::_light_instance_update_shadow(Instance *p_instance, cons
 			if (depth_range_mode == VS::LIGHT_DIRECTIONAL_SHADOW_DEPTH_RANGE_OPTIMIZED) {
 				//optimize min/max
 				Vector<Plane> planes = p_cam_projection.get_projection_planes(p_cam_transform);
-				int cull_count = p_scenario->octree.cull_convex(planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
+				int cull_count = p_scenario->sps->cull_convex(planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
 				Plane base(p_cam_transform.origin, -p_cam_transform.basis.get_axis(2));
 				//check distance max and min
 
@@ -1544,7 +1641,7 @@ bool VisualServerScene::_light_instance_update_shadow(Instance *p_instance, cons
 				light_frustum_planes.write[4] = Plane(z_vec, z_max + 1e6);
 				light_frustum_planes.write[5] = Plane(-z_vec, -z_min); // z_min is ok, since casters further than far-light plane are not needed
 
-				int cull_count = p_scenario->octree.cull_convex(light_frustum_planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
+				int cull_count = p_scenario->sps->cull_convex(light_frustum_planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
 
 				// a pre pass will need to be needed to determine the actual z-near to be used
 
@@ -1609,7 +1706,7 @@ bool VisualServerScene::_light_instance_update_shadow(Instance *p_instance, cons
 					planes.write[4] = light_transform.xform(Plane(Vector3(0, -1, z).normalized(), radius));
 					planes.write[5] = light_transform.xform(Plane(Vector3(0, 0, -z), 0));
 
-					int cull_count = p_scenario->octree.cull_convex(planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
+					int cull_count = p_scenario->sps->cull_convex(planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
 					Plane near_plane(light_transform.origin, light_transform.basis.get_axis(2) * z);
 
 					for (int j = 0; j < cull_count; j++) {
@@ -1663,7 +1760,7 @@ bool VisualServerScene::_light_instance_update_shadow(Instance *p_instance, cons
 
 					Vector<Plane> planes = cm.get_projection_planes(xform);
 
-					int cull_count = p_scenario->octree.cull_convex(planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
+					int cull_count = p_scenario->sps->cull_convex(planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
 
 					Plane near_plane(xform.origin, -xform.basis.get_axis(2));
 					for (int j = 0; j < cull_count; j++) {
@@ -1700,7 +1797,7 @@ bool VisualServerScene::_light_instance_update_shadow(Instance *p_instance, cons
 			cm.set_perspective(angle * 2.0, 1.0, 0.01, radius);
 
 			Vector<Plane> planes = cm.get_projection_planes(light_transform);
-			int cull_count = p_scenario->octree.cull_convex(planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
+			int cull_count = p_scenario->sps->cull_convex(planes, instance_shadow_cull_result, MAX_INSTANCE_CULL, VS::INSTANCE_GEOMETRY_MASK);
 
 			Plane near_plane(light_transform.origin, -light_transform.basis.get_axis(2));
 			for (int j = 0; j < cull_count; j++) {
@@ -1883,7 +1980,7 @@ void VisualServerScene::_prepare_scene(const Transform p_cam_transform, const Ca
 	float z_far = p_cam_projection.get_z_far();
 
 	/* STEP 2 - CULL */
-	instance_cull_count = scenario->octree.cull_convex(planes, instance_cull_result, MAX_INSTANCE_CULL);
+	instance_cull_count = scenario->sps->cull_convex(planes, instance_cull_result, MAX_INSTANCE_CULL);
 	light_cull_count = 0;
 
 	reflection_probe_cull_count = 0;
@@ -3477,10 +3574,20 @@ void VisualServerScene::update_dirty_instances() {
 
 	VSG::storage->update_dirty_resources();
 
+	// this is just to get access to scenario so we can update the spatial partitioning scheme
+	Scenario *scenario = nullptr;
+	if (_instance_update_list.first()) {
+		scenario = _instance_update_list.first()->self()->scenario;
+	}
+
 	while (_instance_update_list.first()) {
 
 		_update_dirty_instance(_instance_update_list.first()->self());
 	}
+
+	if (scenario) {
+		scenario->sps->update();
+	}
 }
 
 bool VisualServerScene::free(RID p_rid) {
@@ -3541,6 +3648,7 @@ VisualServerScene::VisualServerScene() {
 
 	render_pass = 1;
 	singleton = this;
+	_use_bvh = false;
 }
 
 VisualServerScene::~VisualServerScene() {
diff --git a/servers/visual/visual_server_scene.h b/servers/visual/visual_server_scene.h
index ee1fd408f4b..4c6ebf7f6c6 100644
--- a/servers/visual/visual_server_scene.h
+++ b/servers/visual/visual_server_scene.h
@@ -33,6 +33,7 @@
 
 #include "servers/visual/rasterizer.h"
 
+#include "core/math/bvh.h"
 #include "core/math/geometry.h"
 #include "core/math/octree.h"
 #include "core/os/semaphore.h"
@@ -52,6 +53,7 @@ public:
 	};
 
 	uint64_t render_pass;
+	bool _use_bvh;
 
 	static VisualServerScene *singleton;
 
@@ -103,12 +105,82 @@ public:
 
 	struct Instance;
 
+	// Common interface for all spatial partitioning schemes.
+	// The abstraction adds some boilerplate, but it can be removed if we decide to stick with a single scheme.
+
+	// Note: for the BVH this is actually the BVH id + 1, so that the visual server can test against zero
+	// for validity, which maintains compatibility with the octree (where 0 indicates an invalid id).
+	typedef uint32_t SpatialPartitionID;
+
+	class SpatialPartitioningScene { // abstract interface shared by the octree and BVH scene implementations declared below
+	public: // NOTE: default arguments are bound statically in C++, so calls made through this base type use the defaults declared here
+		virtual SpatialPartitionID create(Instance *p_userdata, const AABB &p_aabb = AABB(), int p_subindex = 0, bool p_pairable = false, uint32_t p_pairable_type = 0, uint32_t p_pairable_mask = 1) = 0;
+		virtual void erase(SpatialPartitionID p_handle) = 0;
+		virtual void move(SpatialPartitionID p_handle, const AABB &p_aabb) = 0;
+		virtual void update() {} // optional hook: does nothing unless an implementation overrides it
+		virtual void set_pairable(SpatialPartitionID p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) = 0;
+		virtual int cull_convex(const Vector<Plane> &p_convex, Instance **p_result_array, int p_result_max, uint32_t p_mask = 0xFFFFFFFF) = 0;
+		virtual int cull_aabb(const AABB &p_aabb, Instance **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) = 0;
+		virtual int cull_segment(const Vector3 &p_from, const Vector3 &p_to, Instance **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) = 0;
+
+		typedef void *(*PairCallback)(void *, uint32_t, Instance *, int, uint32_t, Instance *, int); // parameter list mirrors VisualServerScene::_instance_pair
+		typedef void (*UnpairCallback)(void *, uint32_t, Instance *, int, uint32_t, Instance *, int, void *); // trailing void * is presumably the value returned by the pair callback - TODO confirm
+
+		virtual void set_pair_callback(PairCallback p_callback, void *p_userdata) = 0;
+		virtual void set_unpair_callback(UnpairCallback p_callback, void *p_userdata) = 0;
+
+		// bvh specific (no-ops by default, so other schemes can ignore them)
+		virtual void params_set_node_expansion(real_t p_value) {}
+		virtual void params_set_pairing_expansion(real_t p_value) {}
+
+		// octree specific (no-op by default, so other schemes can ignore it)
+		virtual void set_balance(float p_balance) {}
+
+		virtual ~SpatialPartitioningScene() {} // virtual because Scenario deletes its scheme through a SpatialPartitioningScene pointer
+	};
+
+	class SpatialPartitioningScene_Octree : public SpatialPartitioningScene { // octree-backed implementation of the common interface
+		Octree_CL<Instance, true> _octree; // backing octree; the member functions below are defined outside this header
+
+	public: // update() and the params_set_*() hooks are not overridden here, so the base class no-op defaults apply
+		SpatialPartitionID create(Instance *p_userdata, const AABB &p_aabb = AABB(), int p_subindex = 0, bool p_pairable = false, uint32_t p_pairable_type = 0, uint32_t p_pairable_mask = 1) override;
+		void erase(SpatialPartitionID p_handle) override;
+		void move(SpatialPartitionID p_handle, const AABB &p_aabb) override;
+		void set_pairable(SpatialPartitionID p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) override;
+		int cull_convex(const Vector<Plane> &p_convex, Instance **p_result_array, int p_result_max, uint32_t p_mask = 0xFFFFFFFF) override;
+		int cull_aabb(const AABB &p_aabb, Instance **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) override;
+		int cull_segment(const Vector3 &p_from, const Vector3 &p_to, Instance **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) override;
+		void set_pair_callback(PairCallback p_callback, void *p_userdata) override;
+		void set_unpair_callback(UnpairCallback p_callback, void *p_userdata) override;
+		void set_balance(float p_balance) override; // octree-only tuning hook (a no-op in the base class)
+	};
+
+	class SpatialPartitioningScene_BVH : public SpatialPartitioningScene { // BVH-backed implementation of the common interface
+		// Note that SpatialPartitionIDs are +1 based when stored in visual server, to enable 0 to indicate invalid ID.
+		BVH_Manager<Instance, true, 256> _bvh;
+
+	public: // set_balance() is not overridden here, so the base class no-op default applies
+		SpatialPartitionID create(Instance *p_userdata, const AABB &p_aabb = AABB(), int p_subindex = 0, bool p_pairable = false, uint32_t p_pairable_type = 0, uint32_t p_pairable_mask = 1) override;
+		void erase(SpatialPartitionID p_handle) override;
+		void move(SpatialPartitionID p_handle, const AABB &p_aabb) override;
+		void update() override; // replaces the base class no-op; defined outside this header
+		void set_pairable(SpatialPartitionID p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) override;
+		int cull_convex(const Vector<Plane> &p_convex, Instance **p_result_array, int p_result_max, uint32_t p_mask = 0xFFFFFFFF) override;
+		int cull_aabb(const AABB &p_aabb, Instance **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) override;
+		int cull_segment(const Vector3 &p_from, const Vector3 &p_to, Instance **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) override;
+		void set_pair_callback(PairCallback p_callback, void *p_userdata) override;
+		void set_unpair_callback(UnpairCallback p_callback, void *p_userdata) override;
+
+		void params_set_node_expansion(real_t p_value) override { _bvh.params_set_node_expansion(p_value); } // forwards straight to the BVH
+		void params_set_pairing_expansion(real_t p_value) override { _bvh.params_set_pairing_expansion(p_value); } // forwards straight to the BVH
+	};
+
 	struct Scenario : RID_Data {
 
 		VS::ScenarioDebugMode debug;
 		RID self;
 
-		Octree_CL<Instance, true> octree;
+		SpatialPartitioningScene *sps;
 
 		List<Instance *> directional_lights;
 		RID environment;
@@ -118,13 +190,14 @@ public:
 
 		SelfList<Instance>::List instances;
 
-		Scenario() { debug = VS::SCENARIO_DEBUG_DISABLED; }
+		Scenario();
+		~Scenario() { memdelete(sps); }
 	};
 
 	mutable RID_Owner<Scenario> scenario_owner;
 
-	static void *_instance_pair(void *p_self, OctreeElementID, Instance *p_A, int, OctreeElementID, Instance *p_B, int);
-	static void _instance_unpair(void *p_self, OctreeElementID, Instance *p_A, int, OctreeElementID, Instance *p_B, int, void *);
+	static void *_instance_pair(void *p_self, SpatialPartitionID, Instance *p_A, int, SpatialPartitionID, Instance *p_B, int);
+	static void _instance_unpair(void *p_self, SpatialPartitionID, Instance *p_A, int, SpatialPartitionID, Instance *p_B, int, void *);
 
 	virtual RID scenario_create();
 
@@ -144,7 +217,7 @@ public:
 
 		RID self;
 		//scenario stuff
-		OctreeElementID octree_id;
+		SpatialPartitionID spatial_partition_id;
 		Scenario *scenario;
 		SelfList<Instance> scenario_item;
 
@@ -187,7 +260,7 @@ public:
 				scenario_item(this),
 				update_item(this) {
 
-			octree_id = 0;
+			spatial_partition_id = 0;
 			scenario = NULL;
 
 			update_aabb = false;