// Header declaring b3SortData and the b3RadixSort32CL class.
//
// b3SortData
//   Two unsigned int fields, each reachable under two names through an
//   anonymous union: m_key / x and m_value / y.
//
// b3RadixSort32CL
//   Private state: six b3OpenCLArray work-buffer pointers (m_workBuffer1..4,
//   m_workBuffer3a, m_workBuffer4a), a cl_command_queue, five cl_kernel
//   handles (stream count, stream count for sort data, prefix scan, sort and
//   scatter, sort and scatter for sort data), the m_deviceCPU flag, and
//   pointers to a b3PrefixScanCL and a b3FillCL helper.
//   Public nested struct b3ConstData: four ints (m_n, m_nWGs, m_startBit,
//   m_nBlocksPerWG).
//   Public anonymous enum: DATA_ALIGNMENT = 256, WG_SIZE = 64,
//   BLOCK_SIZE = 256, ELEMENTS_PER_WORK_ITEM = BLOCK_SIZE / WG_SIZE (i.e. 4),
//   BITS_PER_PASS = 4, and NUM_BUCKET (initializer incomplete, see below).
//   Public overloads execute(...) and executeHost(...); each takes a sortBits
//   argument that defaults to 32.
//
// NOTE(review): this text looks damaged relative to a compilable header --
// confirm against the original file and restore it:
//   * The whole header sits on one physical line. As written, #ifndef,
//     #define and #include share a line, and the "///keys only" line comment
//     runs to the end of the line, so every declaration after it and the
//     closing #endif are comment text.
//   * Every b3OpenCLArray / b3AlignedObjectArray use appears without a
//     template argument list -- presumably "<...>" spans were stripped.
//     The element types cannot be determined from here; verify them against
//     the implementation file.
//   * "NUM_BUCKET=(1<& keysIn" indicates a lost span: presumably the rest of
//     the NUM_BUCKET initializer (likely a shift by BITS_PER_PASS -- TODO
//     confirm), any remaining enumerators, the end of the enum, and whatever
//     declarations preceded the first parameter list that starts at "keysIn".
#ifndef B3_RADIXSORT32_H #define B3_RADIXSORT32_H #include "b3OpenCLArray.h" struct b3SortData { union { unsigned int m_key; unsigned int x; }; union { unsigned int m_value; unsigned int y; }; }; #include "b3BufferInfoCL.h" class b3RadixSort32CL { b3OpenCLArray* m_workBuffer1; b3OpenCLArray* m_workBuffer2; b3OpenCLArray* m_workBuffer3; b3OpenCLArray* m_workBuffer4; b3OpenCLArray* m_workBuffer3a; b3OpenCLArray* m_workBuffer4a; cl_command_queue m_commandQueue; cl_kernel m_streamCountSortDataKernel; cl_kernel m_streamCountKernel; cl_kernel m_prefixScanKernel; cl_kernel m_sortAndScatterSortDataKernel; cl_kernel m_sortAndScatterKernel; bool m_deviceCPU; class b3PrefixScanCL* m_scan; class b3FillCL* m_fill; public: struct b3ConstData { int m_n; int m_nWGs; int m_startBit; int m_nBlocksPerWG; }; enum { DATA_ALIGNMENT = 256, WG_SIZE = 64, BLOCK_SIZE = 256, ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE), BITS_PER_PASS = 4, NUM_BUCKET=(1<& keysIn, b3OpenCLArray& keysOut, b3OpenCLArray& valuesIn, b3OpenCLArray& valuesOut, int n, int sortBits = 32); ///keys only void execute(b3OpenCLArray& keysInOut, int sortBits = 32 ); void execute(b3OpenCLArray& keyValuesInOut, int sortBits = 32 ); void executeHost(b3OpenCLArray& keyValuesInOut, int sortBits = 32); void executeHost(b3AlignedObjectArray& keyValuesInOut, int sortBits = 32); }; #endif //B3_RADIXSORT32_H