diff --git a/acceleration/prng/.gitignore b/acceleration/prng/.gitignore deleted file mode 100644 index a3b1e354e..000000000 --- a/acceleration/prng/.gitignore +++ /dev/null @@ -1 +0,0 @@ -prng diff --git a/acceleration/prng/Makefile b/acceleration/prng/Makefile deleted file mode 100755 index b4198aaa3..000000000 --- a/acceleration/prng/Makefile +++ /dev/null @@ -1,125 +0,0 @@ -.PHONY: help - -help:: - $(ECHO) "Makefile Usage:" - $(ECHO) " make all TARGET= DEVICE=" - $(ECHO) " Command to generate the design for specified Target and Device." - $(ECHO) "" - $(ECHO) " make clean " - $(ECHO) " Command to remove the generated non-hardware files." - $(ECHO) "" - $(ECHO) " make cleanall" - $(ECHO) " Command to remove all the generated files." - $(ECHO) "" - $(ECHO) " make check TARGET= DEVICE=" - $(ECHO) " Command to run application in emulation." - $(ECHO) "" - $(ECHO) " make build TARGET= DEVICE=" - $(ECHO) " Command to build xclbin application." - $(ECHO) "" - $(ECHO) " make run_nimbix DEVICE=" - $(ECHO) " Command to run application on Nimbix Cloud." - $(ECHO) "" - $(ECHO) " make aws_build DEVICE=" - $(ECHO) " Command to build AWS xclbin application on AWS Cloud." - $(ECHO) "" - -# Points to Utility Directory -COMMON_REPO = ../../ -ABS_COMMON_REPO = $(shell readlink -f $(COMMON_REPO)) - -TARGETS := hw -TARGET := $(TARGETS) -DEVICE := $(DEVICES) -XCLBIN := ./xclbin - -include ./utils.mk - -DSA := $(call device2sandsa, $(DEVICE)) -BUILD_DIR := ./_x.$(TARGET).$(DSA) - -BUILD_DIR_dma = $(BUILD_DIR)/dma - -CXX := $(XILINX_SDX)/bin/xcpp -XOCC := $(XILINX_SDX)/bin/xocc - -#Include Libraries -include $(ABS_COMMON_REPO)/libs/opencl/opencl.mk -include $(ABS_COMMON_REPO)/libs/xcl2/xcl2.mk -CXXFLAGS += $(xcl2_CXXFLAGS) -LDFLAGS += $(xcl2_LDFLAGS) -HOST_SRCS += $(xcl2_SRCS) -CXXFLAGS += $(opencl_CXXFLAGS) -Wall -O0 -g -std=c++14 -LDFLAGS += $(opencl_LDFLAGS) - -HOST_SRCS += src/prng.cpp -HOST_HDRS += src/prng.h - -# Host compiler global settings -CXXFLAGS += -fmessage-length=0 -LDFLAGS += -lrt -lstdc++ - -# Kernel compiler global settings -CLFLAGS += -t $(TARGET) --platform $(DEVICE) --save-temps - - -EXECUTABLE = prng -CMD_ARGS = $(XCLBIN)/dma.$(TARGET).$(DSA).xclbin - -EMCONFIG_DIR = $(XCLBIN)/$(DSA) - -BINARY_CONTAINERS += $(XCLBIN)/dma.$(TARGET).$(DSA).xclbin -BINARY_CONTAINER_dma_OBJS += $(XCLBIN)/dma.$(TARGET).$(DSA).xo - -CP = cp -rf - -.PHONY: all clean cleanall docs emconfig -all: check-devices $(EXECUTABLE) $(BINARY_CONTAINERS) emconfig - -.PHONY: exe -exe: $(EXECUTABLE) - -.PHONY: build -build: $(BINARY_CONTAINERS) - -# Building kernel -$(XCLBIN)/dma.$(TARGET).$(DSA).xo: src/dma.cpp - mkdir -p $(XCLBIN) - $(XOCC) $(CLFLAGS) --temp_dir $(BUILD_DIR_dma) -c -k dma -I'$( -``` - diff --git a/acceleration/prng/description.json b/acceleration/prng/description.json deleted file mode 100644 index 58ca97d42..000000000 --- a/acceleration/prng/description.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "runtime": ["OpenCL"], - "example" : "pseudo random number generator", - "overview" : [ - "This is an optimized implementation of the pseudo random number generator algorithm", - "The method used to generate a random number sequence is called complementary multiply with carry (CMWC)", - "targeting exection on an SDAccel support FPGA acceleration card" - ], - "host_exe": "prng", - "host_srcs": "src/prng.cpp", - "cmd_args": "BUILD/dma.xclbin", - "host_hdrs": "src/prng.h", - "libs" : [ - "xcl2" - ], - "containers": [ - { - "name": "dma", - "accelerators": [ - { - "name": "dma", - "location": "src/dma.cpp" - } - ] - } - ], - "perf_fields" : ["Board", "Total Number of Samples", "Kernel Duration"], - "performance" : [ - [ - "xilinx:adm-pcie-ku3:2ddr-xpr", "16777216", "59.1ms" - ] - ], - "contributors" : [ - { - "group": "Xilinx", - "url" : "http://www.xilinx.com" - } - ], - "testinfo": { - "level_up": [ - { - "hw_emu": "1", - "hw": "1" - } - ] - } -} diff --git a/acceleration/prng/src/dma.cpp b/acceleration/prng/src/dma.cpp deleted file mode 100755 index e15590da7..000000000 --- a/acceleration/prng/src/dma.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/********** -Copyright (c) 2018, Xilinx, Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors -may be used to endorse or promote products derived from this software -without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********/ - -#include -#include -#include - -#include "prng.h" -#include "dma.h" - -//______________________________________________________________________________ -void initQ (data_t *Q, data_t* din, int j) { - - L_copy: - for (int i=0;i R0; - static randCMWC R1; - static randCMWC R2; - static randCMWC R3; - static randCMWC R4; - static randCMWC R5; - static randCMWC R6; - static randCMWC R7; - static randCMWC R8; - static randCMWC R9; - static randCMWC R10; - static randCMWC R11; - static randCMWC R12; - static randCMWC R13; - static randCMWC R14; - static randCMWC R15; - - data_t Q0[CMWC_CYCLE]; - data_t Q1[CMWC_CYCLE]; - data_t Q2[CMWC_CYCLE]; - data_t Q3[CMWC_CYCLE]; - data_t Q4[CMWC_CYCLE]; - data_t Q5[CMWC_CYCLE]; - data_t Q6[CMWC_CYCLE]; - data_t Q7[CMWC_CYCLE]; - data_t Q8[CMWC_CYCLE]; - data_t Q9[CMWC_CYCLE]; - data_t Q10[CMWC_CYCLE]; - data_t Q11[CMWC_CYCLE]; - data_t Q12[CMWC_CYCLE]; - data_t Q13[CMWC_CYCLE]; - data_t Q14[CMWC_CYCLE]; - data_t Q15[CMWC_CYCLE]; - -// theoretically these can happen concurrently - initQ(Q0, mem_in, 0); R0.init(Q0); - initQ(Q1, mem_in, 1); R1.init(Q1); - initQ(Q2, mem_in, 2); R2.init(Q2); - initQ(Q3, mem_in, 3); R3.init(Q3); - initQ(Q4, mem_in, 4); R4.init(Q4); - initQ(Q5, mem_in, 5); R5.init(Q5); - initQ(Q6, mem_in, 6); R6.init(Q6); - initQ(Q7, mem_in, 7); R7.init(Q7); - initQ(Q8, mem_in, 8); R8.init(Q8); - initQ(Q9, mem_in, 9); R9.init(Q9); - initQ(Q10,mem_in, 10); R10.init(Q10); - initQ(Q11,mem_in, 11); R11.init(Q11); - initQ(Q12,mem_in, 12); R12.init(Q12); - initQ(Q13,mem_in, 13); R13.init(Q13); - initQ(Q14,mem_in, 14); R14.init(Q14); - initQ(Q15,mem_in, 15); R15.init(Q15); - -// generate sequence one at a time - L_output: - for (int i=0; i -#include -#include -#include - -typedef ap_uint<32> data_t; - -const int CMWC_CYCLE = 4096; -//const int maxSizeOfBlock = 16384; -const int maxSizeOfBlock = 1024; // 32*1k = 1BRAM -//const int maxNofBlock = 16384; // 16k * 1k = 16M samples -const int maxNofBlock = 262144; // 256 * 1k = 256k samples = 1 GByte - -//TRIPCOUNT identifiers -const int maxNofSample = maxSizeOfBlock * maxNofBlock; -const int minNofSample = 1; - -// number of PRNG units -const int nofPRNG = 16; // don't go beyond 16, to limit number of bits to 512 -typedef ap_uint dout_t; - -extern "C" { - void dma (dout_t *mem_out, data_t *mem_in, int nofBlock); -} - -#endif diff --git a/acceleration/prng/src/prng.cpp b/acceleration/prng/src/prng.cpp deleted file mode 100755 index 183f56639..000000000 --- a/acceleration/prng/src/prng.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/********** -Copyright (c) 2018, Xilinx, Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors -may be used to endorse or promote products derived from this software -without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********/ - -// pseudo random number generator Example - -#include -#include -#include -using namespace std; - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xcl2.hpp" -#include "dma.h" -#include "prng.h" - - -#if defined(__linux__) || defined(linux) - #include "sys/time.h" -#elif defined(WIN32) - #include "windows.h" -#endif - - -double timestamp() { - double ms = 0.0; - #if defined(__linux__) || defined(linux) - timeval time; - gettimeofday(&time, NULL); - ms = (time.tv_sec * 1000.0) + (time.tv_usec / 1000.0); - #elif defined(WIN32) - SYSTEMTIME time; - GetSystemTime(&time); - ms = (time.wSeconds * 1000) + time.wMilliseconds; - #endif - return ms; -} - -//______________________________________________________________________________ -// initialize the seed table - -void initCMWC(data_t* Q, int NC) { - for (int i = 0; i < CMWC_CYCLE; i++) - Q[i] = (i << 16) | NC; -} - -//______________________________________________________________________________ -int nofBlock; -int nofSample; - -void parseArguments(int argc, char** argv) { - if (argc==2) { - nofBlock = 1024; - } else if (argc==3) { - nofBlock = atoi(argv[2]); - if (nofBlock > maxNofBlock) { - cout << "max number of block must be < " << maxNofBlock <" << " " <>>> total number of samples (32bit) = " << nofSample << endl; - -} - - -void processInCPU ( data_t* Dout_sw, data_t* Q); -void checkResults( dout_t* Dout_hw, data_t* Dout_sw); - -//______________________________________________________________________________ -int main(int argc, char** argv) { - - parseArguments(argc, argv); - std::string binaryFile = argv[1]; - -// seed table - std::vector Q_sw(nofPRNG*CMWC_CYCLE); - std::vector> Q_hw(nofPRNG*CMWC_CYCLE); - -// results from SW, 32 bit each - std::vector Dout_sw(nofSample); - -// results from HW, (32 * number of PRNG) per sample - std::vector> Dout_hw(nofBlock*maxSizeOfBlock); - -// SW processing - processInCPU ( Dout_sw.data(), Q_sw.data()); - -// initialize seed table - for (int j=0; j devices = xcl::get_xil_devices(); - cl::Device device = devices[0]; - - OCL_CHECK(err, cl::Context context(device, NULL, NULL, NULL, &err)); - OCL_CHECK(err, cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE, &err)); - OCL_CHECK(err, std::string device_name = device.getInfo(&err)); - - char* fileBuf = xcl::read_binary_file(binaryFile, fileBufSize); - cl::Program::Binaries bins{{fileBuf, fileBufSize}}; - devices.resize(1); - OCL_CHECK(err, cl::Program program(context, devices, bins, NULL, &err)); - OCL_CHECK(err, cl::Kernel krnl(program,"dma", &err)); - - std::cout << "Creating Buffers..." << std::endl; - OCL_CHECK(err, cl::Buffer cmem_Q(context,CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, - sizeof(data_t) * CMWC_CYCLE * nofPRNG, Q_hw.data(), &err)); - - OCL_CHECK(err, cl::Buffer cmem_output(context,CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, - sizeof(dout_t) * nofBlock * maxSizeOfBlock, Dout_hw.data(), &err)); - - std::cout << "Starting Kernel..." << std::endl; - OCL_CHECK(err, err = krnl.setArg(0, cmem_output)); - OCL_CHECK(err, err = krnl.setArg(1, cmem_Q)); - OCL_CHECK(err, err = krnl.setArg(2, sizeof(int), &nofBlock)); - - std::cout << "Copying Buffers to device...." << std::endl; - OCL_CHECK(err, err = q.enqueueMigrateMemObjects({cmem_Q}, 0/* 0 means from host*/)); - - cl::Event event; - unsigned long start = 0, stop = 0; - - OCL_CHECK(err, err = q.enqueueTask(krnl, NULL, &event)); - q.finish(); - - OCL_CHECK(err, err = event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start)); - OCL_CHECK(err, err = event.getProfilingInfo(CL_PROFILING_COMMAND_END, &stop)); - unsigned long duration = (stop - start); - - std::cout << "Kernel Duration: " << duration/1000000.0 << " ms" << std::endl; - - std::cout << "Copying results to host...." << std::endl; - - OCL_CHECK(err, err = q.enqueueMigrateMemObjects({cmem_output}, CL_MIGRATE_MEM_OBJECT_HOST)) - q.finish(); - -// check results - checkResults( Dout_hw.data(), Dout_sw.data()); - - delete[] fileBuf; - - std::cout << "Completed Successfully" << std::endl; - - return EXIT_SUCCESS; -} - -//______________________________________________________________________________ -void processInCPU ( data_t* Dout_sw, data_t* Q_sw ) { - - static randCMWC R_sw[nofPRNG]; - - double startMS = timestamp(); - - for (int j=0; j>>> CPU kernel: elapsed time (ms) = "<< timestamp() - startMS << endl; - -} - - -//______________________________________________________________________________ -void checkResults(dout_t *Dout_hw, data_t *Dout_sw) { - - // compare - int err_cnt = 0; - int i_word = 0; - int i_part = 0; - dout_t word_whole; - data_t word_part; - -#ifdef DEBUG - for (int i=0; i<32; i++) { - i_word = i / nofPRNG; - i_part = i % nofPRNG; - - cout <<"i = "<< i <<" = (" << i_word <<", "<< i_part; - word_whole = Dout_hw[i_word]; - word_part = word_whole(32*i_part+31, 32*i_part); - cout <<" whole/part/sw = " << hex << word_whole <<" / "<< word_part <<" / "< -#include - -template -class randCMWC { - - T Q[CYCLE]; - uint32_t c; // must be limited with CMWC_C_MAX - uint16_t i_cycle; - -public: - -//_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - void init ( T* Q_init ) { - - for (int idx=0; idx> 32; - x = t + c; - if (x < c) { - x++; - c++; - } - T tmp = m - x; - Q[i_cycle] = tmp; - - return tmp; -} - - -//_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -void make ( T* dout, const unsigned int nofSample ) { - - for (int idx=0; idx/dev/null),$(1)) -device2sandsa = $(call sanitize_dsa,$(call device2dsa,$(1))) -device2dep = $(if $(filter $(suffix $(1)),.xpfm),$(dir $(1))/$(shell $(COMMON_REPO)/utility/parsexpmf.py $(1) hw 2>/dev/null) $(1),) - -# Cleaning stuff -RM = rm -f -RMDIR = rm -rf - -ECHO:= @echo - -docs: README.md - -README.md: description.json - $(ABS_COMMON_REPO)/utility/readme_gen/readme_gen.py description.json - -check-devices: -ifndef DEVICE - $(error DEVICE not set. Please set the DEVICE properly and rerun. Run "make help" for more details.) -endif - -check-aws_repo: -ifndef SDACCEL_DIR - $(error SDACCEL_DIR not set. Please set it properly and rerun. Run "make help" for more details.) -endif diff --git a/acceleration/prng/xrt.ini b/acceleration/prng/xrt.ini deleted file mode 100644 index c75131c19..000000000 --- a/acceleration/prng/xrt.ini +++ /dev/null @@ -1,2 +0,0 @@ -[Debug] -profile=true