diff --git a/README.md b/README.md index 06f1cab..026c7fd 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,40 @@ The fastest and most memory efficient lattice Boltzmann CFD software, running on all GPUs via [OpenCL](https://github.com/ProjectPhysX/OpenCL-Wrapper "OpenCL-Wrapper"). -FluidX3D - A New Era of Computational Fluid Dynamics Software8 billion voxel raindrop simulation
-Hydraulic jump simulationStar Wars X-wing simulation +
+ + + +
Update History + +- v1.0 + - initial release +- v1.1 + - added solid voxelization on GPU (slow algorithm) + - added tool to print current camera position (key_H) + - minor bug fix (workaround for Intel iGPU driver bug with triangle rendering) +- v1.2 + - added functions to compute force/torque on objects + - added function to translate Mesh + - added Stokes drag validation setup +- v1.3 + - added unit conversion functions for torque + - `FORCE_FIELD` and `VOLUME_FORCE` can now be used independently + - minor bug fix (workaround for AMD legacy driver bug with binary number literals) +- v1.4 + - added interactive graphics mode on Linux with X11 + - fixed streamline visualization bug in 2D +- v2.0 + - added (cross-vendor) multi-GPU support on a single node (PC/laptop/server) +- v2.1 + - made solid voxelization on GPU lightning fast (new algorithm, from minutes to milliseconds) +- v2.2 + - added option to voxelize moving/rotating geometry on GPU, with automatic velocity initialization for each grid point based on center of rotation, linear velocity and rotational velocity + - cells that are converted from solid->fluid during re-voxelization now have their DDFs properly initialized + - added option to not auto-scale mesh during `read_stl(...)`, with negative `size` parameter + - added kernel for solid boundary rendering with marching-cubes +
## Compute Features diff --git a/src/info.cpp b/src/info.cpp index f42f73a..b713ef7 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -70,7 +70,7 @@ void Info::print_logo() const { print("| "); print("\\ \\ / /", c); print(" |\n"); print("| "); print("\\ ' /", c); print(" |\n"); print("| "); print("\\ /", c); print(" |\n"); - print("| "); print("\\ /", c); print(" FluidX3D Version 2.0 |\n"); + print("| "); print("\\ /", c); print(" FluidX3D Version 2.2 |\n"); print("| "); print("'", c); print(" Copyright (c) Moritz Lehmann |\n"); } void Info::print_initialize() { diff --git a/src/kernel.cpp b/src/kernel.cpp index 36c2355..76599ea 100644 --- a/src/kernel.cpp +++ b/src/kernel.cpp @@ -2065,9 +2065,13 @@ string opencl_c_container() { return R( // ########################## begin of O -)+R(kernel void voxelize_mesh(const uint direction, global uchar* flags, const uchar flag, const global float* p0, const global float* p1, const global float* p2, const uint triangle_number, float x0, float y0, float z0, float x1, float y1, float z1) { // voxelize triangle mesh +)+R(kernel void voxelize_mesh(const uint direction, global fpxx* fi, global float* u, global uchar* flags, const ulong t, const uchar flag, const global float* p0, const global float* p1, const global float* p2, const global float* bbu) { // voxelize triangle mesh const uint a=get_global_id(0), A=get_area(direction); // a = domain area index for each side, A = area of the domain boundary if(a>=A) return; // area might not be a multiple of def_workgroup_size, so return here to avoid writing in unallocated memory space + const uint triangle_number = as_uint(bbu[0]); + const float x0=bbu[ 1], y0=bbu[ 2], z0=bbu[ 3], x1=bbu[ 4], y1=bbu[ 5], z1=bbu[ 6]; + const float cx=bbu[ 7], cy=bbu[ 8], cz=bbu[ 9], ux=bbu[10], uy=bbu[11], uz=bbu[12], rx=bbu[13], ry=bbu[14], rz=bbu[15]; + const uint3 xyz = direction==0u ? (uint3)((uint)max(0, (int)x0-def_Ox), a%def_Ny, a/def_Ny) : direction==1u ? (uint3)(a/def_Nz, (uint)max(0, (int)y0-def_Oy), a%def_Nz) : (uint3)(a%def_Nx, a/def_Nx, (uint)max(0, (int)z0-def_Oz)); const float3 p = position(xyz); const float3 offset = (float3)(0.5f*(float)((def_Nx-2u*(def_Dx>1u))*def_Dx)-0.5f, 0.5f*(float)((def_Ny-2u*(def_Dy>1u))*def_Dy)-0.5f, 0.5f*(float)((def_Nz-2u*(def_Dz>1u))*def_Dz)-0.5f)+(float3)(def_domain_offset_x, def_domain_offset_y, def_domain_offset_z); @@ -2127,10 +2131,7 @@ string opencl_c_container() { return R( // ########################## begin of O } }/**/ - if(intersections==0u) return; // no intersection for the entire column, so return immediately - bool inside = (intersections%2u)&&(intersections_check%2u); - - for(int i=1; i<(int)intersections; i++) { // insertion sort of distances + for(int i=1; i<(int)intersections; i++) { // insertion-sort distances ushort t = distances[i]; int j = i-1; while(distances[j]>t&&j>=0) { @@ -2140,15 +2141,47 @@ string opencl_c_container() { return R( // ########################## begin of O distances[j+1] = t; } - uint intersection = 0u; // iterate through column + bool inside = (intersections%2u)&&(intersections_check%2u); + const bool set_u = sq(ux)+sq(uy)+sq(uz)+sq(rx)+sq(ry)+sq(rz)>0.0f; + uint intersection = intersections%2u!=intersections_check%2u; // iterate through column, start with 0 regularly, start with 1 if forward and backward intersection count evenness differs (error correction) const uint h0 = direction==0u ? xyz.x : direction==1u ? xyz.y : xyz.z; - for(uint h=h0; hh0+(uint)distances[intersection]) { inside = !inside; // passed mesh intersection, so switch inside/outside state intersection++; } + inside &= (intersection0.0f) { + flagsn = (flagsn&TYPE_BO)==TYPE_MS ? flagsn&~TYPE_MS : flagsn&~flag; + } + } else { + flagsn = (flagsn&TYPE_BO)==TYPE_MS ? flagsn&~TYPE_MS : flagsn&~flag; + } + } + flags[n] = flagsn; } } // voxelize_mesh() @@ -2164,7 +2197,7 @@ string opencl_c_container() { return R( // ########################## begin of O )+"#ifdef GRAPHICS"+R( -)+"#ifndef FORCE_FIELD"+R( +)+"#ifndef FORCE_FIELD"+R( // render flags as grid )+R(kernel void graphics_flags(const global uchar* flags, const global float* camera, global int* bitmap, global int* zbuffer) { )+"#else"+R( // FORCE_FIELD )+R(kernel void graphics_flags(const global uchar* flags, const global float* camera, global int* bitmap, global int* zbuffer, const global float* F) { @@ -2231,7 +2264,62 @@ string opencl_c_container() { return R( // ########################## begin of O } } )+"#endif"+R( // FORCE_FIELD -} +}/**/ + +/*)+"#ifndef FORCE_FIELD"+R( // render solid boundaries with marching-cubes +)+R(kernel void graphics_flags(const global uchar* flags, const global float* camera, global int* bitmap, global int* zbuffer) { +)+"#else"+R( // FORCE_FIELD +)+R(kernel void graphics_flags(const global uchar* flags, const global float* camera, global int* bitmap, global int* zbuffer, const global float* F) { +)+"#endif"+R( // FORCE_FIELD + const uint n = get_global_id(0); + if(n>=(uint)def_N||is_halo(n)) return; // don't execute graphics_flags() on halo + const uint3 xyz = coordinates(n); + if(xyz.x>=def_Nx-1u||xyz.y>=def_Ny-1u||xyz.z>=def_Nz-1u) return; + //if(xyz.x==0u||xyz.y==0u||xyz.z==0u||xyz.x>=def_Nx-2u||xyz.y>=def_Ny-2u||xyz.z>=def_Nz-2u) return; + uint j[8]; + const uint x0 = xyz.x; // cube stencil + const uint xp = xyz.x+1u; + const uint y0 = xyz.y *def_Nx; + const uint yp = (xyz.y+1u)*def_Nx; + const uint z0 = xyz.z *def_Ny*def_Nx; + const uint zp = (xyz.z+1u)*def_Ny*def_Nx; + j[0] = n ; // 000 + j[1] = xp+y0+z0; // +00 + j[2] = xp+y0+zp; // +0+ + j[3] = x0+y0+zp; // 00+ + j[4] = x0+yp+z0; // 0+0 + j[5] = xp+yp+z0; // ++0 + j[6] = xp+yp+zp; // +++ + j[7] = x0+yp+zp; // 0++ + float v[8]; + for(uint i=0u; i<8u; i++) v[i] = (float)((flags[j[i]]&TYPE_BO)==TYPE_S); + float3 triangles[15]; // maximum of 5 triangles with 3 vertices each + const uint tn = marching_cubes(v, 0.5f, triangles); // run marching cubes algorithm + if(tn==0u) return; + float camera_cache[15]; // cache camera parameters in case the kernel draws more than one shape + for(uint i=0u; i<15u; i++) camera_cache[i] = camera[i]; + const float3 offset = (float3)((float)xyz.x+0.5f-0.5f*(float)def_Nx, (float)xyz.y+0.5f-0.5f*(float)def_Ny, (float)xyz.z+0.5f-0.5f*(float)def_Nz); + for(uint i=0u; i0.0f) { + const int c = iron_color(255.0f*Fnl); // color boundaries depending on the force on them + draw_line(p, p+5.0f*Fn, c, camera_cache, bitmap, zbuffer); // draw colored force vectors + } + } +)+"#endif"+R( // FORCE_FIELD +}/**/ )+R(kernel void graphics_field(const global uchar* flags, const global float* u, const global float* camera, global int* bitmap, global int* zbuffer) { const uint n = get_global_id(0); diff --git a/src/lbm.cpp b/src/lbm.cpp index 5128c92..8a59305 100644 --- a/src/lbm.cpp +++ b/src/lbm.cpp @@ -164,25 +164,54 @@ uint LBM_Domain::get_velocity_set() const { return velocity_set; } -void LBM_Domain::voxelize_mesh_on_device(const Mesh* mesh, const uchar flag) { // voxelize triangle mesh +void LBM_Domain::voxelize_mesh_on_device(const Mesh* mesh, const uchar flag, const float3& rotation_center, const float3& linear_velocity, const float3& rotational_velocity) { // voxelize triangle mesh Memory p0(device, mesh->triangle_number, 1u, mesh->p0); Memory p1(device, mesh->triangle_number, 1u, mesh->p1); Memory p2(device, mesh->triangle_number, 1u, mesh->p2); + Memory bounding_box_and_velocity(device, 16u); const float x0=mesh->pmin.x, y0=mesh->pmin.y, z0=mesh->pmin.z, x1=mesh->pmax.x, y1=mesh->pmax.y, z1=mesh->pmax.z; // use bounding box of mesh to speed up voxelization - const float M[3] = { (y1-y0)*(z1-z0), (z1-z0)*(x1-x0), (x1-x0)*(y1-y0) }; - float Mmin = M[0]; + bounding_box_and_velocity[ 0] = as_float(mesh->triangle_number); + bounding_box_and_velocity[ 1] = x0; + bounding_box_and_velocity[ 2] = y0; + bounding_box_and_velocity[ 3] = z0; + bounding_box_and_velocity[ 4] = x1; + bounding_box_and_velocity[ 5] = y1; + bounding_box_and_velocity[ 6] = z1; + bounding_box_and_velocity[ 7] = rotation_center.x; + bounding_box_and_velocity[ 8] = rotation_center.y; + bounding_box_and_velocity[ 9] = rotation_center.z; + bounding_box_and_velocity[10] = linear_velocity.x; + bounding_box_and_velocity[11] = linear_velocity.y; + bounding_box_and_velocity[12] = linear_velocity.z; + bounding_box_and_velocity[13] = rotational_velocity.x; + bounding_box_and_velocity[14] = rotational_velocity.y; + bounding_box_and_velocity[15] = rotational_velocity.z; uint direction = 0u; - for(uint i=1u; i<3u; i++) { - if(M[i]vmax) { + vmax = v[i]; + direction = i; // find direction of minimum bounding-box cross-section area + } } } const ulong A[3] = { (ulong)Ny*(ulong)Nz, (ulong)Nz*(ulong)Nx, (ulong)Nx*(ulong)Ny }; - Kernel kernel_voxelize_mesh(device, A[direction], "voxelize_mesh", direction, flags, flag, p0, p1, p2, mesh->triangle_number, x0, y0, z0, x1, y1, z1); + Kernel kernel_voxelize_mesh(device, A[direction], "voxelize_mesh", direction, fi, u, flags, t+1ull, flag, p0, p1, p2, bounding_box_and_velocity); p0.write_to_device(); p1.write_to_device(); p2.write_to_device(); + bounding_box_and_velocity.write_to_device(); kernel_voxelize_mesh.run(); } void LBM_Domain::enqueue_unvoxelize_mesh_on_device(const Mesh* mesh, const uchar flag) { // remove voxelized triangle mesh from LBM grid @@ -750,16 +779,23 @@ void LBM::write_status(const string& path) { // write LBM status report to a .tx write_file(filename, status); } -void LBM::voxelize_mesh_on_device(const Mesh* mesh, const uchar flag) { // voxelize triangle mesh +void LBM::voxelize_mesh_on_device(const Mesh* mesh, const uchar flag, const float3& rotation_center, const float3& linear_velocity, const float3& rotational_velocity) { // voxelize triangle mesh if(get_D()==1u) { - lbm[0]->voxelize_mesh_on_device(mesh, flag); // if this crashes on Windows, create a TdrDelay 32-bit DWORD with decimal value 300 in Computer\HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\GraphicsDrivers + lbm[0]->voxelize_mesh_on_device(mesh, flag, rotation_center, linear_velocity, rotational_velocity); // if this crashes on Windows, create a TdrDelay 32-bit DWORD with decimal value 300 in Computer\HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\GraphicsDrivers } else { thread* threads=new thread[get_D()]; for(uint d=0u; dvoxelize_mesh_on_device(mesh, flag); + lbm[d]->voxelize_mesh_on_device(mesh, flag, rotation_center, linear_velocity, rotational_velocity); }); for(uint d=0u; d0.0f||length(rotational_velocity)>0.0f)) update_moving_boundaries(); +#endif // MOVING_BOUNDARIES + if(!initialized) { + flags.read_from_device(); + u.read_from_device(); + } } -void LBM::unvoxelize_mesh_on_device(const Mesh* mesh, const uchar flag) { // remove voxelized triangle mesh from LBM grid +void LBM::unvoxelize_mesh_on_device(const Mesh* mesh, const uchar flag) { // remove voxelized triangle mesh from LBM grid by removing all flags in mesh bounding box (only required when bounding box size changes during re-voxelization) for(uint d=0u; denqueue_unvoxelize_mesh_on_device(mesh, flag); for(uint d=0u; dfinish_queue(); } diff --git a/src/lbm.hpp b/src/lbm.hpp index 4b0eb75..09aabfc 100644 --- a/src/lbm.hpp +++ b/src/lbm.hpp @@ -121,7 +121,7 @@ class LBM_Domain { void set_fz(const float fz) { this->fz = fz; } // set global froce per volume void set_f(const float fx, const float fy, const float fz) { set_fx(fx); set_fy(fy); set_fz(fz); } // set global froce per volume - void voxelize_mesh_on_device(const Mesh* mesh, const uchar flag=TYPE_S); // voxelize mesh + void voxelize_mesh_on_device(const Mesh* mesh, const uchar flag=TYPE_S, const float3& rotation_center=float3(0.0f), const float3& linear_velocity=float3(0.0f), const float3& rotational_velocity=float3(0.0f)); // voxelize mesh void enqueue_unvoxelize_mesh_on_device(const Mesh* mesh, const uchar flag=TYPE_S); // remove voxelized triangle mesh from LBM grid #ifdef GRAPHICS @@ -473,7 +473,7 @@ class LBM { } void write_status(const string& path=""); // write LBM status report to a .txt file - void voxelize_mesh_on_device(const Mesh* mesh, const uchar flag=TYPE_S); // voxelize mesh + void voxelize_mesh_on_device(const Mesh* mesh, const uchar flag=TYPE_S, const float3& rotation_center=float3(0.0f), const float3& linear_velocity=float3(0.0f), const float3& rotational_velocity=float3(0.0f)); // voxelize mesh void unvoxelize_mesh_on_device(const Mesh* mesh, const uchar flag=TYPE_S); // remove voxelized triangle mesh from LBM grid void voxelize_stl(const string& path, const float3& center, const float3x3& rotation, const float size=-1.0f, const uchar flag=TYPE_S); // read and voxelize binary .stl file void voxelize_stl(const string& path, const float3x3& rotation, const float size=-1.0f, const uchar flag=TYPE_S); // read and voxelize binary .stl file (place in box center) diff --git a/src/setup.cpp b/src/setup.cpp index 50ad9a0..a568c9c 100644 --- a/src/setup.cpp +++ b/src/setup.cpp @@ -396,7 +396,7 @@ // lbm.graphics.write_frame_png(get_exe_path()+"export/f/"); // lbm.graphics.set_camera_free(float3(2.5f*(float)Nx, 0.0f*(float)Ny, 0.0f*(float)Nz), 0.0f, 0.0f, 50.0f); // lbm.graphics.write_frame_png(get_exe_path()+"export/s/"); - lbm.run(28u); // run LBM in parallel while CPU is voxelizing the next frame + lbm.run(28u); const float3x3 rotation = float3x3(float3(0.2f, 1.0f, 0.1f), radians(0.4032f)); // create rotation matrix to rotate mesh lbm.unvoxelize_mesh_on_device(mesh); mesh->rotate(rotation); // rotate mesh @@ -407,6 +407,77 @@ +/*void main_setup() { // radial fan (enable MOVING_BOUNDARIES and SUBGRID) + // ######################################################### define simulation box size, viscosity and volume force ############################################################################ + const uint L = 872u/4u; + const float Re = 100000.0f; + const float u = 0.12f; + LBM lbm(L, L, L/3u, units.nu_from_Re(Re, (float)L, u)); + // ############################################################################################################################################################################################# + const float radius = 0.25f*(float)L; + const float3 center = float3(lbm.center().x, lbm.center().y, 0.36f*radius); + const uint dt = 10u; + const float omega=u/radius, domega=omega*dt; + Mesh* mesh = read_stl(get_exe_path()+"../stl/fan.stl", lbm.size(), center, 2.0f*radius); // https://www.thingiverse.com/thing:6113/files + const ulong N=lbm.get_N(); uint Nx=lbm.get_Nx(), Ny=lbm.get_Ny(), Nz=lbm.get_Nz(); for(ulong n=0ull; nrotate(float3x3(float3(0.0f, 0.0f, 1.0f), domega)); // rotate mesh + } +} /**/ + + + +/*void main_setup() { // electric ducted fan (EDF) (enable MOVING_BOUNDARIES and SUBGRID) + // ######################################################### define simulation box size, viscosity and volume force ############################################################################ + const uint L = 476u/2u; + const float Re = 1000000.0f; + const float u = 0.12f; + LBM lbm(L, L*2u, L, units.nu_from_Re(Re, (float)L, u)); + // ############################################################################################################################################################################################# + const float3 center = lbm.center(); + const float3x3 rotation = float3x3(float3(0, 0, 1), radians(180.0f)); + Mesh* rotor = read_stl(get_exe_path()+"../stl/edf_rotor.stl", lbm.size(), center, rotation, -(float)L/110.0f); // https://www.thingiverse.com/thing:3014759/files + Mesh* stator = read_stl(get_exe_path()+"../stl/edf_stator.stl", lbm.size(), center, rotation, -(float)L/110.0f); // https://www.thingiverse.com/thing:3014759/files + rotor->translate(float3(0.0f, -0.35f*lbm.size().y, 0.0f)); + stator->translate(float3(0.0f, -0.25f*lbm.size().y, 0.0f)); + const float radius = 0.5f*rotor->get_max_size(); + const uint dt = 10u; + const float omega=u/radius, domega=omega*dt; + lbm.voxelize_mesh_on_device(stator, TYPE_S, center); + lbm.voxelize_mesh_on_device(rotor, TYPE_S, center, float3(0.0f), float3(0.0f, omega, 0.0f)); + const ulong N=lbm.get_N(); uint Nx=lbm.get_Nx(), Ny=lbm.get_Ny(), Nz=lbm.get_Nz(); for(ulong n=0ull; nrotate(float3x3(float3(0.0f, 1.0f, 0.0f), domega)); // rotate mesh + lbm.voxelize_mesh_on_device(rotor, TYPE_S, center, float3(0.0f), float3(0.0f, omega, 0.0f)); + lbm.run(dt); + } +} /**/ + + + /*void main_setup() { // F1 car // ######################################################### define simulation box size, viscosity and volume force ############################################################################ const uint L = 256u; // 2152u on 8x MI200 diff --git a/src/utilities.hpp b/src/utilities.hpp index 848ef16..0be7520 100644 --- a/src/utilities.hpp +++ b/src/utilities.hpp @@ -4203,6 +4203,12 @@ struct Mesh { // triangle mesh inline const float3& get_center() const { return center; } + inline float get_min_size() { + return fmin(fmin(pmax.x-pmin.x, pmax.y-pmin.y), pmax.z-pmin.z); + } + inline float get_max_size() { + return fmax(fmax(pmax.x-pmin.x, pmax.y-pmin.y), pmax.z-pmin.z); + } }; inline Mesh* read_stl(const string& path, const float3& box_size, const float3& center, const float3x3& rotation, const float size) { // read binary .stl file const string filename = create_file_extension(path, ".stl"); @@ -4232,13 +4238,15 @@ inline Mesh* read_stl(const string& path, const float3& box_size, const float3& mesh->find_bounds(); const float3 offset = -0.5f*(mesh->pmin+mesh->pmax); // auto-rescale mesh float scale = 1.0f; - if(size>0) { // rescale to specified size - scale = size/fmax(fmax(mesh->pmax.x-mesh->pmin.x, mesh->pmax.y-mesh->pmin.y), mesh->pmax.z-mesh->pmin.z); - } else { // auto-rescale to largest possible size + if(size==0.0f) { // auto-rescale to largest possible size const float scale_x = box_size.x/(mesh->pmax.x-mesh->pmin.x); const float scale_y = box_size.y/(mesh->pmax.y-mesh->pmin.y); const float scale_z = box_size.z/(mesh->pmax.z-mesh->pmin.z); scale = fmin(fmin(scale_x, scale_y), scale_z); + } else if(size>0.0f) { // rescale to specified size relative to box + scale = size/fmax(fmax(mesh->pmax.x-mesh->pmin.x, mesh->pmax.y-mesh->pmin.y), mesh->pmax.z-mesh->pmin.z); + } else { // rescale to specified size relative to original size (input size as negative number) + scale = -size; } for(uint i=0u; ip0[i] = center+scale*(offset+mesh->p0[i]);