From f8784f30a887bb104995ef56bc0ea0a1623daaf1 Mon Sep 17 00:00:00 2001 From: Seth Hillbrand Date: Thu, 20 Sep 2018 21:23:15 -0700 Subject: [PATCH] Removing OpenMP This commit finishes the removal of OpenMP from the KiCad codebase. Removed in this commit are the OpenMP calls in 3d-viewer and qa/polygon_triangulation as well as all references in CMakeLists.txt std::thread is used instead for multithreaded computation --- .../3d_canvas/create_3Dgraphic_brd_items.cpp | 8 +- 3d-viewer/3d_canvas/create_layer_items.cpp | 116 +- .../3d_render_ogl_legacy/clayer_triangles.cpp | 49 +- .../3d_render_ogl_legacy/clayer_triangles.h | 3 + .../accelerators/ccontainer2d.cpp | 2 + .../accelerators/ccontainer2d.h | 3 + .../c3d_render_raytracing.cpp | 1361 +++++++++-------- .../c3d_render_raytracing.h | 4 +- 3d-viewer/3d_rendering/cimage.cpp | 57 +- 3d-viewer/openmp_mutex.h | 81 - CMakeLists.txt | 18 - common/geometry/shape_poly_set.cpp | 11 +- cvpcb/CMakeLists.txt | 1 - include/geometry/shape_poly_set.h | 3 +- pcbnew/CMakeLists.txt | 7 - .../test_polygon_triangulation.cpp | 62 +- 16 files changed, 905 insertions(+), 881 deletions(-) delete mode 100644 3d-viewer/openmp_mutex.h diff --git a/3d-viewer/3d_canvas/create_3Dgraphic_brd_items.cpp b/3d-viewer/3d_canvas/create_3Dgraphic_brd_items.cpp index a666aac47b..c61961ff4e 100644 --- a/3d-viewer/3d_canvas/create_3Dgraphic_brd_items.cpp +++ b/3d-viewer/3d_canvas/create_3Dgraphic_brd_items.cpp @@ -40,7 +40,7 @@ #include "../3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.h" #include "../3d_rendering/3d_render_raytracing/shapes3D/ccylinder.h" #include "../3d_rendering/3d_render_raytracing/shapes3D/clayeritem.h" -#include + #include #include #include @@ -845,11 +845,7 @@ void CINFO3D_VISU::AddSolidAreasShapesToContainer( const ZONE_CONTAINER* aZoneCo PCB_LAYER_ID aLayerId ) { // Copy the polys list because we have to simplify it - SHAPE_POLY_SET polyList = SHAPE_POLY_SET(aZoneContainer->GetFilledPolysList()); - polyList.Simplify( 
SHAPE_POLY_SET::PM_FAST ); - - if( polyList.IsEmpty() ) - return; + SHAPE_POLY_SET polyList = SHAPE_POLY_SET( aZoneContainer->GetFilledPolysList(), true ); // This convert the poly in outline and holes Convert_shape_line_polygon_to_triangles( polyList, diff --git a/3d-viewer/3d_canvas/create_layer_items.cpp b/3d-viewer/3d_canvas/create_layer_items.cpp index 888925c24d..ac965c6808 100644 --- a/3d-viewer/3d_canvas/create_layer_items.cpp +++ b/3d-viewer/3d_canvas/create_layer_items.cpp @@ -40,7 +40,7 @@ #include "../3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.h" #include "../3d_rendering/3d_render_raytracing/shapes3D/ccylinder.h" #include "../3d_rendering/3d_render_raytracing/shapes3D/clayeritem.h" -#include + #include #include #include @@ -52,6 +52,9 @@ #include #include #include +#include +#include +#include #include @@ -788,36 +791,43 @@ void CINFO3D_VISU::createLayers( REPORTER *aStatusTextReporter ) // Add zones objects // ///////////////////////////////////////////////////////////////////// - for( unsigned int lIdx = 0; lIdx < layer_id.size(); ++lIdx ) + std::atomic nextZone( 0 ); + std::atomic threadsFinished( 0 ); + + size_t parallelThreadCount = std::max( std::thread::hardware_concurrency(), 2 ); + for( size_t ii = 0; ii < parallelThreadCount; ++ii ) { - const PCB_LAYER_ID curr_layer_id = layer_id[lIdx]; - - if( aStatusTextReporter ) - aStatusTextReporter->Report( wxString::Format( _( "Create zones of layer %s" ), - LSET::Name( curr_layer_id ) ) ); - - wxASSERT( m_layers_container2D.find( curr_layer_id ) != m_layers_container2D.end() ); - - CBVHCONTAINER2D *layerContainer = m_layers_container2D[curr_layer_id]; - - // ADD COPPER ZONES - for( int ii = 0; ii < m_board->GetAreaCount(); ++ii ) + std::thread t = std::thread( [&]() { - const ZONE_CONTAINER* zone = m_board->GetArea( ii ); - const PCB_LAYER_ID zonelayer = zone->GetLayer(); - - if( zonelayer == curr_layer_id ) + for( size_t areaId = nextZone.fetch_add( 1 ); + areaId < static_cast( 
m_board->GetAreaCount() ); + areaId = nextZone.fetch_add( 1 ) ) { - AddSolidAreasShapesToContainer( zone, - layerContainer, - curr_layer_id ); + const ZONE_CONTAINER* zone = m_board->GetArea( areaId ); + + if( zone == nullptr ) + break; + + auto layerContainer = m_layers_container2D.find( zone->GetLayer() ); + + if( layerContainer != m_layers_container2D.end() ) + AddSolidAreasShapesToContainer( zone, layerContainer->second, + zone->GetLayer() ); } - } + + threadsFinished++; + } ); + + t.detach(); } + + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + } #ifdef PRINT_STATISTICS_3D_VIEWER - printf( "T13: %.3f ms\n", (float)( GetRunningMicroSecs() - start_Time ) / 1e3 ); + printf( "fill zones T13: %.3f ms\n", (float)( GetRunningMicroSecs() - start_Time ) / 1e3 ); start_Time = GetRunningMicroSecs(); #endif @@ -825,29 +835,18 @@ void CINFO3D_VISU::createLayers( REPORTER *aStatusTextReporter ) GetFlag( FL_RENDER_OPENGL_COPPER_THICKNESS ) && (m_render_engine == RENDER_ENGINE_OPENGL_LEGACY) ) { - // Add zones poly contourns - // ///////////////////////////////////////////////////////////////////// - for( unsigned int lIdx = 0; lIdx < layer_id.size(); ++lIdx ) + // ADD COPPER ZONES + for( int ii = 0; ii < m_board->GetAreaCount(); ++ii ) { - const PCB_LAYER_ID curr_layer_id = layer_id[lIdx]; + const ZONE_CONTAINER* zone = m_board->GetArea( ii ); - wxASSERT( m_layers_poly.find( curr_layer_id ) != m_layers_poly.end() ); + if( zone == nullptr ) + break; - SHAPE_POLY_SET *layerPoly = m_layers_poly[curr_layer_id]; + auto layerContainer = m_layers_poly.find( zone->GetLayer() ); - // ADD COPPER ZONES - for( int ii = 0; ii < m_board->GetAreaCount(); ++ii ) - { - const ZONE_CONTAINER* zone = m_board->GetArea( ii ); - const LAYER_NUM zonelayer = zone->GetLayer(); - - if( zonelayer == curr_layer_id ) - { - zone->TransformSolidAreasShapesToPolygonSet( *layerPoly, - segcountforcircle, - correctionFactor ); - } - } + if( 
layerContainer != m_layers_poly.end() ) + zone->TransformSolidAreasShapesToPolygonSet( *layerContainer->second, segcountforcircle, correctionFactor ); } } @@ -865,22 +864,35 @@ void CINFO3D_VISU::createLayers( REPORTER *aStatusTextReporter ) if( GetFlag( FL_RENDER_OPENGL_COPPER_THICKNESS ) && (m_render_engine == RENDER_ENGINE_OPENGL_LEGACY) ) { - const int nLayers = layer_id.size(); + std::atomic nextItem( 0 ); + std::atomic threadsFinished( 0 ); - #pragma omp parallel for - for( signed int lIdx = 0; lIdx < nLayers; ++lIdx ) + size_t parallelThreadCount = std::min( + std::max( std::thread::hardware_concurrency(), 2 ), + layer_id.size() ); + for( size_t ii = 0; ii < parallelThreadCount; ++ii ) { - const PCB_LAYER_ID curr_layer_id = layer_id[lIdx]; + std::thread t = std::thread( [&nextItem, &threadsFinished, &layer_id, this]() + { + for( size_t i = nextItem.fetch_add( 1 ); + i < layer_id.size(); + i = nextItem.fetch_add( 1 ) ) + { + auto layerPoly = m_layers_poly.find( layer_id[i] ); - wxASSERT( m_layers_poly.find( curr_layer_id ) != m_layers_poly.end() ); + if( layerPoly != m_layers_poly.end() ) + // This will make a union of all added contours + layerPoly->second->Simplify( SHAPE_POLY_SET::PM_FAST ); + } - SHAPE_POLY_SET *layerPoly = m_layers_poly[curr_layer_id]; + threadsFinished++; + } ); - wxASSERT( layerPoly != NULL ); - - // This will make a union of all added contourns - layerPoly->Simplify( SHAPE_POLY_SET::PM_FAST ); + t.detach(); } + + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); } #ifdef PRINT_STATISTICS_3D_VIEWER diff --git a/3d-viewer/3d_rendering/3d_render_ogl_legacy/clayer_triangles.cpp b/3d-viewer/3d_rendering/3d_render_ogl_legacy/clayer_triangles.cpp index ddb28a2d6f..942a8fee5a 100644 --- a/3d-viewer/3d_rendering/3d_render_ogl_legacy/clayer_triangles.cpp +++ b/3d-viewer/3d_rendering/3d_render_ogl_legacy/clayer_triangles.cpp @@ -30,6 +30,9 @@ #include "clayer_triangles.h" #include // 
For the wxASSERT +#include +#include +#include CLAYER_TRIANGLE_CONTAINER::CLAYER_TRIANGLE_CONTAINER( unsigned int aNrReservedTriangles, @@ -219,8 +222,8 @@ void CLAYER_TRIANGLES::AddToMiddleContourns( const std::vector< SFVEC2F > &aCont const SFVEC2F &v0 = aContournPoints[i + 0]; const SFVEC2F &v1 = aContournPoints[i + 1]; - #pragma omp critical { + std::lock_guard lock( m_middle_layer_lock ); m_layer_middle_contourns_quads->AddQuad( SFVEC3F( v0.x, v0.y, zTop ), SFVEC3F( v1.x, v1.y, zTop ), SFVEC3F( v1.x, v1.y, zBot ), @@ -305,21 +308,41 @@ void CLAYER_TRIANGLES::AddToMiddleContourns( const SHAPE_POLY_SET &aPolySet, m_layer_middle_contourns_quads->Reserve_More( nrContournPointsToReserve * 2, true ); - #pragma omp parallel for - for( signed int i = 0; i < aPolySet.OutlineCount(); ++i ) + std::atomic nextItem( 0 ); + std::atomic threadsFinished( 0 ); + + size_t parallelThreadCount = std::min( + std::max( std::thread::hardware_concurrency(), 2 ), + static_cast( aPolySet.OutlineCount() ) ); + for( size_t ii = 0; ii < parallelThreadCount; ++ii ) { - // Add outline - const SHAPE_LINE_CHAIN& pathOutline = aPolySet.COutline( i ); - - AddToMiddleContourns( pathOutline, zBot, zTop, aBiuTo3Du, aInvertFaceDirection ); - - // Add holes for this outline - for( int h = 0; h < aPolySet.HoleCount( i ); ++h ) + std::thread t = std::thread( [&]() { - const SHAPE_LINE_CHAIN &hole = aPolySet.CHole( i, h ); - AddToMiddleContourns( hole, zBot, zTop, aBiuTo3Du, aInvertFaceDirection ); - } + for( int i = nextItem.fetch_add( 1 ); + i < aPolySet.OutlineCount(); + i = nextItem.fetch_add( 1 ) ) + { + // Add outline + const SHAPE_LINE_CHAIN& pathOutline = aPolySet.COutline( i ); + + AddToMiddleContourns( pathOutline, zBot, zTop, aBiuTo3Du, aInvertFaceDirection ); + + // Add holes for this outline + for( int h = 0; h < aPolySet.HoleCount( i ); ++h ) + { + const SHAPE_LINE_CHAIN &hole = aPolySet.CHole( i, h ); + AddToMiddleContourns( hole, zBot, zTop, aBiuTo3Du, aInvertFaceDirection ); + } + } + 
+ threadsFinished++; + } ); + + t.detach(); } + + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); } diff --git a/3d-viewer/3d_rendering/3d_render_ogl_legacy/clayer_triangles.h b/3d-viewer/3d_rendering/3d_render_ogl_legacy/clayer_triangles.h index 4a4f08c126..9deb119782 100644 --- a/3d-viewer/3d_rendering/3d_render_ogl_legacy/clayer_triangles.h +++ b/3d-viewer/3d_rendering/3d_render_ogl_legacy/clayer_triangles.h @@ -35,6 +35,7 @@ #include #include #include +#include typedef std::vector< SFVEC3F > SFVEC3F_VECTOR; @@ -174,6 +175,8 @@ public: float zTop, bool aInvertFaceDirection ); + std::mutex m_middle_layer_lock; + CLAYER_TRIANGLE_CONTAINER *m_layer_top_segment_ends; CLAYER_TRIANGLE_CONTAINER *m_layer_top_triangles; CLAYER_TRIANGLE_CONTAINER *m_layer_middle_contourns_quads; diff --git a/3d-viewer/3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.cpp b/3d-viewer/3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.cpp index cc2d0bb393..eef660b802 100644 --- a/3d-viewer/3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.cpp +++ b/3d-viewer/3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.cpp @@ -29,6 +29,7 @@ #include "ccontainer2d.h" #include +#include #include #include #include @@ -46,6 +47,7 @@ CGENERICCONTAINER2D::CGENERICCONTAINER2D( OBJECT2D_TYPE aObjType ) void CGENERICCONTAINER2D::Clear() { + std::lock_guard lock( m_lock ); m_bbox.Reset(); for( LIST_OBJECT2D::iterator ii = m_objects.begin(); diff --git a/3d-viewer/3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.h b/3d-viewer/3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.h index 4686fa0a34..54b660afc3 100644 --- a/3d-viewer/3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.h +++ b/3d-viewer/3d_rendering/3d_render_raytracing/accelerators/ccontainer2d.h @@ -32,6 +32,7 @@ #include "../shapes2D/cobject2d.h" #include +#include typedef std::list LIST_OBJECT2D; typedef std::list 
CONST_LIST_OBJECT2D; @@ -52,6 +53,7 @@ public: { if( aObject ) // Only add if it is a valid pointer { + std::lock_guard lock( m_lock ); m_objects.push_back( aObject ); m_bbox.Union( aObject->GetBBox() ); } @@ -70,6 +72,7 @@ public: CONST_LIST_OBJECT2D &aOutList ) const = 0; private: + std::mutex m_lock; }; diff --git a/3d-viewer/3d_rendering/3d_render_raytracing/c3d_render_raytracing.cpp b/3d-viewer/3d_rendering/3d_render_raytracing/c3d_render_raytracing.cpp index 474ac7b9ba..b059cfeaf3 100644 --- a/3d-viewer/3d_rendering/3d_render_raytracing/c3d_render_raytracing.cpp +++ b/3d-viewer/3d_rendering/3d_render_raytracing/c3d_render_raytracing.cpp @@ -29,6 +29,9 @@ #include #include +#include +#include +#include #include "c3d_render_raytracing.h" #include "mortoncodes.h" @@ -42,10 +45,6 @@ // convertLinearToSRGB //#include -#ifdef _OPENMP -#include -#endif - C3D_RENDER_RAYTRACING::C3D_RENDER_RAYTRACING( CINFO3D_VISU &aSettings ) : C3D_RENDER_BASE( aSettings ), m_postshader_ssao( aSettings.CameraGet() ) @@ -137,7 +136,7 @@ void C3D_RENDER_RAYTRACING::restart_render_state() // Mark the blocks not processed yet std::fill( m_blockPositionsWasProcessed.begin(), m_blockPositionsWasProcessed.end(), - false ); + 0 ); } @@ -364,61 +363,58 @@ void C3D_RENDER_RAYTRACING::rt_render_tracing( GLubyte *ptrPBO , REPORTER *aStatusTextReporter ) { m_isPreview = false; - wxASSERT( m_blockPositions.size() <= LONG_MAX ); - const long nrBlocks = (long) m_blockPositions.size(); - const unsigned startTime = GetRunningMicroSecs(); + auto startTime = std::chrono::steady_clock::now(); bool breakLoop = false; - int numBlocksRendered = 0; - #pragma omp parallel for schedule(dynamic) shared(breakLoop) \ - firstprivate(ptrPBO) reduction(+:numBlocksRendered) default(none) - for( long iBlock = 0; iBlock < nrBlocks; iBlock++ ) + std::atomic numBlocksRendered( 0 ); + std::atomic currentBlock( 0 ); + std::atomic threadsFinished( 0 ); + + size_t parallelThreadCount = std::min( + std::max( 
std::thread::hardware_concurrency(), 2 ), + m_blockPositions.size() ); + for( size_t ii = 0; ii < parallelThreadCount; ++ii ) { - - #pragma omp flush(breakLoop) - if( !breakLoop ) + std::thread t = std::thread( [&]() { - bool process_block; - - // std::vector stuffs eight bools to each byte, so access to - // them can never be natively atomic. - #pragma omp critical(checkProcessBlock) + for( size_t iBlock = currentBlock.fetch_add( 1 ); + iBlock < m_blockPositions.size() && !breakLoop; + iBlock = currentBlock.fetch_add( 1 ) ) { - process_block = !m_blockPositionsWasProcessed[iBlock]; - m_blockPositionsWasProcessed[iBlock] = true; - } + if( !m_blockPositionsWasProcessed[iBlock] ) + { + rt_render_trace_block( ptrPBO, iBlock ); + numBlocksRendered++; + m_blockPositionsWasProcessed[iBlock] = 1; - if( process_block ) - { - rt_render_trace_block( ptrPBO, iBlock ); - numBlocksRendered++; - - - // Check if it spend already some time render and request to exit - // to display the progress - #ifdef _OPENMP - if( omp_get_thread_num() == 0 ) - #endif - if( (GetRunningMicroSecs() - startTime) > 150000 ) - { + // Check if it spend already some time render and request to exit + // to display the progress + if( std::chrono::duration_cast( + std::chrono::steady_clock::now() - startTime ).count() > 150 ) breakLoop = true; - #pragma omp flush(breakLoop) - } + } } - } + + threadsFinished++; + } ); + + t.detach(); } + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + m_nrBlocksRenderProgress += numBlocksRendered; if( aStatusTextReporter ) aStatusTextReporter->Report( wxString::Format( _( "Rendering: %.0f %%" ), (float)(m_nrBlocksRenderProgress * 100) / - (float)nrBlocks ) ); + (float)m_blockPositions.size() ) ); // Check if it finish the rendering and if should continue to a post processing // or mark it as finished - if( m_nrBlocksRenderProgress >= nrBlocks ) + if( m_nrBlocksRenderProgress >= m_blockPositions.size() ) { 
if( m_settings.GetFlag( FL_RENDER_RAYTRACING_POST_PROCESSING ) ) m_rt_render_state = RT_RENDER_STATE_POST_PROCESS_SHADE; @@ -925,21 +921,35 @@ void C3D_RENDER_RAYTRACING::rt_render_post_process_shade( GLubyte *ptrPBO, if( aStatusTextReporter ) aStatusTextReporter->Report( _("Rendering: Post processing shader") ); - // Compute the shader value - #pragma omp parallel for schedule(dynamic) - for( signed int y = 0; y < (int)m_realBufferSize.y; ++y ) - { - SFVEC3F *ptr = &m_shaderBuffer[ y * m_realBufferSize.x ]; + std::atomic nextBlock( 0 ); + std::atomic threadsFinished( 0 ); - for( signed int x = 0; x < (int)m_realBufferSize.x; ++x ) + size_t parallelThreadCount = std::max( std::thread::hardware_concurrency(), 2 ); + for( size_t ii = 0; ii < parallelThreadCount; ++ii ) + { + std::thread t = std::thread( [&]() { - *ptr = m_postshader_ssao.Shade( SFVEC2I( x, y ) ); - ptr++; - } + for( size_t y = nextBlock.fetch_add( 1 ); + y < m_realBufferSize.y; + y = nextBlock.fetch_add( 1 ) ) + { + SFVEC3F *ptr = &m_shaderBuffer[ y * m_realBufferSize.x ]; + + for( signed int x = 0; x < (int)m_realBufferSize.x; ++x ) + { + *ptr = m_postshader_ssao.Shade( SFVEC2I( x, y ) ); + ptr++; + } + } + + threadsFinished++; + } ); + + t.detach(); } - // Wait for all threads to finish - #pragma omp barrier + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); // Set next state m_rt_render_state = RT_RENDER_STATE_POST_PROCESS_BLUR_AND_FINISH; @@ -960,91 +970,107 @@ void C3D_RENDER_RAYTRACING::rt_render_post_process_blur_finish( GLubyte *ptrPBO, if( m_settings.GetFlag( FL_RENDER_RAYTRACING_POST_PROCESSING ) ) { // Now blurs the shader result and compute the final color - #pragma omp parallel for schedule(dynamic) - for( signed int y = 0; y < (int)m_realBufferSize.y; ++y ) + std::atomic nextBlock( 0 ); + std::atomic threadsFinished( 0 ); + + size_t parallelThreadCount = std::max( std::thread::hardware_concurrency(), 2 ); + for( size_t ii = 
0; ii < parallelThreadCount; ++ii ) { - GLubyte *ptr = &ptrPBO[ y * m_realBufferSize.x * 4 ]; - - const SFVEC3F *ptrShaderY0 = - &m_shaderBuffer[ glm::max((int)y - 2, 0) * m_realBufferSize.x ]; - const SFVEC3F *ptrShaderY1 = - &m_shaderBuffer[ glm::max((int)y - 1, 0) * m_realBufferSize.x ]; - const SFVEC3F *ptrShaderY2 = - &m_shaderBuffer[ y * m_realBufferSize.x ]; - const SFVEC3F *ptrShaderY3 = - &m_shaderBuffer[ glm::min((int)y + 1, (int)(m_realBufferSize.y - 1)) * - m_realBufferSize.x ]; - const SFVEC3F *ptrShaderY4 = - &m_shaderBuffer[ glm::min((int)y + 2, (int)(m_realBufferSize.y - 1)) * - m_realBufferSize.x ]; - - for( signed int x = 0; x < (int)m_realBufferSize.x; ++x ) + std::thread t = std::thread( [&]() { -// This #if should be 1, it is here that can be used for debug proposes during development -#if 1 - int idx = x > 1 ? -2 : 0; - SFVEC3F bluredShadeColor = ptrShaderY0[idx] * 1.0f / 273.0f + - ptrShaderY1[idx] * 4.0f / 273.0f + - ptrShaderY2[idx] * 7.0f / 273.0f + - ptrShaderY3[idx] * 4.0f / 273.0f + - ptrShaderY4[idx] * 1.0f / 273.0f; + for( size_t y = nextBlock.fetch_add( 1 ); + y < m_realBufferSize.y; + y = nextBlock.fetch_add( 1 ) ) + { + GLubyte *ptr = &ptrPBO[ y * m_realBufferSize.x * 4 ]; - idx = x > 0 ? 
-1 : 0; - bluredShadeColor += ptrShaderY0[idx] * 4.0f / 273.0f + - ptrShaderY1[idx] * 16.0f / 273.0f + - ptrShaderY2[idx] * 26.0f / 273.0f + - ptrShaderY3[idx] * 16.0f / 273.0f + - ptrShaderY4[idx] * 4.0f / 273.0f; + const SFVEC3F *ptrShaderY0 = + &m_shaderBuffer[ glm::max((int)y - 2, 0) * m_realBufferSize.x ]; + const SFVEC3F *ptrShaderY1 = + &m_shaderBuffer[ glm::max((int)y - 1, 0) * m_realBufferSize.x ]; + const SFVEC3F *ptrShaderY2 = + &m_shaderBuffer[ y * m_realBufferSize.x ]; + const SFVEC3F *ptrShaderY3 = + &m_shaderBuffer[ glm::min((int)y + 1, (int)(m_realBufferSize.y - 1)) * + m_realBufferSize.x ]; + const SFVEC3F *ptrShaderY4 = + &m_shaderBuffer[ glm::min((int)y + 2, (int)(m_realBufferSize.y - 1)) * + m_realBufferSize.x ]; - bluredShadeColor += (*ptrShaderY0) * 7.0f / 273.0f + - (*ptrShaderY1) * 26.0f / 273.0f + - (*ptrShaderY2) * 41.0f / 273.0f + - (*ptrShaderY3) * 26.0f / 273.0f + - (*ptrShaderY4) * 7.0f / 273.0f; + for( signed int x = 0; x < (int)m_realBufferSize.x; ++x ) + { + // This #if should be 1, it is here that can be used for debug proposes during development + #if 1 + int idx = x > 1 ? -2 : 0; + SFVEC3F bluredShadeColor = ptrShaderY0[idx] * 1.0f / 273.0f + + ptrShaderY1[idx] * 4.0f / 273.0f + + ptrShaderY2[idx] * 7.0f / 273.0f + + ptrShaderY3[idx] * 4.0f / 273.0f + + ptrShaderY4[idx] * 1.0f / 273.0f; - idx = (x < (int)m_realBufferSize.x - 1) ? 1 : 0; - bluredShadeColor += ptrShaderY0[idx] * 4.0f / 273.0f + - ptrShaderY1[idx] *16.0f / 273.0f + - ptrShaderY2[idx] *26.0f / 273.0f + - ptrShaderY3[idx] *16.0f / 273.0f + - ptrShaderY4[idx] * 4.0f / 273.0f; + idx = x > 0 ? -1 : 0; + bluredShadeColor += ptrShaderY0[idx] * 4.0f / 273.0f + + ptrShaderY1[idx] * 16.0f / 273.0f + + ptrShaderY2[idx] * 26.0f / 273.0f + + ptrShaderY3[idx] * 16.0f / 273.0f + + ptrShaderY4[idx] * 4.0f / 273.0f; - idx = (x < (int)m_realBufferSize.x - 2) ? 
2 : 0; - bluredShadeColor += ptrShaderY0[idx] * 1.0f / 273.0f + - ptrShaderY1[idx] * 4.0f / 273.0f + - ptrShaderY2[idx] * 7.0f / 273.0f + - ptrShaderY3[idx] * 4.0f / 273.0f + - ptrShaderY4[idx] * 1.0f / 273.0f; + bluredShadeColor += (*ptrShaderY0) * 7.0f / 273.0f + + (*ptrShaderY1) * 26.0f / 273.0f + + (*ptrShaderY2) * 41.0f / 273.0f + + (*ptrShaderY3) * 26.0f / 273.0f + + (*ptrShaderY4) * 7.0f / 273.0f; - // process next pixel - ++ptrShaderY0; - ++ptrShaderY1; - ++ptrShaderY2; - ++ptrShaderY3; - ++ptrShaderY4; + idx = (x < (int)m_realBufferSize.x - 1) ? 1 : 0; + bluredShadeColor += ptrShaderY0[idx] * 4.0f / 273.0f + + ptrShaderY1[idx] *16.0f / 273.0f + + ptrShaderY2[idx] *26.0f / 273.0f + + ptrShaderY3[idx] *16.0f / 273.0f + + ptrShaderY4[idx] * 4.0f / 273.0f; -#ifdef USE_SRGB_SPACE - const SFVEC3F originColor = convertLinearToSRGB( m_postshader_ssao.GetColorAtNotProtected( SFVEC2I( x,y ) ) ); -#else - const SFVEC3F originColor = m_postshader_ssao.GetColorAtNotProtected( SFVEC2I( x,y ) ); -#endif + idx = (x < (int)m_realBufferSize.x - 2) ? 
2 : 0; + bluredShadeColor += ptrShaderY0[idx] * 1.0f / 273.0f + + ptrShaderY1[idx] * 4.0f / 273.0f + + ptrShaderY2[idx] * 7.0f / 273.0f + + ptrShaderY3[idx] * 4.0f / 273.0f + + ptrShaderY4[idx] * 1.0f / 273.0f; - const SFVEC3F shadedColor = m_postshader_ssao.ApplyShadeColor( SFVEC2I( x,y ), originColor, bluredShadeColor ); -#else - // Debug code - //const SFVEC3F shadedColor = SFVEC3F( 1.0f ) - - // m_shaderBuffer[ y * m_realBufferSize.x + x]; - const SFVEC3F shadedColor = m_shaderBuffer[ y * m_realBufferSize.x + x ]; -#endif + // process next pixel + ++ptrShaderY0; + ++ptrShaderY1; + ++ptrShaderY2; + ++ptrShaderY3; + ++ptrShaderY4; - rt_final_color( ptr, shadedColor, false ); + #ifdef USE_SRGB_SPACE + const SFVEC3F originColor = convertLinearToSRGB( m_postshader_ssao.GetColorAtNotProtected( SFVEC2I( x,y ) ) ); + #else + const SFVEC3F originColor = m_postshader_ssao.GetColorAtNotProtected( SFVEC2I( x,y ) ); + #endif - ptr += 4; - } + const SFVEC3F shadedColor = m_postshader_ssao.ApplyShadeColor( SFVEC2I( x,y ), originColor, bluredShadeColor ); + #else + // Debug code + //const SFVEC3F shadedColor = SFVEC3F( 1.0f ) - + // m_shaderBuffer[ y * m_realBufferSize.x + x]; + const SFVEC3F shadedColor = m_shaderBuffer[ y * m_realBufferSize.x + x ]; + #endif + + rt_final_color( ptr, shadedColor, false ); + + ptr += 4; + } + } + + threadsFinished++; + } ); + + t.detach(); } - // Wait for all threads to finish - #pragma omp barrier + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + // Debug code //m_postshader_ssao.DebugBuffersOutputAsImages(); @@ -1059,614 +1085,629 @@ void C3D_RENDER_RAYTRACING::render_preview( GLubyte *ptrPBO ) { m_isPreview = true; - unsigned int nrBlocks = m_blockPositionsFast.size(); + std::atomic nextBlock( 0 ); + std::atomic threadsFinished( 0 ); - #pragma omp parallel for schedule(dynamic) - for( signed int iBlock = 0; iBlock < (int)nrBlocks; iBlock++ ) + size_t parallelThreadCount = 
std::min( + std::max( std::thread::hardware_concurrency(), 2 ), + m_blockPositions.size() ); + for( size_t ii = 0; ii < parallelThreadCount; ++ii ) { - const SFVEC2UI &windowPosUI = m_blockPositionsFast[ iBlock ]; - const SFVEC2I windowsPos = SFVEC2I( windowPosUI.x + m_xoffset, - windowPosUI.y + m_yoffset ); - - RAYPACKET blockPacket( m_settings.CameraGet(), windowsPos, 4 ); - - HITINFO_PACKET hitPacket[RAYPACKET_RAYS_PER_PACKET]; - - // Initialize hitPacket with a "not hit" information - for( unsigned int i = 0; i < RAYPACKET_RAYS_PER_PACKET; ++i ) + std::thread t = std::thread( [&]() { - hitPacket[i].m_HitInfo.m_tHit = std::numeric_limits::infinity(); - hitPacket[i].m_HitInfo.m_acc_node_info = 0; - hitPacket[i].m_hitresult = false; - } - - // Intersect packet block - m_accelerator->Intersect( blockPacket, hitPacket ); - - - // Calculate background gradient color - // ///////////////////////////////////////////////////////////////////// - SFVEC3F bgColor[RAYPACKET_DIM]; - - for( unsigned int y = 0; y < RAYPACKET_DIM; ++y ) - { - const float posYfactor = (float)(windowsPos.y + y * 4.0f) / (float)m_windowSize.y; - - bgColor[y] = (SFVEC3F)m_settings.m_BgColorTop * SFVEC3F(posYfactor) + - (SFVEC3F)m_settings.m_BgColorBot * ( SFVEC3F(1.0f) - SFVEC3F(posYfactor) ); - } - - CCOLORRGB hitColorShading[RAYPACKET_RAYS_PER_PACKET]; - - for( unsigned int i = 0; i < RAYPACKET_RAYS_PER_PACKET; ++i ) - { - const SFVEC3F bhColorY = bgColor[i / RAYPACKET_DIM]; - - if( hitPacket[i].m_hitresult == true ) + for( size_t iBlock = nextBlock.fetch_add( 1 ); + iBlock < m_blockPositionsFast.size(); + iBlock = nextBlock.fetch_add( 1 ) ) { - const SFVEC3F hitColor = shadeHit( bhColorY, - blockPacket.m_ray[i], - hitPacket[i].m_HitInfo, - false, - 0, - false ); + const SFVEC2UI &windowPosUI = m_blockPositionsFast[ iBlock ]; + const SFVEC2I windowsPos = SFVEC2I( windowPosUI.x + m_xoffset, + windowPosUI.y + m_yoffset ); - hitColorShading[i] = CCOLORRGB( hitColor ); - } - else - hitColorShading[i] 
= bhColorY; - } + RAYPACKET blockPacket( m_settings.CameraGet(), windowsPos, 4 ); - CCOLORRGB cLRB_old[(RAYPACKET_DIM - 1)]; + HITINFO_PACKET hitPacket[RAYPACKET_RAYS_PER_PACKET]; - for( unsigned int y = 0; y < (RAYPACKET_DIM - 1); ++y ) - { - - const SFVEC3F bgColorY = bgColor[y]; - const CCOLORRGB bgColorYRGB = CCOLORRGB( bgColorY ); - - // This stores cRTB from the last block to be reused next time in a cLTB pixel - CCOLORRGB cRTB_old; - - //RAY cRTB_ray; - //HITINFO cRTB_hitInfo; - - for( unsigned int x = 0; x < (RAYPACKET_DIM - 1); ++x ) - { - // pxl 0 pxl 1 pxl 2 pxl 3 pxl 4 - // x0 x1 ... - // .---------------------------. - // y0 | cLT | cxxx | cLRT | cxxx | cRT | - // | cxxx | cLTC | cxxx | cRTC | cxxx | - // | cLTB | cxxx | cC | cxxx | cRTB | - // | cxxx | cLBC | cxxx | cRBC | cxxx | - // '---------------------------' - // y1 | cLB | cxxx | cLRB | cxxx | cRB | - - const unsigned int iLT = ((x + 0) + RAYPACKET_DIM * (y + 0)); - const unsigned int iRT = ((x + 1) + RAYPACKET_DIM * (y + 0)); - const unsigned int iLB = ((x + 0) + RAYPACKET_DIM * (y + 1)); - const unsigned int iRB = ((x + 1) + RAYPACKET_DIM * (y + 1)); - - // !TODO: skip when there are no hits - - - const CCOLORRGB &cLT = hitColorShading[ iLT ]; - const CCOLORRGB &cRT = hitColorShading[ iRT ]; - const CCOLORRGB &cLB = hitColorShading[ iLB ]; - const CCOLORRGB &cRB = hitColorShading[ iRB ]; - - // Trace and shade cC - // ///////////////////////////////////////////////////////////// - CCOLORRGB cC = bgColorYRGB; - - const SFVEC3F &oriLT = blockPacket.m_ray[ iLT ].m_Origin; - const SFVEC3F &oriRB = blockPacket.m_ray[ iRB ].m_Origin; - - const SFVEC3F &dirLT = blockPacket.m_ray[ iLT ].m_Dir; - const SFVEC3F &dirRB = blockPacket.m_ray[ iRB ].m_Dir; - - SFVEC3F oriC; - SFVEC3F dirC; - - HITINFO centerHitInfo; - centerHitInfo.m_tHit = std::numeric_limits::infinity(); - - bool hittedC = false; - - if( (hitPacket[ iLT ].m_hitresult == true) || - (hitPacket[ iRT ].m_hitresult == true) || - (hitPacket[ 
iLB ].m_hitresult == true) || - (hitPacket[ iRB ].m_hitresult == true) ) + // Initialize hitPacket with a "not hit" information + for( unsigned int i = 0; i < RAYPACKET_RAYS_PER_PACKET; ++i ) { - - oriC = ( oriLT + oriRB ) * 0.5f; - dirC = glm::normalize( ( dirLT + dirRB ) * 0.5f ); - - // Trace the center ray - RAY centerRay; - centerRay.Init( oriC, dirC ); - - const unsigned int nodeLT = hitPacket[ iLT ].m_HitInfo.m_acc_node_info; - const unsigned int nodeRT = hitPacket[ iRT ].m_HitInfo.m_acc_node_info; - const unsigned int nodeLB = hitPacket[ iLB ].m_HitInfo.m_acc_node_info; - const unsigned int nodeRB = hitPacket[ iRB ].m_HitInfo.m_acc_node_info; - - if( nodeLT != 0 ) - hittedC |= m_accelerator->Intersect( centerRay, centerHitInfo, nodeLT ); - - if( ( nodeRT != 0 ) && - ( nodeRT != nodeLT ) ) - hittedC |= m_accelerator->Intersect( centerRay, centerHitInfo, nodeRT ); - - if( ( nodeLB != 0 ) && - ( nodeLB != nodeLT ) && - ( nodeLB != nodeRT ) ) - hittedC |= m_accelerator->Intersect( centerRay, centerHitInfo, nodeLB ); - - if( ( nodeRB != 0 ) && - ( nodeRB != nodeLB ) && - ( nodeRB != nodeLT ) && - ( nodeRB != nodeRT ) ) - hittedC |= m_accelerator->Intersect( centerRay, centerHitInfo, nodeRB ); - - if( hittedC ) - cC = CCOLORRGB( shadeHit( bgColorY, centerRay, centerHitInfo, false, 0, false ) ); - else - { - centerHitInfo.m_tHit = std::numeric_limits::infinity(); - hittedC = m_accelerator->Intersect( centerRay, centerHitInfo ); - - if( hittedC ) - cC = CCOLORRGB( shadeHit( bgColorY, - centerRay, - centerHitInfo, - false, - 0, - false ) ); - } + hitPacket[i].m_HitInfo.m_tHit = std::numeric_limits::infinity(); + hitPacket[i].m_HitInfo.m_acc_node_info = 0; + hitPacket[i].m_hitresult = false; } - // Trace and shade cLRT - // ///////////////////////////////////////////////////////////// - CCOLORRGB cLRT = bgColorYRGB; + // Intersect packet block + m_accelerator->Intersect( blockPacket, hitPacket ); - const SFVEC3F &oriRT = blockPacket.m_ray[ iRT ].m_Origin; - const 
SFVEC3F &dirRT = blockPacket.m_ray[ iRT ].m_Dir; - if( y == 0 ) + // Calculate background gradient color + // ///////////////////////////////////////////////////////////////////// + SFVEC3F bgColor[RAYPACKET_DIM]; + + for( unsigned int y = 0; y < RAYPACKET_DIM; ++y ) { - // Trace the center ray - RAY rayLRT; - rayLRT.Init( ( oriLT + oriRT ) * 0.5f, - glm::normalize( ( dirLT + dirRT ) * 0.5f ) ); + const float posYfactor = (float)(windowsPos.y + y * 4.0f) / (float)m_windowSize.y; - HITINFO hitInfoLRT; - hitInfoLRT.m_tHit = std::numeric_limits::infinity(); + bgColor[y] = (SFVEC3F)m_settings.m_BgColorTop * SFVEC3F(posYfactor) + + (SFVEC3F)m_settings.m_BgColorBot * ( SFVEC3F(1.0f) - SFVEC3F(posYfactor) ); + } - if( hitPacket[ iLT ].m_hitresult && - hitPacket[ iRT ].m_hitresult && - (hitPacket[ iLT ].m_HitInfo.pHitObject == hitPacket[ iRT ].m_HitInfo.pHitObject) ) + CCOLORRGB hitColorShading[RAYPACKET_RAYS_PER_PACKET]; + + for( unsigned int i = 0; i < RAYPACKET_RAYS_PER_PACKET; ++i ) + { + const SFVEC3F bhColorY = bgColor[i / RAYPACKET_DIM]; + + if( hitPacket[i].m_hitresult == true ) { - hitInfoLRT.pHitObject = hitPacket[ iLT ].m_HitInfo.pHitObject; - hitInfoLRT.m_tHit = ( hitPacket[ iLT ].m_HitInfo.m_tHit + - hitPacket[ iRT ].m_HitInfo.m_tHit ) * 0.5f; - hitInfoLRT.m_HitNormal = - glm::normalize( ( hitPacket[ iLT ].m_HitInfo.m_HitNormal + - hitPacket[ iRT ].m_HitInfo.m_HitNormal ) * 0.5f ); + const SFVEC3F hitColor = shadeHit( bhColorY, + blockPacket.m_ray[i], + hitPacket[i].m_HitInfo, + false, + 0, + false ); - cLRT = CCOLORRGB( shadeHit( bgColorY, rayLRT, hitInfoLRT, false, 0, false ) ); - cLRT = BlendColor( cLRT, BlendColor( cLT, cRT) ); + hitColorShading[i] = CCOLORRGB( hitColor ); } else + hitColorShading[i] = bhColorY; + } + + CCOLORRGB cLRB_old[(RAYPACKET_DIM - 1)]; + + for( unsigned int y = 0; y < (RAYPACKET_DIM - 1); ++y ) + { + + const SFVEC3F bgColorY = bgColor[y]; + const CCOLORRGB bgColorYRGB = CCOLORRGB( bgColorY ); + + // This stores cRTB from the last 
block to be reused next time in a cLTB pixel + CCOLORRGB cRTB_old; + + //RAY cRTB_ray; + //HITINFO cRTB_hitInfo; + + for( unsigned int x = 0; x < (RAYPACKET_DIM - 1); ++x ) { - if( hitPacket[ iLT ].m_hitresult || - hitPacket[ iRT ].m_hitresult ) // If any hits + // pxl 0 pxl 1 pxl 2 pxl 3 pxl 4 + // x0 x1 ... + // .---------------------------. + // y0 | cLT | cxxx | cLRT | cxxx | cRT | + // | cxxx | cLTC | cxxx | cRTC | cxxx | + // | cLTB | cxxx | cC | cxxx | cRTB | + // | cxxx | cLBC | cxxx | cRBC | cxxx | + // '---------------------------' + // y1 | cLB | cxxx | cLRB | cxxx | cRB | + + const unsigned int iLT = ((x + 0) + RAYPACKET_DIM * (y + 0)); + const unsigned int iRT = ((x + 1) + RAYPACKET_DIM * (y + 0)); + const unsigned int iLB = ((x + 0) + RAYPACKET_DIM * (y + 1)); + const unsigned int iRB = ((x + 1) + RAYPACKET_DIM * (y + 1)); + + // !TODO: skip when there are no hits + + + const CCOLORRGB &cLT = hitColorShading[ iLT ]; + const CCOLORRGB &cRT = hitColorShading[ iRT ]; + const CCOLORRGB &cLB = hitColorShading[ iLB ]; + const CCOLORRGB &cRB = hitColorShading[ iRB ]; + + // Trace and shade cC + // ///////////////////////////////////////////////////////////// + CCOLORRGB cC = bgColorYRGB; + + const SFVEC3F &oriLT = blockPacket.m_ray[ iLT ].m_Origin; + const SFVEC3F &oriRB = blockPacket.m_ray[ iRB ].m_Origin; + + const SFVEC3F &dirLT = blockPacket.m_ray[ iLT ].m_Dir; + const SFVEC3F &dirRB = blockPacket.m_ray[ iRB ].m_Dir; + + SFVEC3F oriC; + SFVEC3F dirC; + + HITINFO centerHitInfo; + centerHitInfo.m_tHit = std::numeric_limits::infinity(); + + bool hittedC = false; + + if( (hitPacket[ iLT ].m_hitresult == true) || + (hitPacket[ iRT ].m_hitresult == true) || + (hitPacket[ iLB ].m_hitresult == true) || + (hitPacket[ iRB ].m_hitresult == true) ) { + + oriC = ( oriLT + oriRB ) * 0.5f; + dirC = glm::normalize( ( dirLT + dirRB ) * 0.5f ); + + // Trace the center ray + RAY centerRay; + centerRay.Init( oriC, dirC ); + const unsigned int nodeLT = hitPacket[ iLT 
].m_HitInfo.m_acc_node_info; const unsigned int nodeRT = hitPacket[ iRT ].m_HitInfo.m_acc_node_info; - - bool hittedLRT = false; + const unsigned int nodeLB = hitPacket[ iLB ].m_HitInfo.m_acc_node_info; + const unsigned int nodeRB = hitPacket[ iRB ].m_HitInfo.m_acc_node_info; if( nodeLT != 0 ) - hittedLRT |= m_accelerator->Intersect( rayLRT, hitInfoLRT, nodeLT ); + hittedC |= m_accelerator->Intersect( centerRay, centerHitInfo, nodeLT ); if( ( nodeRT != 0 ) && ( nodeRT != nodeLT ) ) - hittedLRT |= m_accelerator->Intersect( rayLRT, - hitInfoLRT, - nodeRT ); - - if( hittedLRT ) - cLRT = CCOLORRGB( shadeHit( bgColorY, - rayLRT, - hitInfoLRT, - false, - 0, - false ) ); - else - { - hitInfoLRT.m_tHit = std::numeric_limits::infinity(); - - if( m_accelerator->Intersect( rayLRT,hitInfoLRT ) ) - cLRT = CCOLORRGB( shadeHit( bgColorY, - rayLRT, - hitInfoLRT, - false, - 0, - false ) ); - } - } - } - } - else - cLRT = cLRB_old[x]; - - - // Trace and shade cLTB - // ///////////////////////////////////////////////////////////// - CCOLORRGB cLTB = bgColorYRGB; - - if( x == 0 ) - { - const SFVEC3F &oriLB = blockPacket.m_ray[ iLB ].m_Origin; - const SFVEC3F &dirLB = blockPacket.m_ray[ iLB ].m_Dir; - - // Trace the center ray - RAY rayLTB; - rayLTB.Init( ( oriLT + oriLB ) * 0.5f, - glm::normalize( ( dirLT + dirLB ) * 0.5f ) ); - - HITINFO hitInfoLTB; - hitInfoLTB.m_tHit = std::numeric_limits::infinity(); - - if( hitPacket[ iLT ].m_hitresult && - hitPacket[ iLB ].m_hitresult && - ( hitPacket[ iLT ].m_HitInfo.pHitObject == - hitPacket[ iLB ].m_HitInfo.pHitObject ) ) - { - hitInfoLTB.pHitObject = hitPacket[ iLT ].m_HitInfo.pHitObject; - hitInfoLTB.m_tHit = ( hitPacket[ iLT ].m_HitInfo.m_tHit + - hitPacket[ iLB ].m_HitInfo.m_tHit ) * 0.5f; - hitInfoLTB.m_HitNormal = - glm::normalize( ( hitPacket[ iLT ].m_HitInfo.m_HitNormal + - hitPacket[ iLB ].m_HitInfo.m_HitNormal ) * 0.5f ); - cLTB = CCOLORRGB( shadeHit( bgColorY, rayLTB, hitInfoLTB, false, 0, false ) ); - cLTB = BlendColor( cLTB, 
BlendColor( cLT, cLB) ); - } - else - { - if( hitPacket[ iLT ].m_hitresult || - hitPacket[ iLB ].m_hitresult ) // If any hits - { - const unsigned int nodeLT = hitPacket[ iLT ].m_HitInfo.m_acc_node_info; - const unsigned int nodeLB = hitPacket[ iLB ].m_HitInfo.m_acc_node_info; - - bool hittedLTB = false; - - if( nodeLT != 0 ) - hittedLTB |= m_accelerator->Intersect( rayLTB, - hitInfoLTB, - nodeLT ); + hittedC |= m_accelerator->Intersect( centerRay, centerHitInfo, nodeRT ); if( ( nodeLB != 0 ) && - ( nodeLB != nodeLT ) ) - hittedLTB |= m_accelerator->Intersect( rayLTB, - hitInfoLTB, - nodeLB ); + ( nodeLB != nodeLT ) && + ( nodeLB != nodeRT ) ) + hittedC |= m_accelerator->Intersect( centerRay, centerHitInfo, nodeLB ); - if( hittedLTB ) - cLTB = CCOLORRGB( shadeHit( bgColorY, - rayLTB, - hitInfoLTB, - false, - 0, - false ) ); + if( ( nodeRB != 0 ) && + ( nodeRB != nodeLB ) && + ( nodeRB != nodeLT ) && + ( nodeRB != nodeRT ) ) + hittedC |= m_accelerator->Intersect( centerRay, centerHitInfo, nodeRB ); + + if( hittedC ) + cC = CCOLORRGB( shadeHit( bgColorY, centerRay, centerHitInfo, false, 0, false ) ); else { - hitInfoLTB.m_tHit = std::numeric_limits::infinity(); + centerHitInfo.m_tHit = std::numeric_limits::infinity(); + hittedC = m_accelerator->Intersect( centerRay, centerHitInfo ); - if( m_accelerator->Intersect( rayLTB, hitInfoLTB ) ) - cLTB = CCOLORRGB( shadeHit( bgColorY, - rayLTB, - hitInfoLTB, - false, - 0, - false ) ); + if( hittedC ) + cC = CCOLORRGB( shadeHit( bgColorY, + centerRay, + centerHitInfo, + false, + 0, + false ) ); } } - } - } - else - cLTB = cRTB_old; + + // Trace and shade cLRT + // ///////////////////////////////////////////////////////////// + CCOLORRGB cLRT = bgColorYRGB; + + const SFVEC3F &oriRT = blockPacket.m_ray[ iRT ].m_Origin; + const SFVEC3F &dirRT = blockPacket.m_ray[ iRT ].m_Dir; + + if( y == 0 ) + { + // Trace the center ray + RAY rayLRT; + rayLRT.Init( ( oriLT + oriRT ) * 0.5f, + glm::normalize( ( dirLT + dirRT ) * 0.5f ) ); + + 
HITINFO hitInfoLRT; + hitInfoLRT.m_tHit = std::numeric_limits::infinity(); + + if( hitPacket[ iLT ].m_hitresult && + hitPacket[ iRT ].m_hitresult && + (hitPacket[ iLT ].m_HitInfo.pHitObject == hitPacket[ iRT ].m_HitInfo.pHitObject) ) + { + hitInfoLRT.pHitObject = hitPacket[ iLT ].m_HitInfo.pHitObject; + hitInfoLRT.m_tHit = ( hitPacket[ iLT ].m_HitInfo.m_tHit + + hitPacket[ iRT ].m_HitInfo.m_tHit ) * 0.5f; + hitInfoLRT.m_HitNormal = + glm::normalize( ( hitPacket[ iLT ].m_HitInfo.m_HitNormal + + hitPacket[ iRT ].m_HitInfo.m_HitNormal ) * 0.5f ); + + cLRT = CCOLORRGB( shadeHit( bgColorY, rayLRT, hitInfoLRT, false, 0, false ) ); + cLRT = BlendColor( cLRT, BlendColor( cLT, cRT) ); + } + else + { + if( hitPacket[ iLT ].m_hitresult || + hitPacket[ iRT ].m_hitresult ) // If any hits + { + const unsigned int nodeLT = hitPacket[ iLT ].m_HitInfo.m_acc_node_info; + const unsigned int nodeRT = hitPacket[ iRT ].m_HitInfo.m_acc_node_info; + + bool hittedLRT = false; + + if( nodeLT != 0 ) + hittedLRT |= m_accelerator->Intersect( rayLRT, hitInfoLRT, nodeLT ); + + if( ( nodeRT != 0 ) && + ( nodeRT != nodeLT ) ) + hittedLRT |= m_accelerator->Intersect( rayLRT, + hitInfoLRT, + nodeRT ); + + if( hittedLRT ) + cLRT = CCOLORRGB( shadeHit( bgColorY, + rayLRT, + hitInfoLRT, + false, + 0, + false ) ); + else + { + hitInfoLRT.m_tHit = std::numeric_limits::infinity(); + + if( m_accelerator->Intersect( rayLRT,hitInfoLRT ) ) + cLRT = CCOLORRGB( shadeHit( bgColorY, + rayLRT, + hitInfoLRT, + false, + 0, + false ) ); + } + } + } + } + else + cLRT = cLRB_old[x]; - // Trace and shade cRTB - // ///////////////////////////////////////////////////////////// - CCOLORRGB cRTB = bgColorYRGB; + // Trace and shade cLTB + // ///////////////////////////////////////////////////////////// + CCOLORRGB cLTB = bgColorYRGB; - // Trace the center ray - RAY rayRTB; - rayRTB.Init( ( oriRT + oriRB ) * 0.5f, - glm::normalize( ( dirRT + dirRB ) * 0.5f ) ); + if( x == 0 ) + { + const SFVEC3F &oriLB = blockPacket.m_ray[ 
iLB ].m_Origin; + const SFVEC3F &dirLB = blockPacket.m_ray[ iLB ].m_Dir; - HITINFO hitInfoRTB; - hitInfoRTB.m_tHit = std::numeric_limits::infinity(); + // Trace the center ray + RAY rayLTB; + rayLTB.Init( ( oriLT + oriLB ) * 0.5f, + glm::normalize( ( dirLT + dirLB ) * 0.5f ) ); - if( hitPacket[ iRT ].m_hitresult && - hitPacket[ iRB ].m_hitresult && - ( hitPacket[ iRT ].m_HitInfo.pHitObject == - hitPacket[ iRB ].m_HitInfo.pHitObject ) ) - { - hitInfoRTB.pHitObject = hitPacket[ iRT ].m_HitInfo.pHitObject; + HITINFO hitInfoLTB; + hitInfoLTB.m_tHit = std::numeric_limits::infinity(); - hitInfoRTB.m_tHit = ( hitPacket[ iRT ].m_HitInfo.m_tHit + - hitPacket[ iRB ].m_HitInfo.m_tHit ) * 0.5f; + if( hitPacket[ iLT ].m_hitresult && + hitPacket[ iLB ].m_hitresult && + ( hitPacket[ iLT ].m_HitInfo.pHitObject == + hitPacket[ iLB ].m_HitInfo.pHitObject ) ) + { + hitInfoLTB.pHitObject = hitPacket[ iLT ].m_HitInfo.pHitObject; + hitInfoLTB.m_tHit = ( hitPacket[ iLT ].m_HitInfo.m_tHit + + hitPacket[ iLB ].m_HitInfo.m_tHit ) * 0.5f; + hitInfoLTB.m_HitNormal = + glm::normalize( ( hitPacket[ iLT ].m_HitInfo.m_HitNormal + + hitPacket[ iLB ].m_HitInfo.m_HitNormal ) * 0.5f ); + cLTB = CCOLORRGB( shadeHit( bgColorY, rayLTB, hitInfoLTB, false, 0, false ) ); + cLTB = BlendColor( cLTB, BlendColor( cLT, cLB) ); + } + else + { + if( hitPacket[ iLT ].m_hitresult || + hitPacket[ iLB ].m_hitresult ) // If any hits + { + const unsigned int nodeLT = hitPacket[ iLT ].m_HitInfo.m_acc_node_info; + const unsigned int nodeLB = hitPacket[ iLB ].m_HitInfo.m_acc_node_info; - hitInfoRTB.m_HitNormal = - glm::normalize( ( hitPacket[ iRT ].m_HitInfo.m_HitNormal + - hitPacket[ iRB ].m_HitInfo.m_HitNormal ) * 0.5f ); + bool hittedLTB = false; - cRTB = CCOLORRGB( shadeHit( bgColorY, rayRTB, hitInfoRTB, false, 0, false ) ); - cRTB = BlendColor( cRTB, BlendColor( cRT, cRB) ); - } - else - { - if( hitPacket[ iRT ].m_hitresult || - hitPacket[ iRB ].m_hitresult ) // If any hits - { - const unsigned int nodeRT = 
hitPacket[ iRT ].m_HitInfo.m_acc_node_info; - const unsigned int nodeRB = hitPacket[ iRB ].m_HitInfo.m_acc_node_info; + if( nodeLT != 0 ) + hittedLTB |= m_accelerator->Intersect( rayLTB, + hitInfoLTB, + nodeLT ); - bool hittedRTB = false; + if( ( nodeLB != 0 ) && + ( nodeLB != nodeLT ) ) + hittedLTB |= m_accelerator->Intersect( rayLTB, + hitInfoLTB, + nodeLB ); - if( nodeRT != 0 ) - hittedRTB |= m_accelerator->Intersect( rayRTB, hitInfoRTB, nodeRT ); + if( hittedLTB ) + cLTB = CCOLORRGB( shadeHit( bgColorY, + rayLTB, + hitInfoLTB, + false, + 0, + false ) ); + else + { + hitInfoLTB.m_tHit = std::numeric_limits::infinity(); - if( ( nodeRB != 0 ) && - ( nodeRB != nodeRT ) ) - hittedRTB |= m_accelerator->Intersect( rayRTB, hitInfoRTB, nodeRB ); + if( m_accelerator->Intersect( rayLTB, hitInfoLTB ) ) + cLTB = CCOLORRGB( shadeHit( bgColorY, + rayLTB, + hitInfoLTB, + false, + 0, + false ) ); + } + } + } + } + else + cLTB = cRTB_old; - if( hittedRTB ) - cRTB = CCOLORRGB( shadeHit( bgColorY, - rayRTB, - hitInfoRTB, - false, - 0, - false) ); + + // Trace and shade cRTB + // ///////////////////////////////////////////////////////////// + CCOLORRGB cRTB = bgColorYRGB; + + // Trace the center ray + RAY rayRTB; + rayRTB.Init( ( oriRT + oriRB ) * 0.5f, + glm::normalize( ( dirRT + dirRB ) * 0.5f ) ); + + HITINFO hitInfoRTB; + hitInfoRTB.m_tHit = std::numeric_limits::infinity(); + + if( hitPacket[ iRT ].m_hitresult && + hitPacket[ iRB ].m_hitresult && + ( hitPacket[ iRT ].m_HitInfo.pHitObject == + hitPacket[ iRB ].m_HitInfo.pHitObject ) ) + { + hitInfoRTB.pHitObject = hitPacket[ iRT ].m_HitInfo.pHitObject; + + hitInfoRTB.m_tHit = ( hitPacket[ iRT ].m_HitInfo.m_tHit + + hitPacket[ iRB ].m_HitInfo.m_tHit ) * 0.5f; + + hitInfoRTB.m_HitNormal = + glm::normalize( ( hitPacket[ iRT ].m_HitInfo.m_HitNormal + + hitPacket[ iRB ].m_HitInfo.m_HitNormal ) * 0.5f ); + + cRTB = CCOLORRGB( shadeHit( bgColorY, rayRTB, hitInfoRTB, false, 0, false ) ); + cRTB = BlendColor( cRTB, BlendColor( cRT, cRB) 
); + } else { - hitInfoRTB.m_tHit = std::numeric_limits::infinity(); + if( hitPacket[ iRT ].m_hitresult || + hitPacket[ iRB ].m_hitresult ) // If any hits + { + const unsigned int nodeRT = hitPacket[ iRT ].m_HitInfo.m_acc_node_info; + const unsigned int nodeRB = hitPacket[ iRB ].m_HitInfo.m_acc_node_info; - if( m_accelerator->Intersect( rayRTB, hitInfoRTB ) ) - cRTB = CCOLORRGB( shadeHit( bgColorY, - rayRTB, - hitInfoRTB, - false, - 0, - false ) ); + bool hittedRTB = false; + + if( nodeRT != 0 ) + hittedRTB |= m_accelerator->Intersect( rayRTB, hitInfoRTB, nodeRT ); + + if( ( nodeRB != 0 ) && + ( nodeRB != nodeRT ) ) + hittedRTB |= m_accelerator->Intersect( rayRTB, hitInfoRTB, nodeRB ); + + if( hittedRTB ) + cRTB = CCOLORRGB( shadeHit( bgColorY, + rayRTB, + hitInfoRTB, + false, + 0, + false) ); + else + { + hitInfoRTB.m_tHit = std::numeric_limits::infinity(); + + if( m_accelerator->Intersect( rayRTB, hitInfoRTB ) ) + cRTB = CCOLORRGB( shadeHit( bgColorY, + rayRTB, + hitInfoRTB, + false, + 0, + false ) ); + } + } } - } - } - cRTB_old = cRTB; + cRTB_old = cRTB; - // Trace and shade cLRB - // ///////////////////////////////////////////////////////////// - CCOLORRGB cLRB = bgColorYRGB; + // Trace and shade cLRB + // ///////////////////////////////////////////////////////////// + CCOLORRGB cLRB = bgColorYRGB; - const SFVEC3F &oriLB = blockPacket.m_ray[ iLB ].m_Origin; - const SFVEC3F &dirLB = blockPacket.m_ray[ iLB ].m_Dir; + const SFVEC3F &oriLB = blockPacket.m_ray[ iLB ].m_Origin; + const SFVEC3F &dirLB = blockPacket.m_ray[ iLB ].m_Dir; - // Trace the center ray - RAY rayLRB; - rayLRB.Init( ( oriLB + oriRB ) * 0.5f, - glm::normalize( ( dirLB + dirRB ) * 0.5f ) ); + // Trace the center ray + RAY rayLRB; + rayLRB.Init( ( oriLB + oriRB ) * 0.5f, + glm::normalize( ( dirLB + dirRB ) * 0.5f ) ); - HITINFO hitInfoLRB; - hitInfoLRB.m_tHit = std::numeric_limits::infinity(); + HITINFO hitInfoLRB; + hitInfoLRB.m_tHit = std::numeric_limits::infinity(); - if( hitPacket[ iLB 
].m_hitresult && - hitPacket[ iRB ].m_hitresult && - ( hitPacket[ iLB ].m_HitInfo.pHitObject == - hitPacket[ iRB ].m_HitInfo.pHitObject ) ) - { - hitInfoLRB.pHitObject = hitPacket[ iLB ].m_HitInfo.pHitObject; + if( hitPacket[ iLB ].m_hitresult && + hitPacket[ iRB ].m_hitresult && + ( hitPacket[ iLB ].m_HitInfo.pHitObject == + hitPacket[ iRB ].m_HitInfo.pHitObject ) ) + { + hitInfoLRB.pHitObject = hitPacket[ iLB ].m_HitInfo.pHitObject; - hitInfoLRB.m_tHit = ( hitPacket[ iLB ].m_HitInfo.m_tHit + - hitPacket[ iRB ].m_HitInfo.m_tHit ) * 0.5f; + hitInfoLRB.m_tHit = ( hitPacket[ iLB ].m_HitInfo.m_tHit + + hitPacket[ iRB ].m_HitInfo.m_tHit ) * 0.5f; - hitInfoLRB.m_HitNormal = - glm::normalize( ( hitPacket[ iLB ].m_HitInfo.m_HitNormal + - hitPacket[ iRB ].m_HitInfo.m_HitNormal ) * 0.5f ); + hitInfoLRB.m_HitNormal = + glm::normalize( ( hitPacket[ iLB ].m_HitInfo.m_HitNormal + + hitPacket[ iRB ].m_HitInfo.m_HitNormal ) * 0.5f ); - cLRB = CCOLORRGB( shadeHit( bgColorY, rayLRB, hitInfoLRB, false, 0, false ) ); - cLRB = BlendColor( cLRB, BlendColor( cLB, cRB) ); - } - else - { - if( hitPacket[ iLB ].m_hitresult || - hitPacket[ iRB ].m_hitresult ) // If any hits - { - const unsigned int nodeLB = hitPacket[ iLB ].m_HitInfo.m_acc_node_info; - const unsigned int nodeRB = hitPacket[ iRB ].m_HitInfo.m_acc_node_info; - - bool hittedLRB = false; - - if( nodeLB != 0 ) - hittedLRB |= m_accelerator->Intersect( rayLRB, hitInfoLRB, nodeLB ); - - if( ( nodeRB != 0 ) && - ( nodeRB != nodeLB ) ) - hittedLRB |= m_accelerator->Intersect( rayLRB, hitInfoLRB, nodeRB ); - - if( hittedLRB ) cLRB = CCOLORRGB( shadeHit( bgColorY, rayLRB, hitInfoLRB, false, 0, false ) ); + cLRB = BlendColor( cLRB, BlendColor( cLB, cRB) ); + } else { - hitInfoLRB.m_tHit = std::numeric_limits::infinity(); + if( hitPacket[ iLB ].m_hitresult || + hitPacket[ iRB ].m_hitresult ) // If any hits + { + const unsigned int nodeLB = hitPacket[ iLB ].m_HitInfo.m_acc_node_info; + const unsigned int nodeRB = hitPacket[ iRB 
].m_HitInfo.m_acc_node_info; - if( m_accelerator->Intersect( rayLRB, hitInfoLRB ) ) - cLRB = CCOLORRGB( shadeHit( bgColorY, - rayLRB, - hitInfoLRB, - false, - 0, - false ) ); + bool hittedLRB = false; + + if( nodeLB != 0 ) + hittedLRB |= m_accelerator->Intersect( rayLRB, hitInfoLRB, nodeLB ); + + if( ( nodeRB != 0 ) && + ( nodeRB != nodeLB ) ) + hittedLRB |= m_accelerator->Intersect( rayLRB, hitInfoLRB, nodeRB ); + + if( hittedLRB ) + cLRB = CCOLORRGB( shadeHit( bgColorY, rayLRB, hitInfoLRB, false, 0, false ) ); + else + { + hitInfoLRB.m_tHit = std::numeric_limits::infinity(); + + if( m_accelerator->Intersect( rayLRB, hitInfoLRB ) ) + cLRB = CCOLORRGB( shadeHit( bgColorY, + rayLRB, + hitInfoLRB, + false, + 0, + false ) ); + } + } } + + cLRB_old[x] = cLRB; + + + // Trace and shade cLTC + // ///////////////////////////////////////////////////////////// + CCOLORRGB cLTC = BlendColor( cLT , cC ); + + if( hitPacket[ iLT ].m_hitresult || hittedC ) + { + // Trace the center ray + RAY rayLTC; + rayLTC.Init( ( oriLT + oriC ) * 0.5f, + glm::normalize( ( dirLT + dirC ) * 0.5f ) ); + + HITINFO hitInfoLTC; + hitInfoLTC.m_tHit = std::numeric_limits::infinity(); + + bool hitted = false; + + if( hittedC ) + hitted = centerHitInfo.pHitObject->Intersect( rayLTC, hitInfoLTC ); + else + if( hitPacket[ iLT ].m_hitresult ) + hitted = hitPacket[ iLT ].m_HitInfo.pHitObject->Intersect( rayLTC, + hitInfoLTC ); + + if( hitted ) + cLTC = CCOLORRGB( shadeHit( bgColorY, rayLTC, hitInfoLTC, false, 0, false ) ); + } + + + // Trace and shade cRTC + // ///////////////////////////////////////////////////////////// + CCOLORRGB cRTC = BlendColor( cRT , cC ); + + if( hitPacket[ iRT ].m_hitresult || hittedC ) + { + // Trace the center ray + RAY rayRTC; + rayRTC.Init( ( oriRT + oriC ) * 0.5f, + glm::normalize( ( dirRT + dirC ) * 0.5f ) ); + + HITINFO hitInfoRTC; + hitInfoRTC.m_tHit = std::numeric_limits::infinity(); + + bool hitted = false; + + if( hittedC ) + hitted = 
centerHitInfo.pHitObject->Intersect( rayRTC, hitInfoRTC ); + else + if( hitPacket[ iRT ].m_hitresult ) + hitted = hitPacket[ iRT ].m_HitInfo.pHitObject->Intersect( rayRTC, + hitInfoRTC ); + + if( hitted ) + cRTC = CCOLORRGB( shadeHit( bgColorY, rayRTC, hitInfoRTC, false, 0, false ) ); + } + + + // Trace and shade cLBC + // ///////////////////////////////////////////////////////////// + CCOLORRGB cLBC = BlendColor( cLB , cC ); + + if( hitPacket[ iLB ].m_hitresult || hittedC ) + { + // Trace the center ray + RAY rayLBC; + rayLBC.Init( ( oriLB + oriC ) * 0.5f, + glm::normalize( ( dirLB + dirC ) * 0.5f ) ); + + HITINFO hitInfoLBC; + hitInfoLBC.m_tHit = std::numeric_limits::infinity(); + + bool hitted = false; + + if( hittedC ) + hitted = centerHitInfo.pHitObject->Intersect( rayLBC, hitInfoLBC ); + else + if( hitPacket[ iLB ].m_hitresult ) + hitted = hitPacket[ iLB ].m_HitInfo.pHitObject->Intersect( rayLBC, + hitInfoLBC ); + + if( hitted ) + cLBC = CCOLORRGB( shadeHit( bgColorY, rayLBC, hitInfoLBC, false, 0, false ) ); + } + + + // Trace and shade cRBC + // ///////////////////////////////////////////////////////////// + CCOLORRGB cRBC = BlendColor( cRB , cC ); + + if( hitPacket[ iRB ].m_hitresult || hittedC ) + { + // Trace the center ray + RAY rayRBC; + rayRBC.Init( ( oriRB + oriC ) * 0.5f, + glm::normalize( ( dirRB + dirC ) * 0.5f ) ); + + HITINFO hitInfoRBC; + hitInfoRBC.m_tHit = std::numeric_limits::infinity(); + + bool hitted = false; + + if( hittedC ) + hitted = centerHitInfo.pHitObject->Intersect( rayRBC, hitInfoRBC ); + else + if( hitPacket[ iRB ].m_hitresult ) + hitted = hitPacket[ iRB ].m_HitInfo.pHitObject->Intersect( rayRBC, + hitInfoRBC ); + + if( hitted ) + cRBC = CCOLORRGB( shadeHit( bgColorY, rayRBC, hitInfoRBC, false, 0, false ) ); + } + + + // Set pixel colors + // ///////////////////////////////////////////////////////////// + + GLubyte *ptr = &ptrPBO[ (4 * x + m_blockPositionsFast[iBlock].x + + m_realBufferSize.x * + (m_blockPositionsFast[iBlock].y + 
4 * y)) * 4 ]; + SetPixel( ptr + 0, cLT ); + SetPixel( ptr + 4, BlendColor( cLT, cLRT, cLTC ) ); + SetPixel( ptr + 8, cLRT ); + SetPixel( ptr + 12, BlendColor( cLRT, cRT, cRTC ) ); + + ptr += m_realBufferSize.x * 4; + SetPixel( ptr + 0, BlendColor( cLT , cLTB, cLTC ) ); + SetPixel( ptr + 4, BlendColor( cLTC, BlendColor( cLT , cC ) ) ); + SetPixel( ptr + 8, BlendColor( cC, BlendColor( cLRT, cLTC, cRTC ) ) ); + SetPixel( ptr + 12, BlendColor( cRTC, BlendColor( cRT , cC ) ) ); + + ptr += m_realBufferSize.x * 4; + SetPixel( ptr + 0, cLTB ); + SetPixel( ptr + 4, BlendColor( cC, BlendColor( cLTB, cLTC, cLBC ) ) ); + SetPixel( ptr + 8, cC ); + SetPixel( ptr + 12, BlendColor( cC, BlendColor( cRTB, cRTC, cRBC ) ) ); + + ptr += m_realBufferSize.x * 4; + SetPixel( ptr + 0, BlendColor( cLB , cLTB, cLBC ) ); + SetPixel( ptr + 4, BlendColor( cLBC, BlendColor( cLB , cC ) ) ); + SetPixel( ptr + 8, BlendColor( cC, BlendColor( cLRB, cLBC, cRBC ) ) ); + SetPixel( ptr + 12, BlendColor( cRBC, BlendColor( cRB , cC ) ) ); } } - - cLRB_old[x] = cLRB; - - - // Trace and shade cLTC - // ///////////////////////////////////////////////////////////// - CCOLORRGB cLTC = BlendColor( cLT , cC ); - - if( hitPacket[ iLT ].m_hitresult || hittedC ) - { - // Trace the center ray - RAY rayLTC; - rayLTC.Init( ( oriLT + oriC ) * 0.5f, - glm::normalize( ( dirLT + dirC ) * 0.5f ) ); - - HITINFO hitInfoLTC; - hitInfoLTC.m_tHit = std::numeric_limits::infinity(); - - bool hitted = false; - - if( hittedC ) - hitted = centerHitInfo.pHitObject->Intersect( rayLTC, hitInfoLTC ); - else - if( hitPacket[ iLT ].m_hitresult ) - hitted = hitPacket[ iLT ].m_HitInfo.pHitObject->Intersect( rayLTC, - hitInfoLTC ); - - if( hitted ) - cLTC = CCOLORRGB( shadeHit( bgColorY, rayLTC, hitInfoLTC, false, 0, false ) ); - } - - - // Trace and shade cRTC - // ///////////////////////////////////////////////////////////// - CCOLORRGB cRTC = BlendColor( cRT , cC ); - - if( hitPacket[ iRT ].m_hitresult || hittedC ) - { - // Trace the 
center ray - RAY rayRTC; - rayRTC.Init( ( oriRT + oriC ) * 0.5f, - glm::normalize( ( dirRT + dirC ) * 0.5f ) ); - - HITINFO hitInfoRTC; - hitInfoRTC.m_tHit = std::numeric_limits::infinity(); - - bool hitted = false; - - if( hittedC ) - hitted = centerHitInfo.pHitObject->Intersect( rayRTC, hitInfoRTC ); - else - if( hitPacket[ iRT ].m_hitresult ) - hitted = hitPacket[ iRT ].m_HitInfo.pHitObject->Intersect( rayRTC, - hitInfoRTC ); - - if( hitted ) - cRTC = CCOLORRGB( shadeHit( bgColorY, rayRTC, hitInfoRTC, false, 0, false ) ); - } - - - // Trace and shade cLBC - // ///////////////////////////////////////////////////////////// - CCOLORRGB cLBC = BlendColor( cLB , cC ); - - if( hitPacket[ iLB ].m_hitresult || hittedC ) - { - // Trace the center ray - RAY rayLBC; - rayLBC.Init( ( oriLB + oriC ) * 0.5f, - glm::normalize( ( dirLB + dirC ) * 0.5f ) ); - - HITINFO hitInfoLBC; - hitInfoLBC.m_tHit = std::numeric_limits::infinity(); - - bool hitted = false; - - if( hittedC ) - hitted = centerHitInfo.pHitObject->Intersect( rayLBC, hitInfoLBC ); - else - if( hitPacket[ iLB ].m_hitresult ) - hitted = hitPacket[ iLB ].m_HitInfo.pHitObject->Intersect( rayLBC, - hitInfoLBC ); - - if( hitted ) - cLBC = CCOLORRGB( shadeHit( bgColorY, rayLBC, hitInfoLBC, false, 0, false ) ); - } - - - // Trace and shade cRBC - // ///////////////////////////////////////////////////////////// - CCOLORRGB cRBC = BlendColor( cRB , cC ); - - if( hitPacket[ iRB ].m_hitresult || hittedC ) - { - // Trace the center ray - RAY rayRBC; - rayRBC.Init( ( oriRB + oriC ) * 0.5f, - glm::normalize( ( dirRB + dirC ) * 0.5f ) ); - - HITINFO hitInfoRBC; - hitInfoRBC.m_tHit = std::numeric_limits::infinity(); - - bool hitted = false; - - if( hittedC ) - hitted = centerHitInfo.pHitObject->Intersect( rayRBC, hitInfoRBC ); - else - if( hitPacket[ iRB ].m_hitresult ) - hitted = hitPacket[ iRB ].m_HitInfo.pHitObject->Intersect( rayRBC, - hitInfoRBC ); - - if( hitted ) - cRBC = CCOLORRGB( shadeHit( bgColorY, rayRBC, hitInfoRBC, 
false, 0, false ) ); - } - - - // Set pixel colors - // ///////////////////////////////////////////////////////////// - - GLubyte *ptr = &ptrPBO[ (4 * x + m_blockPositionsFast[iBlock].x + - m_realBufferSize.x * - (m_blockPositionsFast[iBlock].y + 4 * y)) * 4 ]; - SetPixel( ptr + 0, cLT ); - SetPixel( ptr + 4, BlendColor( cLT, cLRT, cLTC ) ); - SetPixel( ptr + 8, cLRT ); - SetPixel( ptr + 12, BlendColor( cLRT, cRT, cRTC ) ); - - ptr += m_realBufferSize.x * 4; - SetPixel( ptr + 0, BlendColor( cLT , cLTB, cLTC ) ); - SetPixel( ptr + 4, BlendColor( cLTC, BlendColor( cLT , cC ) ) ); - SetPixel( ptr + 8, BlendColor( cC, BlendColor( cLRT, cLTC, cRTC ) ) ); - SetPixel( ptr + 12, BlendColor( cRTC, BlendColor( cRT , cC ) ) ); - - ptr += m_realBufferSize.x * 4; - SetPixel( ptr + 0, cLTB ); - SetPixel( ptr + 4, BlendColor( cC, BlendColor( cLTB, cLTC, cLBC ) ) ); - SetPixel( ptr + 8, cC ); - SetPixel( ptr + 12, BlendColor( cC, BlendColor( cRTB, cRTC, cRBC ) ) ); - - ptr += m_realBufferSize.x * 4; - SetPixel( ptr + 0, BlendColor( cLB , cLTB, cLBC ) ); - SetPixel( ptr + 4, BlendColor( cLBC, BlendColor( cLB , cC ) ) ); - SetPixel( ptr + 8, BlendColor( cC, BlendColor( cLRB, cLBC, cRBC ) ) ); - SetPixel( ptr + 12, BlendColor( cRBC, BlendColor( cRB , cC ) ) ); } - } + + threadsFinished++; + } ); + + t.detach(); } - // Wait for all threads to finish (not sure if this is need) - #pragma omp barrier + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); } diff --git a/3d-viewer/3d_rendering/3d_render_raytracing/c3d_render_raytracing.h b/3d-viewer/3d_rendering/3d_render_raytracing/c3d_render_raytracing.h index 1f46689501..4f5b417755 100644 --- a/3d-viewer/3d_rendering/3d_render_raytracing/c3d_render_raytracing.h +++ b/3d-viewer/3d_rendering/3d_render_raytracing/c3d_render_raytracing.h @@ -131,7 +131,7 @@ private: unsigned long int m_stats_start_rendering_time; /// Save the number of blocks progress of the render - long 
m_nrBlocksRenderProgress; + size_t m_nrBlocksRenderProgress; CPOSTSHADER_SSAO m_postshader_ssao; @@ -165,7 +165,7 @@ private: std::vector< SFVEC2UI > m_blockPositions; /// this flags if a position was already processed (cleared each new render) - std::vector< bool > m_blockPositionsWasProcessed; + std::vector< int > m_blockPositionsWasProcessed; /// this encodes the Morton code positions (on fast preview mode) std::vector< SFVEC2UI > m_blockPositionsFast; diff --git a/3d-viewer/3d_rendering/cimage.cpp b/3d-viewer/3d_rendering/cimage.cpp index 78622970b5..d562346d96 100644 --- a/3d-viewer/3d_rendering/cimage.cpp +++ b/3d-viewer/3d_rendering/cimage.cpp @@ -31,6 +31,10 @@ #include "buffers_debug.h" #include // For memcpy +#include +#include +#include + #ifndef CLAMP #define CLAMP(n, min, max) {if( n < min ) n=min; else if( n > max ) n = max;} #endif @@ -469,34 +473,51 @@ void CIMAGE::EfxFilter( CIMAGE *aInImg, E_FILTER aFilterType ) aInImg->m_wraping = WRAP_CLAMP; m_wraping = WRAP_CLAMP; - #pragma omp parallel for - for( int iy = 0; iy < (int)m_height; iy++) + std::atomic nextRow( 0 ); + std::atomic threadsFinished( 0 ); + + size_t parallelThreadCount = std::max( std::thread::hardware_concurrency(), 2 ); + + for( size_t ii = 0; ii < parallelThreadCount; ++ii ) { - for( int ix = 0; ix < (int)m_width; ix++ ) + std::thread t = std::thread( [&]() { - int v = 0; - - for( int sy = 0; sy < 5; sy++ ) + for( size_t iy = nextRow.fetch_add( 1 ); + iy < m_height; + iy = nextRow.fetch_add( 1 ) ) { - for( int sx = 0; sx < 5; sx++ ) + for( size_t ix = 0; ix < m_width; ix++ ) { - int factor = filter.kernel[sx][sy]; - unsigned char pixelv = aInImg->Getpixel( ix + sx - 2, - iy + sy - 2 ); + int v = 0; - v += pixelv * factor; + for( size_t sy = 0; sy < 5; sy++ ) + { + for( size_t sx = 0; sx < 5; sx++ ) + { + int factor = filter.kernel[sx][sy]; + unsigned char pixelv = aInImg->Getpixel( ix + sx - 2, + iy + sy - 2 ); + + v += pixelv * factor; + } + } + + v /= filter.div; + v += 
filter.offset; + CLAMP(v, 0, 255); + //TODO: This needs to write to a separate buffer + m_pixels[ix + iy * m_width] = v; } } - v /= filter.div; + threadsFinished++; + } ); - v += filter.offset; - - CLAMP(v, 0, 255); - - m_pixels[ix + iy * m_width] = v; - } + t.detach(); } + + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); } diff --git a/3d-viewer/openmp_mutex.h b/3d-viewer/openmp_mutex.h deleted file mode 100644 index 1b78d6f6d9..0000000000 --- a/3d-viewer/openmp_mutex.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * This program source code file is part of KiCad, a free EDA CAD application. - * - * Copyright (C) 2016 Mario Luzeiro - * Copyright (C) 1992-2016 KiCad Developers, see AUTHORS.txt for contributors. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you may find one here: - * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html - * or you may search the http://www.gnu.org website for the version 2 license, - * or you may write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - - -/** - * @file openmp_mutex.h - * @brief a mutex for openmp got from the website: - * http://bisqwit.iki.fi/story/howto/openmp/ - * by Joel Yliluoma - */ - -#ifndef _OPENMP_MUTEX_H -#define _OPENMP_MUTEX_H - -#ifdef _OPENMP - -# include - -struct MutexType -{ - MutexType() { omp_init_lock( &lock ); } - ~MutexType() { omp_destroy_lock( &lock ); } - void Lock() { omp_set_lock( &lock ); } - void Unlock() { omp_unset_lock( &lock ); } - - MutexType( const MutexType& ) { omp_init_lock( &lock ); } - MutexType& operator= ( const MutexType& ) { return *this; } -public: - omp_lock_t lock; -}; - -#else - -/// A dummy mutex that doesn't actually exclude anything, -/// but as there is no parallelism either, no worries. -struct MutexType -{ - void Lock() {} - void Unlock() {} -}; -#endif - -/// An exception-safe scoped lock-keeper. -struct ScopedLock -{ - explicit ScopedLock( MutexType& m ) : mut( m ), locked( true ) { mut.Lock(); } - ~ScopedLock() { Unlock(); } - void Unlock() { if( !locked ) return; locked = false; mut.Unlock(); } - void LockAgain() { if( locked ) return; mut.Lock(); locked = true; } - -private: - MutexType& mut; - bool locked; - -private: // prevent copying the scoped lock. 
- void operator=(const ScopedLock&); - ScopedLock(const ScopedLock&); -}; - -#endif // _OPENMP_MUTEX_H diff --git a/CMakeLists.txt b/CMakeLists.txt index 903835f240..bc2a8e9f7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -543,24 +543,6 @@ include( ExternalProject ) #================================================ include( CheckFindPackageResult ) -# -# Find OpenMP support, optional -# - -find_package( OpenMP ) - -if( OPENMP_FOUND ) - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) - add_definitions( -DUSE_OPENMP ) - - # MinGW does not include the OpenMP link library and FindOpenMP.cmake does not - # set it either. Not sure this is the most elegant solution but it works. - if( MINGW ) - set( OPENMP_LIBRARIES gomp ) - endif() -endif() - # # Find wxWidgets library, required # diff --git a/common/geometry/shape_poly_set.cpp b/common/geometry/shape_poly_set.cpp index fc2ec98574..38bafe35f1 100644 --- a/common/geometry/shape_poly_set.cpp +++ b/common/geometry/shape_poly_set.cpp @@ -52,9 +52,18 @@ SHAPE_POLY_SET::SHAPE_POLY_SET() : } -SHAPE_POLY_SET::SHAPE_POLY_SET( const SHAPE_POLY_SET& aOther ) : +SHAPE_POLY_SET::SHAPE_POLY_SET( const SHAPE_POLY_SET& aOther, bool aDeepCopy ) : SHAPE( SH_POLY_SET ), m_polys( aOther.m_polys ) { + if( aOther.IsTriangulationUpToDate() ) + { + for( unsigned i = 0; i < aOther.TriangulatedPolyCount(); i++ ) + m_triangulatedPolys.push_back( + std::make_unique( *aOther.TriangulatedPolygon( i ) ) ); + + m_hash = aOther.GetHash(); + m_triangulationValid = true; + } } diff --git a/cvpcb/CMakeLists.txt b/cvpcb/CMakeLists.txt index 5574b972a3..0bd4b002ba 100644 --- a/cvpcb/CMakeLists.txt +++ b/cvpcb/CMakeLists.txt @@ -151,7 +151,6 @@ target_link_libraries( cvpcb_kiface gal ${wxWidgets_LIBRARIES} ${GDI_PLUS_LIBRARIES} - ${OPENMP_LIBRARIES} # used by 3d viewer ) if( BUILD_GITHUB_PLUGIN ) diff --git a/include/geometry/shape_poly_set.h b/include/geometry/shape_poly_set.h 
index bf453d2795..f0dabef526 100644 --- a/include/geometry/shape_poly_set.h +++ b/include/geometry/shape_poly_set.h @@ -424,8 +424,9 @@ class SHAPE_POLY_SET : public SHAPE * Copy constructor SHAPE_POLY_SET * Performs a deep copy of \p aOther into \p this. * @param aOther is the SHAPE_POLY_SET object that will be copied. + * @param aDeepCopy if true, make new copies of the triangulated unique_ptr vector */ - SHAPE_POLY_SET( const SHAPE_POLY_SET& aOther ); + SHAPE_POLY_SET( const SHAPE_POLY_SET& aOther, bool aDeepCopy = false ); ~SHAPE_POLY_SET(); diff --git a/pcbnew/CMakeLists.txt b/pcbnew/CMakeLists.txt index 18a62825cc..bc170b60d1 100644 --- a/pcbnew/CMakeLists.txt +++ b/pcbnew/CMakeLists.txt @@ -652,12 +652,6 @@ if ( KICAD_BUILD_TESTS ) endif () -if( ${OPENMP_FOUND} ) - set_target_properties( pcbnew_kiface PROPERTIES - COMPILE_FLAGS ${OpenMP_CXX_FLAGS} - ) -endif() - set( PCBNEW_KIFACE_LIBRARIES 3d-viewer pcbcommon @@ -675,7 +669,6 @@ set( PCBNEW_KIFACE_LIBRARIES ${PYTHON_LIBRARIES} ${Boost_LIBRARIES} # must follow GITHUB ${PCBNEW_EXTRA_LIBS} # -lrt must follow Boost - ${OPENMP_LIBRARIES} ) diff --git a/qa/polygon_triangulation/test_polygon_triangulation.cpp b/qa/polygon_triangulation/test_polygon_triangulation.cpp index 2eae35c24d..0410e12255 100644 --- a/qa/polygon_triangulation/test_polygon_triangulation.cpp +++ b/qa/polygon_triangulation/test_polygon_triangulation.cpp @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -229,31 +230,50 @@ int main( int argc, char *argv[] ) PROF_COUNTER cnt( "allBoard" ); - #pragma omp parallel for schedule(dynamic) - for( int z = 0; z<brd->GetAreaCount(); z++ ) + std::atomic<size_t> zonesToTriangulate( 0 ); /* next zone index to hand out; shared by every worker thread */ + std::atomic<size_t> threadsFinished( 0 ); + + size_t parallelThreadCount = std::max<size_t>( std::thread::hardware_concurrency(), 2 ); + for( size_t ii = 0; ii < parallelThreadCount; ++ii ) { - auto zone = brd->GetArea( z ); - SHAPE_POLY_SET poly = zone->GetFilledPolysList(); - - poly.CacheTriangulation(); - - (void) poly; - 
printf("zone %d/%d\n", ( z+1 ), brd->GetAreaCount() ); -#if 0 - PROF_COUNTER unfrac("unfrac"); - poly.Unfracture( SHAPE_POLY_SET::PM_FAST ); - unfrac.Show(); - - PROF_COUNTER triangulate("triangulate"); - - for(int i =0; i< poly.OutlineCount(); i++) + std::thread t = std::thread( [brd, &zonesToTriangulate, &threadsFinished] () { - poly.triangulatePoly( &poly.Polygon(i) ); - } - triangulate.Show(); -#endif + for( size_t areaId = zonesToTriangulate.fetch_add( 1 ); + areaId < static_cast<size_t>( brd->GetAreaCount() ); + areaId = zonesToTriangulate.fetch_add( 1 ) ) + { + auto zone = brd->GetArea( areaId ); + SHAPE_POLY_SET poly = zone->GetFilledPolysList(); + + poly.CacheTriangulation(); + + (void) poly; + printf("zone %zu/%d\n", ( areaId + 1 ), brd->GetAreaCount() ); + #if 0 + PROF_COUNTER unfrac("unfrac"); + poly.Unfracture( SHAPE_POLY_SET::PM_FAST ); + unfrac.Show(); + + PROF_COUNTER triangulate("triangulate"); + + for(int i =0; i< poly.OutlineCount(); i++) + { + poly.triangulatePoly( &poly.Polygon(i) ); + } + triangulate.Show(); + #endif + } + + threadsFinished++; + } ); + + t.detach(); /* NOTE(review): worker is detached; completion is detected by polling threadsFinished below rather than by join() */ } + while( threadsFinished < parallelThreadCount ) + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + + cnt.Show(); delete brd;