diff --git a/CMakeLists.txt b/CMakeLists.txt index c3027f8..4cffdb6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,8 +6,8 @@ project(hdrplus) # set c++ standard set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED True) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -Wall") # make sure we use Release and warn otherwise if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) diff --git a/INSTALL.md b/INSTALL.md index ab3f45f..16acc57 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -17,6 +17,7 @@ step 3: ```shell cd LibRaw-X.YY + autoreconf -f -i ./configure # with optional args make ``` diff --git a/include/hdrplus/utility.h b/include/hdrplus/utility.h index c192ab9..45a2378 100644 --- a/include/hdrplus/utility.h +++ b/include/hdrplus/utility.h @@ -3,21 +3,32 @@ #include #include // std::runtime_error #include // all opencv header -// TODO: add openmp support - -#if defined(__clang__) - #define LOOP_UNROLL unroll -#elif defined(__GNUC__) || defined(__GNUG__) - #define LOOP_UNROLL GCC unroll -#elif defined(_MSC_VER) - #define LOOP_UNROLL unroll + +// https://stackoverflow.com/questions/63404539/portable-loop-unrolling-with-template-parameter-in-c-with-gcc-icc +/// Helper macros for stringification: TO_STRING forwards to TO_STRING_HELPER so that a macro argument is expanded before # turns it into a string literal +#define TO_STRING_HELPER(X) #X +#define TO_STRING(X) TO_STRING_HELPER(X) + +// Define loop unrolling depending on the compiler: UNROLL_LOOP(n) expands to an unroll pragma via _Pragma, or to nothing on the MSVC and unknown-compiler branches. NOTE(review): the GCC branch ignores n and always requests a factor of 16 - presumably a workaround for GCC rejecting template-dependent unroll factors (the topic of the StackOverflow question linked above); confirm against the minimum supported GCC before replacing 16 with n. +#if defined(__ICC) || defined(__ICL) + #define UNROLL_LOOP(n) _Pragma(TO_STRING(unroll (n))) +#elif defined(__clang__) + #define UNROLL_LOOP(n) _Pragma(TO_STRING(unroll (n))) +#elif defined(__GNUC__) && !defined(__clang__) + #define UNROLL_LOOP(n) _Pragma(TO_STRING(GCC unroll (16))) +#elif defined(_MSC_BUILD) + #pragma message ("Microsoft Visual C++ (MSVC) detected: Loop unrolling not supported!") + #define UNROLL_LOOP(n) +#else + #warning "Unknown compiler: Loop unrolling not supported!" 
+ #define UNROLL_LOOP(n) #endif + namespace hdrplus { - template cv::Mat box_filter_kxk( const cv::Mat& src_image ) { @@ -45,10 +56,11 @@ cv::Mat box_filter_kxk( const cv::Mat& src_image ) { // Take ceiling for rounding T box_sum = T( 0 ); - //#pragma LOOP_UNROLL + + UNROLL_LOOP( kernel ) for ( int kernel_row_i = 0; kernel_row_i < kernel; ++kernel_row_i ) { - //#pragma LOOP_UNROLL + UNROLL_LOOP( kernel ) for ( int kernel_col_i = 0; kernel_col_i < kernel; ++kernel_col_i ) { box_sum += src_image_ptr[ ( row_i * kernel + kernel_row_i ) * src_step + ( col_i * kernel + kernel_col_i ) ]; @@ -84,6 +96,7 @@ cv::Mat downsample_nearest_neighbour( const cv::Mat& src_image ) // -03 should be enough to optimize below code for ( int row_i = 0; row_i < dst_height; row_i++ ) { + UNROLL_LOOP( 32 ) for ( int col_i = 0; col_i < dst_width; col_i++ ) { dst_image_ptr[ row_i * dst_step + col_i ] = \ @@ -184,8 +197,6 @@ template void print_tile( const cv::Mat& img, int tile_size, int start_idx_row, int start_idx_col ) { const T* img_ptr = (T*)img.data; - int src_height = img.size().height; - int src_width = img.size().width; int src_step = img.step1(); for ( int row = start_idx_row; row < tile_size + start_idx_row; ++row ) diff --git a/src/align.cpp b/src/align.cpp index 0a100b0..9045b41 100644 --- a/src/align.cpp +++ b/src/align.cpp @@ -30,7 +30,7 @@ static void build_per_grayimg_pyramid( \ images_pyramid.resize( inv_scale_factors.size() ); - for ( int i = 0; i < inv_scale_factors.size(); ++i ) + for ( size_t i = 0; i < inv_scale_factors.size(); ++i ) { cv::Mat blur_image; cv::Mat downsample_image; @@ -81,7 +81,7 @@ static void build_upsampled_prev_aligement( \ constexpr int repeat_factor = pyramid_scale_factor_prev_curr / tilesize_scale_factor_prev_curr; // printf("build_upsampled_prev_aligement with scale factor %d, repeat factor %d, tile size factor %d\n", \ - pyramid_scale_factor_prev_curr, repeat_factor, tilesize_scale_factor_prev_curr ); + // pyramid_scale_factor_prev_curr, 
repeat_factor, tilesize_scale_factor_prev_curr ); int dst_height = src_height * repeat_factor; int dst_width = src_width * repeat_factor; @@ -107,8 +107,10 @@ static void build_upsampled_prev_aligement( \ align_i.second *= pyramid_scale_factor_prev_curr; // repeat + UNROLL_LOOP( repeat_factor ) for ( int repeat_row_i = 0; repeat_row_i < repeat_factor; ++repeat_row_i ) { + UNROLL_LOOP( repeat_factor ) for ( int repeat_col_i = 0; repeat_col_i < repeat_factor; ++repeat_col_i ) { dst_alignment[ row_i * repeat_factor + repeat_row_i ][ col_i * repeat_factor + repeat_col_i ] = align_i; @@ -167,6 +169,7 @@ static unsigned long long l1_distance( const cv::Mat& img1, const cv::Mat& img2, const data_type* img1_ptr_row_i = img1_ptr + (img1_tile_row_start_idx + row_i) * img1_step + img1_tile_col_start_idx; const data_type* img2_ptr_row_i = img2_ptr + (img2_tile_row_start_idx + row_i) * img2_step + img2_tile_col_start_idx; + UNROLL_LOOP( tile_size ) for ( int col_i = 0; col_i < tile_size; ++col_i ) { data_type l1 = CUSTOME_ABS( img1_ptr_row_i[ col_i ] - img2_ptr_row_i[ col_i ] ); @@ -232,6 +235,7 @@ static return_type l2_distance( const cv::Mat& img1, const cv::Mat& img2, \ const data_type* img1_ptr_row_i = img1_ptr + (img1_tile_row_start_idx + row_i) * img1_step + img1_tile_col_start_idx; const data_type* img2_ptr_row_i = img2_ptr + (img2_tile_row_start_idx + row_i) * img2_step + img2_tile_col_start_idx; + UNROLL_LOOP( tile_size ) for ( int col_i = 0; col_i < tile_size; ++col_i ) { data_type l1 = CUSTOME_ABS( img1_ptr_row_i[ col_i ] - img2_ptr_row_i[ col_i ] ); @@ -372,7 +376,7 @@ void align_image_level( \ // printf("Alter image pad h=%d, w=%d: \n", alt_img_pad.size().height, alt_img_pad.size().width ); // print_img( alt_img_pad ); - //printf("!! enlarged tile size %d\n", curr_tile_size + 2 * search_radiou ); + // printf("!! 
enlarged tile size %d\n", curr_tile_size + 2 * search_radiou ); int alt_tile_row_idx_max = alt_img_pad.size().height - ( curr_tile_size + 2 * search_radiou ); int alt_tile_col_idx_max = alt_img_pad.size().width - ( curr_tile_size + 2 * search_radiou ); @@ -389,8 +393,8 @@ void align_image_level( \ int ref_tile_row_start_idx_i = ref_tile_row_i * curr_tile_size / 2; int ref_tile_col_start_idx_i = ref_tile_col_i * curr_tile_size / 2; - //printf("\nRef img tile [%d, %d] -> start idx [%d, %d] (row, col)\n", \ - ref_tile_row_i, ref_tile_col_i, ref_tile_row_start_idx_i, ref_tile_col_start_idx_i ); + // printf("\nRef img tile [%d, %d] -> start idx [%d, %d] (row, col)\n", \ + // ref_tile_row_i, ref_tile_col_i, ref_tile_row_start_idx_i, ref_tile_col_start_idx_i ); // printf("\nRef img tile [%d, %d]\n", ref_tile_row_i, ref_tile_col_i ); // print_tile( ref_img, curr_tile_size, ref_tile_row_start_idx_i, ref_tile_col_start_idx_i ); @@ -410,21 +414,21 @@ void align_image_level( \ alt_tile_col_start_idx_i = 0; if ( alt_tile_row_start_idx_i > alt_tile_row_idx_max ) { - int before = alt_tile_row_start_idx_i; + // int before = alt_tile_row_start_idx_i; alt_tile_row_start_idx_i = alt_tile_row_idx_max; // printf("@@ change start x from %d to %d\n", before, alt_tile_row_idx_max); } if ( alt_tile_col_start_idx_i > alt_tile_col_idx_max ) { - int before = alt_tile_col_start_idx_i; + // int before = alt_tile_col_start_idx_i; alt_tile_col_start_idx_i = alt_tile_col_idx_max; // printf("@@ change start y from %d to %d\n", before, alt_tile_col_idx_max ); } // Because alternative image is padded with search radious. 
// Using same coordinate with reference image will automatically considered search radious * 2 - //printf("Alt image tile [%d, %d]-> start idx [%d, %d]\n", \ - ref_tile_row_i, ref_tile_col_i, alt_tile_row_start_idx_i, alt_tile_col_start_idx_i ); + // printf("Alt image tile [%d, %d]-> start idx [%d, %d]\n", \ + // ref_tile_row_i, ref_tile_col_i, alt_tile_row_start_idx_i, alt_tile_col_start_idx_i ); // printf("\nAlt image tile [%d, %d]\n", ref_tile_row_i, ref_tile_col_i ); // print_tile( alt_img_pad, curr_tile_size + 2 * search_radiou, alt_tile_row_start_idx_i, alt_tile_col_start_idx_i ); @@ -436,16 +440,16 @@ void align_image_level( \ { for ( int search_col_j = 0; search_col_j < ( search_radiou * 2 + 1 ); search_col_j++ ) { - //printf("\n--->tile at [%d, %d] search (%d, %d)\n", \ - ref_tile_row_i, ref_tile_col_i, search_row_j - search_radiou, search_col_j - search_radiou ); + // printf("\n--->tile at [%d, %d] search (%d, %d)\n", \ + // ref_tile_row_i, ref_tile_col_i, search_row_j - search_radiou, search_col_j - search_radiou ); // TODO: currently distance is incorrect unsigned long long distance_j = distance_func_ptr( ref_img, alt_img_pad, \ ref_tile_row_start_idx_i, ref_tile_col_start_idx_i, \ alt_tile_row_start_idx_i + search_row_j, alt_tile_col_start_idx_i + search_col_j ); - //printf("<---tile at [%d, %d] search (%d, %d), new dis %llu, old dis %llu\n", \ - ref_tile_row_i, ref_tile_col_i, search_row_j - search_radiou, search_col_j - search_radiou, distance_j, min_distance_i ); + // printf("<---tile at [%d, %d] search (%d, %d), new dis %llu, old dis %llu\n", \ + // ref_tile_row_i, ref_tile_col_i, search_row_j - search_radiou, search_col_j - search_radiou, distance_j, min_distance_i ); // If this is smaller distance if ( distance_j < min_distance_i ) @@ -456,30 +460,30 @@ void align_image_level( \ } // If same value, choose the one closer to the original tile location - // if ( distance_j == min_distance_i && min_distance_row_i != -1 && min_distance_col_i != -1 ) - 
// { - // int prev_distance_row_2_ref = min_distance_row_i - search_radiou; - // int prev_distance_col_2_ref = min_distance_col_i - search_radiou; - // int curr_distance_row_2_ref = search_row_j - search_radiou; - // int curr_distance_col_2_ref = search_col_j - search_radiou; - - // int prev_distance_2_ref_sqr = prev_distance_row_2_ref * prev_distance_row_2_ref + prev_distance_col_2_ref * prev_distance_col_2_ref; - // int curr_distance_2_ref_sqr = curr_distance_row_2_ref * curr_distance_row_2_ref + curr_distance_col_2_ref * curr_distance_col_2_ref; - - // // previous min distance idx is farther away from ref tile start location - // if ( prev_distance_2_ref_sqr > curr_distance_2_ref_sqr ) - // { - // // printf("@@@ Same distance %d, choose closer one (%d, %d) instead of (%d, %d)\n", \ - // distance_j, search_row_j, search_col_j, min_distance_row_i, min_distance_col_i); - // min_distance_col_i = search_col_j; - // min_distance_row_i = search_row_j; - // } - // } + if ( distance_j == min_distance_i && min_distance_row_i != -1 && min_distance_col_i != -1 ) + { + int prev_distance_row_2_ref = min_distance_row_i - search_radiou; + int prev_distance_col_2_ref = min_distance_col_i - search_radiou; + int curr_distance_row_2_ref = search_row_j - search_radiou; + int curr_distance_col_2_ref = search_col_j - search_radiou; + + int prev_distance_2_ref_sqr = prev_distance_row_2_ref * prev_distance_row_2_ref + prev_distance_col_2_ref * prev_distance_col_2_ref; + int curr_distance_2_ref_sqr = curr_distance_row_2_ref * curr_distance_row_2_ref + curr_distance_col_2_ref * curr_distance_col_2_ref; + + // previous min distance idx is farther away from ref tile start location + if ( prev_distance_2_ref_sqr > curr_distance_2_ref_sqr ) + { + // printf("@@@ Same distance %d, choose closer one (%d, %d) instead of (%d, %d)\n", \ + // distance_j, search_row_j, search_col_j, min_distance_row_i, min_distance_col_i); + min_distance_col_i = search_col_j; + min_distance_row_i = search_row_j; + } 
+ } } } - //printf("tile at (%d, %d) alignment (%d, %d)\n", \ - ref_tile_row_i, ref_tile_col_i, min_distance_row_i, min_distance_col_i ); + // printf("tile at (%d, %d) alignment (%d, %d)\n", \ + // ref_tile_row_i, ref_tile_col_i, min_distance_row_i, min_distance_col_i ); int alignment_row_i = prev_alignment_row_i + min_distance_row_i - search_radiou; int alignment_col_i = prev_alignment_col_i + min_distance_col_i - search_radiou; @@ -516,38 +520,6 @@ void align_image_level( \ } -static void build_per_pyramid_reftiles_start( \ - std::vector>>>& per_pyramid_reftiles_start, \ - const std::vector>& per_grayimg_pyramid, \ - const std::vector& grayimg_tile_sizes ) -{ - per_pyramid_reftiles_start.resize( per_grayimg_pyramid.at(0).size() ); - - // Every image pyramid level - for ( int level_i = 0; level_i < per_grayimg_pyramid.at(0).size(); level_i++ ) - { - int level_i_img_h = per_grayimg_pyramid.at(0).at( level_i ).size().height; - int level_i_img_w = per_grayimg_pyramid.at(0).at( level_i ).size().width; - - int level_i_tile_size = grayimg_tile_sizes.at( level_i ); - - int num_tiles_h = level_i_img_h / (level_i_tile_size / 2) - 1; - int num_tiles_w = level_i_img_w / (level_i_tile_size / 2) - 1; - - // Allocate memory - per_pyramid_reftiles_start.at( level_i ).resize( num_tiles_h, std::vector>( num_tiles_w ) ); - - for ( int tile_col_i = 0; tile_col_i < num_tiles_h; tile_col_i++ ) - { - for ( int tile_row_j = 0; tile_row_j < num_tiles_w; tile_row_j++ ) - { - per_pyramid_reftiles_start.at( level_i ).at( tile_col_i ).at( tile_row_j ) \ - = std::make_pair( tile_col_i * level_i_tile_size, tile_row_j * level_i_tile_size ); - } - } - } -} - void align::process( const hdrplus::burst& burst_images, \ std::vector>>>& images_alignment ) diff --git a/src/burst.cpp b/src/burst.cpp index c6156fe..b3fb001 100644 --- a/src/burst.cpp +++ b/src/burst.cpp @@ -30,7 +30,7 @@ burst::burst( const std::string& burst_path, const std::string& reference_image_ // Find reference image path in input 
directory // reference image path need to be absolute path reference_image_idx = -1; - for ( int i = 0; i < bayer_image_paths.size(); ++i ) + for ( size_t i = 0; i < bayer_image_paths.size(); ++i ) { if ( bayer_image_paths[ i ] == reference_image_path ) { diff --git a/tests/test_align.cpp b/tests/test_align.cpp index 43521b7..2c79fa5 100644 --- a/tests/test_align.cpp +++ b/tests/test_align.cpp @@ -43,7 +43,7 @@ void test_align_one_level(int argc, char** argv) int num_tiles_h = rggb_imgs.at(0).size().height / ( tilesize / 2 ) - 1; int num_tiles_w = rggb_imgs.at(0).size().width / ( tilesize / 2 ) - 1; - for ( int img_channel = 0; img_channel < rggb_imgs.size(); ++img_channel ) + for ( int img_channel = 0; img_channel < int(rggb_imgs.size()); ++img_channel ) { for ( int tile_row_i = 0; tile_row_i < num_tiles_h; ++tile_row_i ) {