diff --git a/include/hdrplus/utility.h b/include/hdrplus/utility.h index ad972a9..6c7254f 100644 --- a/include/hdrplus/utility.h +++ b/include/hdrplus/utility.h @@ -96,7 +96,7 @@ cv::Mat downsample_nearest_neighbour( const cv::Mat& src_image ) int dst_step = dst_image.step1(); // -03 should be enough to optimize below code - #pragma omp parallel for + //#pragma omp parallel for for ( int row_i = 0; row_i < dst_height; row_i++ ) { UNROLL_LOOP( 32 ) @@ -142,10 +142,10 @@ void print_cvmat( cv::Mat image ) * @return vector of RGB image. OpenCV internally maintain reference count. * Thus this step won't create deep copy overhead. * - * @example extract_rgb_fmom_bayer( bayer_img, rgb_vector_container ); + * @example extract_rgb_from_bayer( bayer_img, rgb_vector_container ); */ template -void extract_rgb_fmom_bayer( const cv::Mat& bayer_img, \ +void extract_rgb_from_bayer( const cv::Mat& bayer_img, \ cv::Mat& img_ch1, cv::Mat& img_ch2, cv::Mat& img_ch3, cv::Mat& img_ch4 ) { const T* bayer_img_ptr = (const T*)bayer_img.data; diff --git a/src/align.cpp b/src/align.cpp index 9529f91..6c531a6 100644 --- a/src/align.cpp +++ b/src/align.cpp @@ -98,7 +98,7 @@ static void build_upsampled_prev_aligement( \ dst_alignment.resize( num_tiles_h, std::vector>( num_tiles_w, std::pair(0, 0) ) ); // Upsample alignment - #pragma omp parallel for collapse(2) + #pragma omp parallel for for ( int row_i = 0; row_i < src_height; row_i++ ) { for ( int col_i = 0; col_i < src_width; col_i++ ) @@ -255,6 +255,45 @@ static return_type l2_distance( const cv::Mat& img1, const cv::Mat& img2, \ } +template +static cv::Mat extract_img_tile( const cv::Mat& img, int img_tile_row_start_idx, int img_tile_col_start_idx ) +{ + const T* img_ptr = (const T*)img.data; + int img_width = img.size().width; + int img_height = img.size().height; + int img_step = img.step1(); + + if ( img_tile_row_start_idx < 0 || img_tile_row_start_idx > img_height - tile_size ) + { + throw std::runtime_error("extract_img_tile 
img_tile_row_start_idx out of valid range\n"); + } + + if ( img_tile_col_start_idx < 0 || img_tile_col_start_idx > img_width - tile_size ) + { + throw std::runtime_error("extract_img_tile img_tile_col_start_idx out of valid range\n"); + } + + cv::Mat img_tile( tile_size, tile_size, img.type() ); + T* img_tile_ptr = (T*)img_tile.data; + int img_tile_step = img_tile.step1(); + + UNROLL_LOOP( tile_size ) + for ( int row_i = 0; row_i < tile_size; ++row_i ) + { + const T* img_ptr_row_i = img_ptr + img_step * ( img_tile_row_start_idx + row_i ); + T* img_tile_ptr_row_i = img_tile_ptr + img_tile_step * row_i; + + UNROLL_LOOP( tile_size ) + for ( int col_i = 0; col_i < tile_size; ++col_i ) + { + img_tile_ptr_row_i[ col_i ] = img_ptr_row_i[ img_tile_col_start_idx + col_i ]; + } + } + + return img_tile; +} + + void align_image_level( \ const cv::Mat& ref_img, \ const cv::Mat& alt_img, \ @@ -326,6 +365,42 @@ void align_image_level( \ } } + // Function to extract reference image tile for memory cache + cv::Mat (*extract_ref_img_tile)(const cv::Mat&, int, int) = nullptr; + if ( curr_tile_size == 8 ) + { + extract_ref_img_tile = &extract_img_tile; + } + else if ( curr_tile_size == 16 ) + { + extract_ref_img_tile = &extract_img_tile; + } + + // Function to extract search image tile for memory cache + cv::Mat (*extract_alt_img_search)(const cv::Mat&, int, int) = nullptr; + if ( curr_tile_size == 8 ) + { + if ( search_radious == 1 ) + { + extract_alt_img_search = &extract_img_tile; + } + else if ( search_radious == 4 ) + { + extract_alt_img_search = &extract_img_tile; + } + } + else if ( curr_tile_size == 16 ) + { + if ( search_radious == 1 ) + { + extract_alt_img_search = &extract_img_tile; + } + else if ( search_radious == 4 ) + { + extract_alt_img_search = &extract_img_tile; + } + } + int num_tiles_h = ref_img.size().height / (curr_tile_size / 2) - 1; int num_tiles_w = ref_img.size().width / (curr_tile_size / 2 ) - 1; @@ -387,17 +462,15 @@ void align_image_level( \ int 
alt_tile_row_idx_max = alt_img_pad.size().height - ( curr_tile_size + 2 * search_radiou ); int alt_tile_col_idx_max = alt_img_pad.size().width - ( curr_tile_size + 2 * search_radiou ); - // TODO delete below distance vector, this is for debug only - std::vector> distances( num_tiles_h, std::vector( num_tiles_w, 0 )); + // Delete below distance vector, this is for debug only + // std::vector> distances( num_tiles_h, std::vector( num_tiles_w, 0 )); /* Iterate through all reference tile & compute distance */ - #pragma omp parallel for collapse(2) + #pragma omp parallel for for ( int ref_tile_row_i = 0; ref_tile_row_i < num_tiles_h; ref_tile_row_i++ ) { for ( int ref_tile_col_i = 0; ref_tile_col_i < num_tiles_w; ref_tile_col_i++ ) { - printf("num omp thread %d\n", omp_get_num_threads() ); - // Upper left index of reference tile int ref_tile_row_start_idx_i = ref_tile_row_i * curr_tile_size / 2; int ref_tile_col_start_idx_i = ref_tile_col_i * curr_tile_size / 2; @@ -434,6 +507,10 @@ void align_image_level( \ // printf("@@ change start y from %d to %d\n", before, alt_tile_col_idx_max ); } + // Explicitly cache the reference tile and the padded alternate search window (arguments: row start, then column start) + cv::Mat ref_img_tile_i = extract_ref_img_tile( ref_img, ref_tile_row_start_idx_i, ref_tile_col_start_idx_i ); + cv::Mat alt_img_search_i = extract_alt_img_search( alt_img_pad, alt_tile_row_start_idx_i, alt_tile_col_start_idx_i ); + // Because alternative image is padded with search radious. 
// Using same coordinate with reference image will automatically considered search radious * 2 // printf("Alt image tile [%d, %d]-> start idx [%d, %d]\n", \ @@ -453,9 +530,8 @@ void align_image_level( \ // ref_tile_row_i, ref_tile_col_i, search_row_j - search_radiou, search_col_j - search_radiou ); // TODO: currently distance is incorrect - unsigned long long distance_j = distance_func_ptr( ref_img, alt_img_pad, \ - ref_tile_row_start_idx_i, ref_tile_col_start_idx_i, \ - alt_tile_row_start_idx_i + search_row_j, alt_tile_col_start_idx_i + search_col_j ); + unsigned long long distance_j = distance_func_ptr( ref_img_tile_i, alt_img_search_i, \ + 0, 0, search_row_j, search_col_j ); // printf("<---tile at [%d, %d] search (%d, %d), new dis %llu, old dis %llu\n", \ // ref_tile_row_i, ref_tile_col_i, search_row_j - search_radiou, search_col_j - search_radiou, distance_j, min_distance_i ); @@ -501,7 +577,7 @@ void align_image_level( \ // Add min_distance_i's corresbonding idx as min curr_alignment.at( ref_tile_row_i ).at( ref_tile_col_i ) = alignment_i; - distances.at( ref_tile_row_i ).at( ref_tile_col_i ) = min_distance_i; + // distances.at( ref_tile_row_i ).at( ref_tile_col_i ) = min_distance_i; } } @@ -552,6 +628,8 @@ void align::process( const hdrplus::burst& burst_images, \ // exit(1); per_grayimg_pyramid.resize( burst_images.num_images ); + + #pragma omp parallel for for ( int img_idx = 0; img_idx < burst_images.num_images; ++img_idx ) { // per_grayimg_pyramid[ img_idx ][ 0 ] is the original image diff --git a/src/burst.cpp b/src/burst.cpp index b3fb001..2f058ff 100644 --- a/src/burst.cpp +++ b/src/burst.cpp @@ -1,5 +1,6 @@ #include #include +#include #include // all opencv header #include "hdrplus/burst.h" #include "hdrplus/utility.h" @@ -74,18 +75,21 @@ burst::burst( const std::string& burst_path, const std::string& reference_image_ padding_info_bayer = std::vector{ padding_top, padding_bottom, padding_left, padding_right }; // Pad bayer image - for ( const auto& 
bayer_image_i : bayer_images ) + bayer_images_pad.resize( bayer_images.size() ); + grayscale_images_pad.resize( bayer_images.size() ); + + #pragma omp parallel for + for ( size_t img_i = 0; img_i < bayer_images.size(); ++img_i ) { cv::Mat bayer_image_pad_i; - cv::copyMakeBorder( bayer_image_i.raw_image, \ + cv::copyMakeBorder( bayer_images.at( img_i ).raw_image, \ bayer_image_pad_i, \ padding_top, padding_bottom, padding_left, padding_right, \ cv::BORDER_REFLECT ); - - // cv::Mat use internal reference count - bayer_images_pad.emplace_back( bayer_image_pad_i ); - grayscale_images_pad.emplace_back( box_filter_kxk( bayer_image_pad_i ) ); - } + + bayer_images_pad.at( img_i ) = bayer_image_pad_i; + grayscale_images_pad.at( img_i ) = box_filter_kxk( bayer_image_pad_i ); + } #ifndef NDEBUG printf("%s::%s Pad bayer image from (%d, %d) -> (%d, %d)\n", \ diff --git a/src/merge.cpp b/src/merge.cpp index 0fa1e25..b0ed800 100644 --- a/src/merge.cpp +++ b/src/merge.cpp @@ -24,7 +24,7 @@ namespace hdrplus // Get raw channels std::vector channels(4); - hdrplus::extract_rgb_fmom_bayer(reference_image, channels[0], channels[1], channels[2], channels[3]); + hdrplus::extract_rgb_from_bayer(reference_image, channels[0], channels[1], channels[2], channels[3]); std::vector processed_channels(4); // For each channel, perform denoising and merge @@ -44,7 +44,7 @@ namespace hdrplus //get alternate image cv::Mat alt_image = burst_images.bayer_images_pad[j]; std::vector alt_channels(4); - hdrplus::extract_rgb_fmom_bayer(alt_image, alt_channels[0], alt_channels[1], alt_channels[2], alt_channels[3]); + hdrplus::extract_rgb_from_bayer(alt_image, alt_channels[0], alt_channels[1], alt_channels[2], alt_channels[3]); alternate_channel_i_list.push_back(alt_channels[i]); } diff --git a/tests/test_align.cpp b/tests/test_align.cpp index 2c79fa5..6626f02 100644 --- a/tests/test_align.cpp +++ b/tests/test_align.cpp @@ -36,7 +36,7 @@ void test_align_one_level(int argc, char** argv) // Create RGB channel 
std::vector rggb_imgs( 4 ); - hdrplus::extract_rgb_fmom_bayer( bayer_image_pad, rggb_imgs.at(0), rggb_imgs.at(1), rggb_imgs.at(2), rggb_imgs.at(3) ); + hdrplus::extract_rgb_from_bayer( bayer_image_pad, rggb_imgs.at(0), rggb_imgs.at(1), rggb_imgs.at(2), rggb_imgs.at(3) ); // Get tile of each channel with the alignments int tilesize = 16; // tile size of grayscale image diff --git a/tests/test_utility.cpp b/tests/test_utility.cpp index 163f320..46b13d2 100644 --- a/tests/test_utility.cpp +++ b/tests/test_utility.cpp @@ -91,7 +91,7 @@ void test_extract_rgb_from_bayer() printf("\nbayer cv::Mat is \n"); hdrplus::print_cvmat( bayer_img ); - hdrplus::extract_rgb_fmom_bayer( bayer_img, red_img, green_img1, green_img2, blue_img ); + hdrplus::extract_rgb_from_bayer( bayer_img, red_img, green_img1, green_img2, blue_img ); printf("\nRed cv::Mat is \n"); hdrplus::print_cvmat( red_img );