From 4ad746055441cac4b199e4be722509b694dfa9fb Mon Sep 17 00:00:00 2001 From: Xiao Song Date: Fri, 6 May 2022 11:07:07 -0700 Subject: [PATCH] omp support --- CMakeLists.txt | 6 +++++- include/hdrplus/utility.h | 4 ++++ src/align.cpp | 15 ++++++++++++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cffdb6..aa966c0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,9 @@ message(STATUS "Found LIBRAW_LIBRARY to be ${LIBRAW_LIBRARY}" ) find_package(exiv2 REQUIRED CONFIG NAMES exiv2) message(STATUS "Found Exiv2 and linked") +# OpenMP +find_package(OpenMP REQUIRED) + # library include_directories( include ) @@ -60,7 +63,8 @@ add_library(${PROJECT_NAME} SHARED ${src_files} ) target_link_libraries(${PROJECT_NAME} PUBLIC ${OpenCV_LIBS} ${LIBRAW_LIBRARY} - exiv2lib ) + exiv2lib + PRIVATE OpenMP::OpenMP_CXX ) # example set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin ) diff --git a/include/hdrplus/utility.h b/include/hdrplus/utility.h index 45a2378..ad972a9 100644 --- a/include/hdrplus/utility.h +++ b/include/hdrplus/utility.h @@ -3,6 +3,7 @@ #include #include // std::runtime_error #include // all opencv header +#include // https://stackoverflow.com/questions/63404539/portable-loop-unrolling-with-template-parameter-in-c-with-gcc-icc /// Helper macros for stringification @@ -50,6 +51,7 @@ cv::Mat box_filter_kxk( const cv::Mat& src_image ) int dst_width = dst_image.size().width; int dst_step = dst_image.step1(); + #pragma omp parallel for for ( int row_i = 0; row_i < dst_height; ++row_i ) { for ( int col_i = 0; col_i < dst_width; col_i++ ) @@ -94,6 +96,7 @@ cv::Mat downsample_nearest_neighbour( const cv::Mat& src_image ) int dst_step = dst_image.step1(); // -03 should be enough to optimize below code + #pragma omp parallel for for ( int row_i = 0; row_i < dst_height; row_i++ ) { UNROLL_LOOP( 32 ) @@ -169,6 +172,7 @@ void extract_rgb_fmom_bayer( const cv::Mat& bayer_img, \ T* img_ch3_ptr = (T*)img_ch3.data; T* img_ch4_ptr = (T*)img_ch4.data; + #pragma omp parallel for for ( int rgb_row_i = 0; rgb_row_i < rgb_height; rgb_row_i++ ) { int rgb_row_i_offset = rgb_row_i * rgb_step; diff --git a/src/align.cpp b/src/align.cpp index 9045b41..9529f91 100644 --- a/src/align.cpp +++ b/src/align.cpp @@ -5,6 +5,7 @@ #include // std::make_pair #include // std::runtime_error #include // all opencv header +#include #include "hdrplus/align.h" #include "hdrplus/burst.h" #include "hdrplus/utility.h" @@ -97,6 +98,7 @@ static void build_upsampled_prev_aligement( \ dst_alignment.resize( num_tiles_h, std::vector>( num_tiles_w, std::pair(0, 0) ) ); // Upsample alignment + #pragma omp parallel for collapse(2) for ( int row_i = 0; row_i < src_height; row_i++ ) { for ( int col_i = 0; col_i < src_width; col_i++ ) @@ -110,10 +112,12 @@ static void build_upsampled_prev_aligement( \ UNROLL_LOOP( repeat_factor ) for ( int repeat_row_i = 0; repeat_row_i < repeat_factor; ++repeat_row_i ) { + int repeat_row_i_offset = row_i * repeat_factor + repeat_row_i; UNROLL_LOOP( repeat_factor ) for ( int repeat_col_i = 0; repeat_col_i < repeat_factor; ++repeat_col_i ) { - dst_alignment[ row_i * repeat_factor + repeat_row_i ][ col_i * repeat_factor + repeat_col_i ] = align_i; + int repeat_col_i_offset = col_i * repeat_factor + repeat_col_i; + dst_alignment[ repeat_row_i_offset ][ repeat_col_i_offset ] = align_i; } } } @@ -163,7 +167,8 @@ static unsigned long long l1_distance( const cv::Mat& img1, const cv::Mat& img2, } return_type sum(0); - // TODO: add pragma unroll here + + UNROLL_LOOP( tile_size ) for ( int row_i = 0; row_i < tile_size; ++row_i ) { const data_type* img1_ptr_row_i = img1_ptr + (img1_tile_row_start_idx + row_i) * img1_step + img1_tile_col_start_idx; @@ -229,7 +234,8 @@ static return_type l2_distance( const cv::Mat& img1, const cv::Mat& img2, \ // print_tile( img2, tile_size, img2_tile_row_start_idx, img2_tile_col_start_idx ); return_type sum(0); - // TODO: add pragma unroll here + + UNROLL_LOOP( tile_size ) for ( int row_i = 0; row_i < tile_size; ++row_i ) { const data_type* img1_ptr_row_i = img1_ptr + (img1_tile_row_start_idx + row_i) * img1_step + img1_tile_col_start_idx; @@ -385,10 +391,13 @@ void align_image_level( \ std::vector> distances( num_tiles_h, std::vector( num_tiles_w, 0 )); /* Iterate through all reference tile & compute distance */ + #pragma omp parallel for collapse(2) for ( int ref_tile_row_i = 0; ref_tile_row_i < num_tiles_h; ref_tile_row_i++ ) { for ( int ref_tile_col_i = 0; ref_tile_col_i < num_tiles_w; ref_tile_col_i++ ) { + printf("num omp thread %d\n", omp_get_num_threads() ); + // Upper left index of reference tile int ref_tile_row_start_idx_i = ref_tile_row_i * curr_tile_size / 2; int ref_tile_col_start_idx_i = ref_tile_col_i * curr_tile_size / 2;