Upgrade libx264

Signed-off-by: Leo Ma <begeekmyfriend@gmail.com>
9 years ago · e9aaf904a4
parent 5023200374
commit e9aaf904a4
53 changed files with 1041 additions and 716 deletions
--- a/library/src/main/jniLibs/armeabi-v7a/libenc.so
+++ b/library/src/main/jniLibs/armeabi-v7a/libenc.so
--- a/library/src/main/libenc/jni/libs/armeabi-v7a/libx264.a
+++ b/library/src/main/libenc/jni/libs/armeabi-v7a/libx264.a
--- a/library/src/main/libenc/jni/libx264/common/aarch64/mc-a.S
+++ b/library/src/main/libenc/jni/libx264/common/aarch64/mc-a.S
@ -1253,7 +1253,7 @@ load_deinterleave_chroma:
    ret
 endfunc

-function x264_plane_copy_neon, export=1
+function x264_plane_copy_core_neon, export=1
    add         x8,  x4,  #15
    and         x4,  x8,  #~15
    sub         x1,  x1,  x4
@ -1281,6 +1281,34 @@ function x264_plane_copy_neon, export=1
    ret
 endfunc

+function x264_plane_copy_swap_core_neon, export=1    // copy h rows from src to dst, swapping the two bytes of every 16-bit pair; args follow the C prototype in mc-c.c: x0=dst x1=i_dst x2=src x3=i_src w4=w w5=h
+    lsl         w4,  w4,  #1                         // w4 = 2*w = bytes handled per row (writing w4 also clears the top half of x4)
+    sub         x1,  x1,  x4                         // x1 = i_dst minus the bytes the post-incremented stores advance per row
+    sub         x3,  x3,  x4                         // x3 = i_src minus the bytes the post-incremented loads advance per row
+1:                                                   // start of one row
+    mov         w8,  w4                              // w8 = bytes left in this row
+    tbz         w4,  #4,  32f                        // bit 4 (value 16) of the row length clear: skip the single 16-byte step
+    subs        w8,  w8,  #16                        // account for the 16 bytes handled just below
+    ld1         {v0.16b}, [x2], #16                  // load 16 source bytes, src += 16
+    rev16       v0.16b, v0.16b                       // swap the two bytes inside each 16-bit element
+    st1         {v0.16b}, [x0], #16                  // store 16 bytes, dst += 16
+    b.eq        0f                                   // flags are still those of the subs above: the row was exactly 16 bytes
+32:                                                  // main loop: 32 bytes per iteration
+    subs        w8,  w8,  #32                        // flags consumed by the b.gt at the end of the loop
+    ld1         {v0.16b,v1.16b}, [x2], #32           // load 32 source bytes, src += 32
+    rev16       v0.16b, v0.16b                       // byte-swap each 16-bit element of the first vector
+    rev16       v1.16b, v1.16b                       // byte-swap each 16-bit element of the second vector
+    st1         {v0.16b,v1.16b}, [x0], #32           // store 32 bytes, dst += 32
+    b.gt        32b                                  // repeat while bytes remain; NOTE(review): assumes 2*w is a multiple of 16, otherwise this loop runs past the row - confirm the C wrapper guarantees it
+0:                                                   // end of row
+    subs        w5,  w5,  #1                         // one row done; the adds below do not touch the flags
+    add         x2,  x2,  x3                         // src: step from the end of this row to the start of the next
+    add         x0,  x0,  x1                         // dst: step from the end of this row to the start of the next
+    b.gt        1b                                   // loop while rows remain (flags from the subs on w5)
+
+    ret
+endfunc
+
 function x264_plane_copy_deinterleave_neon, export=1
    add         w9,  w6,  #15
    and         w9,  w9,  #0xfffffff0
@ -1352,7 +1380,7 @@ function x264_plane_copy_deinterleave_rgb_neon, export=1
    ret
 endfunc

-function x264_plane_copy_interleave_neon, export=1
+function x264_plane_copy_interleave_core_neon, export=1
    add         w9,  w6,  #15
    and         w9,  w9,  #0xfffffff0
    sub         x1,  x1,  x9,  lsl #1
--- a/library/src/main/libenc/jni/libx264/common/aarch64/mc-c.c
+++ b/library/src/main/libenc/jni/libx264/common/aarch64/mc-c.c
@ -49,7 +49,9 @@ void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
 void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );

-void x264_plane_copy_neon( pixel *dst, intptr_t i_dst,
+void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
+                                pixel *src, intptr_t i_src, int w, int h );
+void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
                                     pixel *src, intptr_t i_src, int w, int h );
 void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
                                         pixel *dstv, intptr_t i_dstv,
@ -58,7 +60,7 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
                                            pixel *dstb, intptr_t i_dstb,
                                            pixel *dstc, intptr_t i_dstc,
                                            pixel *src,  intptr_t i_src, int pw, int w, int h );
-void x264_plane_copy_interleave_neon( pixel *dst,  intptr_t i_dst,
+void x264_plane_copy_interleave_core_neon( pixel *dst,  intptr_t i_dst,
                                           pixel *srcu, intptr_t i_srcu,
                                           pixel *srcv, intptr_t i_srcv, int w, int h );

@ -206,6 +208,10 @@ static uint8_t *get_ref_neon( uint8_t *dst,   intptr_t *i_dst_stride,
 void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
                            uint8_t *src, intptr_t stride, int width,
                            int height, int16_t *buf );
+
+PLANE_COPY(16, neon)
+PLANE_COPY_SWAP(16, neon)
+PLANE_INTERLEAVE(neon)
 #endif // !HIGH_BIT_DEPTH

 PROPAGATE_LIST(neon)
@ -229,6 +235,7 @@ void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf )
    pf->copy[PIXEL_4x4]      = x264_mc_copy_w4_neon;

    pf->plane_copy                  = x264_plane_copy_neon;
+    pf->plane_copy_swap             = x264_plane_copy_swap_neon;
    pf->plane_copy_deinterleave     = x264_plane_copy_deinterleave_neon;
    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
    pf->plane_copy_interleave       = x264_plane_copy_interleave_neon;
--- a/library/src/main/libenc/jni/libx264/common/arm/mc-a.S
+++ b/library/src/main/libenc/jni/libx264/common/arm/mc-a.S
@ -1468,7 +1468,7 @@ function x264_load_deinterleave_chroma_fenc_neon
    bx              lr
 endfunc

-function x264_plane_copy_neon
+function x264_plane_copy_core_neon
    push            {r4,lr}
    ldr             r4,  [sp, #8]
    ldr             lr,  [sp, #12]
@ -1577,7 +1577,7 @@ block4:
    pop             {r4-r8, r10, r11, pc}
 endfunc

-function x264_plane_copy_interleave_neon
+function x264_plane_copy_interleave_core_neon
    push            {r4-r7, lr}
    ldrd            r6, r7, [sp, #28]
    ldrd            r4, r5, [sp, #20]
@ -1604,7 +1604,7 @@ blocki:
    pop             {r4-r7, pc}
 endfunc

-function x264_plane_copy_swap_neon
+function x264_plane_copy_swap_core_neon
    push            {r4-r5, lr}
    ldrd            r4, r5, [sp, #12]
    add             lr,  r4,  #15
--- a/library/src/main/libenc/jni/libx264/common/arm/mc-c.c
+++ b/library/src/main/libenc/jni/libx264/common/arm/mc-c.c
@ -48,7 +48,7 @@ void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
 void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );

-void x264_plane_copy_neon( pixel *dst, intptr_t i_dst,
+void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
                                pixel *src, intptr_t i_src, int w, int h );
 void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
                                         pixel *dstv, intptr_t i_dstv,
@ -57,10 +57,10 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
                                            pixel *dstb, intptr_t i_dstb,
                                            pixel *dstc, intptr_t i_dstc,
                                            pixel *src,  intptr_t i_src, int pw, int w, int h );
-void x264_plane_copy_interleave_neon( pixel *dst,  intptr_t i_dst,
+void x264_plane_copy_interleave_core_neon( pixel *dst,  intptr_t i_dst,
                                           pixel *srcu, intptr_t i_srcu,
                                           pixel *srcv, intptr_t i_srcv, int w, int h );
-void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
+void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
                                     pixel *src, intptr_t i_src, int w, int h );

 void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
@ -232,6 +232,10 @@ static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8
        src  += stride;
    }
 }
+
+PLANE_COPY(16, neon)
+PLANE_COPY_SWAP(16, neon)
+PLANE_INTERLEAVE(neon)
 #endif // !HIGH_BIT_DEPTH

 PROPAGATE_LIST(neon)
--- a/library/src/main/libenc/jni/libx264/common/common.c
+++ b/library/src/main/libenc/jni/libx264/common/common.c
@ -221,7 +221,6 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
    }
    else if( !strcasecmp( preset, "veryfast" ) )
    {
-        param->analyse.i_me_method = X264_ME_HEX;
        param->analyse.i_subpel_refine = 2;
        param->i_frame_reference = 1;
        param->analyse.b_mixed_references = 0;
@ -250,11 +249,10 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
    }
    else if( !strcasecmp( preset, "slow" ) )
    {
-        param->analyse.i_me_method = X264_ME_UMH;
        param->analyse.i_subpel_refine = 8;
        param->i_frame_reference = 5;
-        param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
+        param->analyse.i_trellis = 2;
        param->rc.i_lookahead = 50;
    }
    else if( !strcasecmp( preset, "slower" ) )
@ -1074,18 +1072,6 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
 /****************************************************************************
 * x264_log:
 ****************************************************************************/
-#ifdef __ANDROID__
-    #include <android/log.h>
-    #define LIBX264_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, "libx264", __VA_ARGS__))
-    #define LIBX264_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO , "libx264", __VA_ARGS__))
-    #define LIBX264_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN , "libx264", __VA_ARGS__))
-    #define LIBX264_LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR, "libx264", __VA_ARGS__))
-#else
-    #define LIBX264_LOGD(...) do {} while (0)
-    #define LIBX264_LOGI(...) do {} while (0)
-    #define LIBX264_LOGW(...) do {} while (0)
-    #define LIBX264_LOGE(...) do {} while (0)
-#endif
 void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... )
 {
    if( !h || i_level <= h->param.i_log_level )
@ -1107,25 +1093,20 @@ static void x264_log_default( void *p_unused, int i_level, const char *psz_fmt,
    {
        case X264_LOG_ERROR:
            psz_prefix = "error";
-            LIBX264_LOGE(psz_fmt, arg);
            break;
        case X264_LOG_WARNING:
            psz_prefix = "warning";
-            LIBX264_LOGW(psz_fmt, arg);
            break;
        case X264_LOG_INFO:
            psz_prefix = "info";
-            LIBX264_LOGI(psz_fmt, arg);
            break;
        case X264_LOG_DEBUG:
            psz_prefix = "debug";
-            LIBX264_LOGD(psz_fmt, arg);
            break;
        default:
            psz_prefix = "unknown";
            break;
    }
-
    fprintf( stderr, "x264 [%s]: ", psz_prefix );
    x264_vfprintf( stderr, psz_fmt, arg );
 }
--- a/library/src/main/libenc/jni/libx264/common/dct.c
+++ b/library/src/main/libenc/jni/libx264/common/dct.c
@ -990,10 +990,11 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
    if( cpu&X264_CPU_MMX2 )
    {
-        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_mmx2;
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_mmx2;
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_mmx2;
    }
+    if( cpu&X264_CPU_SSE )
+        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse;
    if( cpu&X264_CPU_SSE2_IS_FAST )
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
    if( cpu&X264_CPU_SSSE3 )
--- a/library/src/main/libenc/jni/libx264/common/mc.h
+++ b/library/src/main/libenc/jni/libx264/common/mc.h
@ -100,6 +100,98 @@ static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, in
    }\
 }

+void x264_plane_copy_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+
+#define PLANE_COPY(align, cpu) /* defines x264_plane_copy_<cpu>(): wrapper that picks between x264_plane_copy_c, x264_plane_copy_core_<cpu> and memcpy; align is the byte granularity the core routine copies in */\
+static void x264_plane_copy_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
+{\
+    int c_w = (align) / sizeof(pixel) - 1; /* (pixels per align bytes) - 1, used below as a bit mask on w */\
+    if( w < 256 ) /* tiny resolutions don't want non-temporal hints. dunno the exact threshold. */\
+        x264_plane_copy_c( dst, i_dst, src, i_src, w, h );\
+    else if( !(w&c_w) ) /* w is already a whole number of align-sized units: the core routine can take every row as is */\
+        x264_plane_copy_core_##cpu( dst, i_dst, src, i_src, w, h );\
+    else /* unaligned width: rows are copied with w rounded up, except the row that lies last in memory */\
+    {\
+        if( --h > 0 ) /* h now counts the rows handed to the core routine */\
+        {\
+            if( i_src > 0 ) /* rows ascend in memory: the final row is the last one in memory */\
+            {\
+                x264_plane_copy_core_##cpu( dst, i_dst, src, i_src, (w+c_w)&~c_w, h );\
+                dst += i_dst * h; /* advance to the one remaining row */\
+                src += i_src * h;\
+            }\
+            else /* non-positive source stride: row 0 is kept for memcpy, so the core routine starts at row 1 */\
+                x264_plane_copy_core_##cpu( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h );\
+        }\
+        /* use plain memcpy on the last line (in memory order) to avoid overreading src. */\
+        memcpy( dst, src, w*sizeof(pixel) );\
+    }\
+}
+
+void x264_plane_copy_swap_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+
+#define PLANE_COPY_SWAP(align, cpu) /* defines x264_plane_copy_swap_<cpu>(): wrapper around x264_plane_copy_swap_core_<cpu> that copies rows of w pixel pairs while exchanging the two pixels of each pair */\
+static void x264_plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
+{\
+    int c_w = ((align)>>1) / sizeof(pixel) - 1; /* (pairs per align bytes) - 1, used as a bit mask on w; align is parenthesized so an expression argument keeps its value */\
+    if( !(w&c_w) ) /* w is a whole number of align-sized units: the core routine can take every row as is */\
+        x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, w, h );\
+    else if( w > c_w ) /* unaligned, but at least one full unit per row */\
+    {\
+        if( --h > 0 ) /* h now counts the rows copied with w rounded up */\
+        {\
+            if( i_src > 0 ) /* rows ascend in memory: the final row is the last one in memory */\
+            {\
+                x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, (w+c_w)&~c_w, h );\
+                dst += i_dst * h; /* advance to the one remaining row */\
+                src += i_src * h;\
+            }\
+            else /* non-positive source stride: row 0 is handled last, so the core routine starts at row 1 */\
+                x264_plane_copy_swap_core_##cpu( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h );\
+        }\
+        x264_plane_copy_swap_core_##cpu( dst, 0, src, 0, w&~c_w, 1 ); /* remaining row: w rounded down, so nothing past the row is touched */\
+        for( int x = 2*(w&~c_w); x < 2*w; x += 2 ) /* leftover pairs of that row, swapped one pair at a time */\
+        {\
+            dst[x]   = src[x+1];\
+            dst[x+1] = src[x];\
+        }\
+    }\
+    else /* fewer pairs than one unit: use the C implementation for the whole plane */\
+        x264_plane_copy_swap_c( dst, i_dst, src, i_src, w, h );\
+}
+
+void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
+                                   pixel *srcu, intptr_t i_srcu,
+                                   pixel *srcv, intptr_t i_srcv, int w, int h );
+
+#define PLANE_INTERLEAVE(cpu) /* defines x264_plane_copy_interleave_<cpu>(): wrapper that picks between x264_plane_copy_interleave_core_<cpu> and x264_plane_copy_interleave_c */ \
+static void x264_plane_copy_interleave_##cpu( pixel *dst,  intptr_t i_dst,\
+                                              pixel *srcu, intptr_t i_srcu,\
+                                              pixel *srcv, intptr_t i_srcv, int w, int h )\
+{\
+    int c_w = 16 / sizeof(pixel) - 1; /* (pixels per 16 bytes) - 1, used below as a bit mask on w */\
+    if( !(w&c_w) ) /* w is a whole number of 16-byte units: the core routine can take every row as is */\
+        x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\
+    else if( w > c_w && (i_srcu ^ i_srcv) >= 0 ) /* only works correctly for strides with identical signs */\
+    {\
+        if( --h > 0 ) /* h now counts the rows copied with w rounded up */\
+        {\
+            if( i_srcu > 0 ) /* rows ascend in memory: the final row is the last one in memory */\
+            {\
+                x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, (w+c_w)&~c_w, h );\
+                dst  += i_dst  * h; /* advance all three pointers to the one remaining row */\
+                srcu += i_srcu * h;\
+                srcv += i_srcv * h;\
+            }\
+            else /* non-positive source stride: row 0 is handled last, so the core routine starts at row 1 */\
+                x264_plane_copy_interleave_core_##cpu( dst+i_dst, i_dst, srcu+i_srcu, i_srcu, srcv+i_srcv, i_srcv, (w+c_w)&~c_w, h );\
+        }\
+        x264_plane_copy_interleave_c( dst, 0, srcu, 0, srcv, 0, w, 1 ); /* remaining single row with the exact width, using the C implementation */\
+    }\
+    else /* narrow plane or strides of differing sign: use the C implementation for everything */\
+        x264_plane_copy_interleave_c( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\
+}
+
 struct x264_weight_t;
 typedef void (* weight_fn_t)( pixel *, intptr_t, pixel *,intptr_t, const struct x264_weight_t *, int );
 typedef struct x264_weight_t
--- a/library/src/main/libenc/jni/libx264/common/mips/mc-c.c
+++ b/library/src/main/libenc/jni/libx264/common/mips/mc-c.c
@ -3430,7 +3430,7 @@ uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride,
                x264_mc_weight_w8_msa( p_dst, *p_dst_stride,
                                       p_dst, *p_dst_stride,
                                       pWeight, i_h4w );
-                for( i_cnt = i_h4w; i_cnt < i_height ; i_cnt++ )
+                for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
                {
                    uint64_t temp0;
                    v16i8 zero = {0};
@ -3666,7 +3666,7 @@ uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride,
                                   pWeight, i_h4w );
            p_src1 = src1_org + i_h4w * i_src_stride;

-            for( i_cnt = i_h4w; i_cnt < i_height ; i_cnt++ )
+            for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
            {
                uint64_t u_temp0;
                v16i8 zero = {0};
@ -3761,9 +3761,11 @@ uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride,
        return p_src1;
    }
 }
+#endif // !HIGH_BIT_DEPTH

 void x264_mc_init_mips( int32_t cpu, x264_mc_functions_t *pf  )
 {
+#if !HIGH_BIT_DEPTH
    if( cpu & X264_CPU_MSA )
    {
        pf->mc_luma = x264_mc_luma_msa;
@ -3803,5 +3805,5 @@ void x264_mc_init_mips( int32_t cpu, x264_mc_functions_t *pf  )
        pf->memzero_aligned = x264_memzero_aligned_msa;
        pf->frame_init_lowres_core = x264_frame_init_lowres_core_msa;
    }
+#endif // !HIGH_BIT_DEPTH
 }
-#endif
--- a/library/src/main/libenc/jni/libx264/common/mvpred.c
+++ b/library/src/main/libenc/jni/libx264/common/mvpred.c
@ -370,8 +370,8 @@ static ALWAYS_INLINE int x264_mb_predict_mv_direct16x16_spatial( x264_t *h, int
            h->mb.i_partition = partition_col[0];
        }
    }
-    int i_mb_4x4 = b_interlaced ? 4 * (h->mb.i_b4_stride*mb_y + mb_x) : h->mb.i_b4_xy ;
-    int i_mb_8x8 = b_interlaced ? 2 * (h->mb.i_b8_stride*mb_y + mb_x) : h->mb.i_b8_xy ;
+    int i_mb_4x4 = b_interlaced ? 4 * (h->mb.i_b4_stride*mb_y + mb_x) : h->mb.i_b4_xy;
+    int i_mb_8x8 = b_interlaced ? 2 * (h->mb.i_b8_stride*mb_y + mb_x) : h->mb.i_b8_xy;

    int8_t *l1ref0 = &h->fref[1][0]->ref[0][i_mb_8x8];
    int8_t *l1ref1 = &h->fref[1][0]->ref[1][i_mb_8x8];
--- a/library/src/main/libenc/jni/libx264/common/osdep.h
+++ b/library/src/main/libenc/jni/libx264/common/osdep.h
@ -249,7 +249,7 @@ int x264_threading_init( void );
 static ALWAYS_INLINE int x264_pthread_fetch_and_add( int *val, int add, x264_pthread_mutex_t *mutex )
 {
 #if HAVE_THREAD
-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ > 0) && ARCH_X86
+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ > 0) && (ARCH_X86 || ARCH_X86_64)
    return __sync_fetch_and_add( val, add );
 #else
    x264_pthread_mutex_lock( mutex );
--- a/library/src/main/libenc/jni/libx264/common/pixel.c
+++ b/library/src/main/libenc/jni/libx264/common/pixel.c
@ -556,6 +556,7 @@ INTRA_MBCMP(satd, 16x16,  v, h, dc,  ,, _c )
 #if HIGH_BIT_DEPTH
 #define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx
 #define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c
+#define x264_predict_16x16_dc_mmx2 x264_predict_16x16_dc_c
 #define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse
 #define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse
 #define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse
@ -884,7 +885,6 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
        INIT8( ssd, _mmx2 );
        INIT_ADS( _mmx2 );

-        pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_mmx2;
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmx2;
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_mmx2;
 #if ARCH_X86
@ -1070,7 +1070,6 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_mmx2;
        pixf->var[PIXEL_8x16]  = x264_pixel_var_8x16_mmx2;
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_mmx2;
-        pixf->ssd_nv12_core    = x264_pixel_ssd_nv12_core_mmx2;
 #if ARCH_X86
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmx2;
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmx2;
--- a/library/src/main/libenc/jni/libx264/common/ppc/quant.c
+++ b/library/src/main/libenc/jni/libx264/common/ppc/quant.c
@ -71,7 +71,7 @@ int x264_quant_4x4_altivec( int16_t dct[16], uint16_t mf[16], uint16_t bias[16]
    vec_u32_t multEvenvA, multOddvA;
    vec_u16_t mfvA;
    vec_u16_t biasvA;
-    vec_s16_t one = vec_splat_s16(1);;
+    vec_s16_t one = vec_splat_s16(1);
    vec_s16_t nz = zero_s16v;

    vector bool short mskB;
@ -216,7 +216,7 @@ int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64]
    vec_u32_t multEvenvA, multOddvA;
    vec_u16_t mfvA;
    vec_u16_t biasvA;
-    vec_s16_t one = vec_splat_s16(1);;
+    vec_s16_t one = vec_splat_s16(1);
    vec_s16_t nz = zero_s16v;

    vector bool short mskB;
--- a/library/src/main/libenc/jni/libx264/common/x86/const-a.asm
+++ b/library/src/main/libenc/jni/libx264/common/x86/const-a.asm
@ -38,6 +38,8 @@ const pw_00ff,     times 16 dw 0x00ff
 const pw_pixel_max,times 16 dw ((1 << BIT_DEPTH)-1)
 const pw_0to15,    dw 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 const pd_1,        times 8 dd 1
+const pd_0123,     dd 0,1,2,3
+const pd_4567,     dd 4,5,6,7
 const deinterleave_shufd, dd 0,4,1,5,2,6,3,7
 const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
 const pb_unpackbd2, times 2 db 4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7
@ -63,6 +65,7 @@ const pw_ppmmppmm, dw 1,1,-1,-1,1,1,-1,-1
 const pw_pmpmpmpm, dw 1,-1,1,-1,1,-1,1,-1
 const pw_pmmpzzzz, dw 1,-1,-1,1,0,0,0,0

+const pd_8,        times 4 dd 8
 const pd_32,       times 4 dd 32
 const pd_1024,     times 4 dd 1024
 const pd_ffff,     times 4 dd 0xffff
--- a/library/src/main/libenc/jni/libx264/common/x86/dct-a.asm
+++ b/library/src/main/libenc/jni/libx264/common/x86/dct-a.asm
@ -1463,9 +1463,9 @@ cglobal zigzag_scan_4x4_frame, 2,2
 ; void zigzag_scan_4x4_field( int32_t level[16], int32_t dct[4][4] )
 ;-----------------------------------------------------------------------------
 INIT_XMM sse2
-cglobal zigzag_scan_4x4_field, 2,3
-    movu       m4, [r1+ 8]
-    pshufd     m0, m4, q3102
+cglobal zigzag_scan_4x4_field, 2,2
+    movu       m0, [r1+ 8]
+    pshufd     m0, m0, q3102
    mova       m1, [r1+32]
    mova       m2, [r1+48]
    movu  [r0+ 8], m0
@ -1480,19 +1480,14 @@ cglobal zigzag_scan_4x4_field, 2,3
 ;-----------------------------------------------------------------------------
 ; void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
 ;-----------------------------------------------------------------------------
-; sse2 is only 1 cycle faster, and ssse3/pshufb is slower on core2
-INIT_MMX mmx2
-cglobal zigzag_scan_4x4_field, 2,3
-    pshufw      m0, [r1+4], q3102
+INIT_XMM sse                            ; 16-bit zigzag_scan_4x4_field: XMM registers for the bulk copy, one MMX register for the shuffle
+cglobal zigzag_scan_4x4_field, 2,2      ; r0 = level, r1 = dct (see the prototype comment above); no scratch GPR needed
+    mova       m0, [r1]                  ; first 16 bytes of dct (coefficients 0..7)
    mova       m1, [r1+16]
-    mova        m2, [r1+24]
-    movu    [r0+4], m0
+    pshufw    mm0, [r1+4], q3102         ; reorder the four words dct[2..5] in an MMX register
+    mova     [r0], m0                    ; copy coefficients 0..7 unchanged first
    mova  [r0+16], m1
-    mova   [r0+24], m2
-    mov        r2d, [r1]
-    mov       [r0], r2d
-    mov        r2d, [r1+12]
-    mov    [r0+12], r2d
+    movq   [r0+4], mm0                   ; then overwrite level[2..5] with the reordered words (must follow the store to [r0])
    RET
 %endif ; HIGH_BIT_DEPTH

--- a/library/src/main/libenc/jni/libx264/common/x86/dct.h
+++ b/library/src/main/libenc/jni/libx264/common/x86/dct.h
@ -112,7 +112,7 @@ void x264_zigzag_scan_4x4_frame_ssse3( int16_t level[16], int16_t dct[16] );
 void x264_zigzag_scan_4x4_frame_sse2 ( int32_t level[16], int32_t dct[16] );
 void x264_zigzag_scan_4x4_frame_mmx  ( int16_t level[16], int16_t dct[16] );
 void x264_zigzag_scan_4x4_field_sse2 ( int32_t level[16], int32_t dct[16] );
-void x264_zigzag_scan_4x4_field_mmx2 ( int16_t level[16], int16_t dct[16] );
+void x264_zigzag_scan_4x4_field_sse  ( int16_t level[16], int16_t dct[16] );
 void x264_zigzag_scan_8x8_field_xop  ( int16_t level[64], int16_t dct[64] );
 void x264_zigzag_scan_8x8_field_avx  ( int32_t level[64], int32_t dct[64] );
 void x264_zigzag_scan_8x8_field_sse4 ( int32_t level[64], int32_t dct[64] );
--- a/library/src/main/libenc/jni/libx264/common/x86/mc-a2.asm
+++ b/library/src/main/libenc/jni/libx264/common/x86/mc-a2.asm
@ -67,7 +67,6 @@ pf_256:    times 4 dd 256.0
 pf_inv256: times 4 dd 0.00390625

 pd_16: times 4 dd 16
-pd_0f: times 4 dd 0xffff

 pad10: times 8 dw    10*PIXEL_MAX
 pad20: times 8 dw    20*PIXEL_MAX
@ -94,6 +93,8 @@ cextern pw_00ff
 cextern pw_3fff
 cextern pw_pixel_max
 cextern pw_0to15
+cextern pd_8
+cextern pd_0123
 cextern pd_ffff

 %macro LOAD_ADD 4
@ -285,7 +286,7 @@ cglobal hpel_filter_c, 3,3,10
    psrad      m1, 10
    psrad      m2, 10
    pslld      m2, 16
-    pand       m1, [pd_0f]
+    pand       m1, [pd_ffff]
    por        m1, m2
    CLIPW      m1, [pb_0], [pw_pixel_max]
    mova  [r0+r2], m1
@ -2178,7 +2179,7 @@ MBTREE_AVX

 %macro MBTREE_PROPAGATE_LIST 0
 ;-----------------------------------------------------------------------------
-; void mbtree_propagate_list_internal( int16_t (*mvs)[2], int *propagate_amount, uint16_t *lowres_costs,
+; void mbtree_propagate_list_internal( int16_t (*mvs)[2], int16_t *propagate_amount, uint16_t *lowres_costs,
 ;                                      int16_t *output, int bipred_weight, int mb_y, int len )
 ;-----------------------------------------------------------------------------
 cglobal mbtree_propagate_list_internal, 4,6,8
@ -2268,6 +2269,67 @@ MBTREE_PROPAGATE_LIST
 INIT_XMM avx
 MBTREE_PROPAGATE_LIST

+INIT_YMM avx2                        ; AVX2 version of mbtree_propagate_list_internal: 8 list entries per loop iteration
+cglobal mbtree_propagate_list_internal, 4+2*UNIX64,5+UNIX64,8 ; on UNIX64 bipred_weight and mb_y are also taken in registers
+    mova          xm4, [pw_0xc000]   ; mask of the two lists_used bits tested in the loop
+%if UNIX64
+    shl           r4d, 9             ; bipred_weight << 9
+    shl           r5d, 16            ; mb_y << 16: mb_y goes into the high word of each dword
+    movd          xm5, r4d
+    movd          xm6, r5d
+    vpbroadcastw  xm5, xm5           ; scaled bipred_weight in every word
+    vpbroadcastd   m6, xm6           ; mb_y << 16 in every dword
+%else
+    vpbroadcastw  xm5, r4m           ; bipred_weight straight from its argument slot
+    vpbroadcastd   m6, r5m           ; mb_y straight from its argument slot
+    psllw         xm5, 9             ; bipred_weight << 9
+    pslld          m6, 16            ; mb_y << 16
+%endif
+    mov           r4d, r6m           ; r4 = len (7th argument)
+    lea            r1, [r1+r4*2]     ; r1 = end of propagate_amount (2 bytes per entry)
+    lea            r2, [r2+r4*2]     ; r2 = end of lowres_costs (2 bytes per entry)
+    lea            r0, [r0+r4*4]     ; r0 = end of mvs (4 bytes per entry)
+    neg            r4                ; r4 = -len: negative index that counts up to zero
+    por            m6, [pd_0123]     ; 0 y 1 y 2 y 3 y 4 y 5 y 6 y 7 y (32-byte load; NOTE(review): appears to rely on pd_4567 directly following pd_0123 in const-a.asm - confirm)
+    vbroadcasti128 m7, [pw_31]       ; 31 in every word: mask for the low 5 bits of x and y
+.loop:
+    mova          xm3, [r1+r4*2]     ; 8 x propagate_amount
+    pand          xm0, xm4, [r2+r4*2] ; keep only the lists_used bits of lowres_costs
+    pmulhrsw      xm1, xm3, xm5      ; bipred_amount = (propagate_amount * bipred_weight + 32) >> 6
+    pcmpeqw       xm0, xm4           ; all-ones where both lists_used bits are set
+    pblendvb      xm3, xm3, xm1, xm0 ; (lists_used == 3) ? bipred_amount : propagate_amount
+    vpermq         m3, m3, q1100     ; low lane = qword 0 twice, high lane = qword 1 twice
+
+    movu           m0, [r0+r4*4]     ; {x, y}
+    vbroadcasti128 m1, [pd_8]        ; 8 in every dword
+    psraw          m2, m0, 5
+    paddw          m2, m6            ; {mbx, mby} = ({x, y} >> 5) + {h->mb.i_mb_x, h->mb.i_mb_y}
+    paddw          m6, m1            ; i_mb_x += 8
+    mova         [r3], m2            ; first 32 output bytes: the 8 {mbx, mby} pairs
+
+    mova           m1, [pw_32]       ; 32 in every word
+    pand           m0, m7            ; {x, y} & 31
+    psubw          m1, m0            ; 32 - ({x, y} & 31)
+    packuswb       m1, m0            ; {32-x, 32-y} {x, y} {32-x, 32-y} {x, y}
+    psrlw          m0, m1, 3
+    pand           m1, [pw_00ff]     ; 32-x x 32-x x
+    pandn          m0, m7, m0        ; (32-y y 32-y y) << 5
+    pshufd         m2, m1, q1032
+    pmullw         m1, m0            ; idx0 idx3 idx0 idx3
+    pmullw         m2, m0            ; idx1 idx2 idx1 idx2
+
+    pmulhrsw       m0, m1, m3        ; (idx0 idx3 idx0 idx3) * propagate_amount + 512 >> 10
+    pmulhrsw       m2, m3            ; (idx1 idx2 idx1 idx2) * propagate_amount + 512 >> 10
+    psignw         m0, m1            ; correct potential overflow in the idx0 input to pmulhrsw
+    punpcklwd      m1, m0, m2        ; idx01weight
+    punpckhwd      m2, m0            ; idx23weight
+    mova      [r3+32], m1            ; second 32 output bytes
+    mova      [r3+64], m2            ; third 32 output bytes
+    add            r3, 3*mmsize      ; 96 bytes of output written per iteration
+    add            r4, 8             ; 8 entries consumed
+    jl .loop                         ; continue until the negative index reaches zero
+    RET
+
 %macro MBTREE_FIX8 0
 ;-----------------------------------------------------------------------------
 ; void mbtree_fix8_pack( uint16_t *dst, float *src, int count )
--- a/library/src/main/libenc/jni/libx264/common/x86/mc-c.c
+++ b/library/src/main/libenc/jni/libx264/common/x86/mc-c.c
@ -88,10 +88,8 @@ void x264_prefetch_fenc_422_mmx2( pixel *, intptr_t, pixel *, intptr_t, int );
 void x264_prefetch_ref_mmx2( pixel *, intptr_t, int );
 void x264_plane_copy_core_sse( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
 void x264_plane_copy_core_avx( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
-void x264_plane_copy_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
 void x264_plane_copy_swap_core_ssse3( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
 void x264_plane_copy_swap_core_avx2 ( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
-void x264_plane_copy_swap_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
 void x264_plane_copy_interleave_core_mmx2( pixel *dst,  intptr_t i_dst,
                                           pixel *srcu, intptr_t i_srcu,
                                           pixel *srcv, intptr_t i_srcv, int w, int h );
@ -101,9 +99,6 @@ void x264_plane_copy_interleave_core_sse2( pixel *dst,  intptr_t i_dst,
 void x264_plane_copy_interleave_core_avx( pixel *dst,  intptr_t i_dst,
                                          pixel *srcu, intptr_t i_srcu,
                                          pixel *srcv, intptr_t i_srcv, int w, int h );
-void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
-                                   pixel *srcu, intptr_t i_srcu,
-                                   pixel *srcv, intptr_t i_srcv, int w, int h );
 void x264_plane_copy_deinterleave_mmx( pixel *dstu, intptr_t i_dstu,
                                       pixel *dstv, intptr_t i_dstv,
                                       pixel *src,  intptr_t i_src, int w, int h );
@ -493,96 +488,12 @@ HPEL(32, avx2, avx2, avx2, avx2)
 #endif
 #endif // HIGH_BIT_DEPTH

-#define PLANE_COPY(align, cpu)\
-static void x264_plane_copy_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
-{\
-    int c_w = (align) / sizeof(pixel) - 1;\
-    if( w < 256 ) /* tiny resolutions don't want non-temporal hints. dunno the exact threshold. */\
-        x264_plane_copy_c( dst, i_dst, src, i_src, w, h );\
-    else if( !(w&c_w) )\
-        x264_plane_copy_core_##cpu( dst, i_dst, src, i_src, w, h );\
-    else\
-    {\
-        if( --h > 0 )\
-        {\
-            if( i_src > 0 )\
-            {\
-                x264_plane_copy_core_##cpu( dst, i_dst, src, i_src, (w+c_w)&~c_w, h );\
-                dst += i_dst * h;\
-                src += i_src * h;\
-            }\
-            else\
-                x264_plane_copy_core_##cpu( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h );\
-        }\
-        /* use plain memcpy on the last line (in memory order) to avoid overreading src. */\
-        memcpy( dst, src, w*sizeof(pixel) );\
-    }\
-}
-
 PLANE_COPY(16, sse)
 PLANE_COPY(32, avx)

-#define PLANE_COPY_SWAP(align, cpu)\
-static void x264_plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
-{\
-    int c_w = (align>>1) / sizeof(pixel) - 1;\
-    if( !(w&c_w) )\
-        x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, w, h );\
-    else if( w > c_w )\
-    {\
-        if( --h > 0 )\
-        {\
-            if( i_src > 0 )\
-            {\
-                x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, (w+c_w)&~c_w, h );\
-                dst += i_dst * h;\
-                src += i_src * h;\
-            }\
-            else\
-                x264_plane_copy_swap_core_##cpu( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h );\
-        }\
-        x264_plane_copy_swap_core_##cpu( dst, 0, src, 0, w&~c_w, 1 );\
-        for( int x = 2*(w&~c_w); x < 2*w; x += 2 )\
-        {\
-            dst[x]   = src[x+1];\
-            dst[x+1] = src[x];\
-        }\
-    }\
-    else\
-        x264_plane_copy_swap_c( dst, i_dst, src, i_src, w, h );\
-}
-
 PLANE_COPY_SWAP(16, ssse3)
 PLANE_COPY_SWAP(32, avx2)

-#define PLANE_INTERLEAVE(cpu) \
-static void x264_plane_copy_interleave_##cpu( pixel *dst,  intptr_t i_dst,\
-                                              pixel *srcu, intptr_t i_srcu,\
-                                              pixel *srcv, intptr_t i_srcv, int w, int h )\
-{\
-    int c_w = 16 / sizeof(pixel) - 1;\
-    if( !(w&c_w) )\
-        x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\
-    else if( w > c_w && (i_srcu ^ i_srcv) >= 0 ) /* only works correctly for strides with identical signs */\
-    {\
-        if( --h > 0 )\
-        {\
-            if( i_srcu > 0 )\
-            {\
-                x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, (w+c_w)&~c_w, h );\
-                dst  += i_dst  * h;\
-                srcu += i_srcu * h;\
-                srcv += i_srcv * h;\
-            }\
-            else\
-                x264_plane_copy_interleave_core_##cpu( dst+i_dst, i_dst, srcu+i_srcu, i_srcu, srcv+i_srcv, i_srcv, (w+c_w)&~c_w, h );\
-        }\
-        x264_plane_copy_interleave_c( dst, 0, srcu, 0, srcv, 0, w, 1 );\
-    }\
-    else\
-        x264_plane_copy_interleave_c( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\
-}
-
 PLANE_INTERLEAVE(mmx2)
 PLANE_INTERLEAVE(sse2)
 #if HIGH_BIT_DEPTH
@ -621,6 +532,7 @@ do\

 PROPAGATE_LIST(ssse3)
 PROPAGATE_LIST(avx)
+PROPAGATE_LIST(avx2)

 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
 {
@ -932,6 +844,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
    pf->plane_copy_swap = x264_plane_copy_swap_avx2;
    pf->get_ref = get_ref_avx2;
    pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx2;
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx2;
    pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_avx2;
    pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_avx2;
 }
--- a/library/src/main/libenc/jni/libx264/common/x86/pixel-a.asm
+++ b/library/src/main/libenc/jni/libx264/common/x86/pixel-a.asm
@ -43,6 +43,9 @@ mask_ff:   times 16 db 0xff
 mask_ac4:  times 2 dw 0, -1, -1, -1, 0, -1, -1, -1
 mask_ac4b: times 2 dw 0, -1, 0, -1, -1, -1, -1, -1
 mask_ac8:  times 2 dw 0, -1, -1, -1, -1, -1, -1, -1
+%if HIGH_BIT_DEPTH
+ssd_nv12_shuf: db 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15
+%endif
 %if BIT_DEPTH == 10
 ssim_c1:   times 4 dd 6697.7856    ; .01*.01*1023*1023*64
 ssim_c2:   times 4 dd 3797644.4352 ; .03*.03*1023*1023*64*63
@ -531,8 +534,8 @@ SSD 16,  8
 ;
 ;   2 * mmsize/32 * (2^32 - 1) / (2^BIT_DEPTH - 1)^2
 ;
-; For 10-bit MMX this means width >= 16416 and for XMM >= 32832. At sane
-; distortion levels it will take much more than that though.
+; For 10-bit XMM this means width >= 32832. At sane distortion levels
+; it will take much more than that though.
 ;-----------------------------------------------------------------------------
 %if HIGH_BIT_DEPTH
 %macro SSD_NV12 0
@ -541,13 +544,14 @@ cglobal pixel_ssd_nv12_core, 6,7,7
    FIX_STRIDES r1, r3
    add         r0, r4
    add         r2, r4
-    xor         r6, r6
+    neg         r4
    pxor        m4, m4
    pxor        m5, m5
-    pxor        m6, m6
+%if mmsize == 32
+    vbroadcasti128 m6, [ssd_nv12_shuf]
+%endif
 .loopy:
    mov         r6, r4
-    neg         r6
    pxor        m2, m2
    pxor        m3, m3
 .loopx:
@ -555,11 +559,11 @@ cglobal pixel_ssd_nv12_core, 6,7,7
    mova        m1, [r0+r6+mmsize]
    psubw       m0, [r2+r6]
    psubw       m1, [r2+r6+mmsize]
-    PSHUFLW     m0, m0, q3120
-    PSHUFLW     m1, m1, q3120
-%if mmsize >= 16
-    pshufhw     m0, m0, q3120
-    pshufhw     m1, m1, q3120
+%if mmsize == 32
+    pshufb      m0, m6
+    pshufb      m1, m6
+%else
+    SBUTTERFLY wd, 0, 1, 6
 %endif
 %if cpuflag(xop)
    pmadcswd    m2, m0, m0, m2
@ -577,59 +581,30 @@ cglobal pixel_ssd_nv12_core, 6,7,7
    psubd       m3, m1
 .no_overread:
 %endif
-%if mmsize >= 16 ; using HADDD would remove the mmsize/32 part from the
-                 ; equation above, putting the width limit at 8208
-    punpckhdq   m0, m2, m6
-    punpckhdq   m1, m3, m6
-    punpckldq   m2, m6
-    punpckldq   m3, m6
-    paddq       m3, m2
-    paddq       m1, m0
-    paddq       m4, m3
-    paddq       m4, m1
-%else ; unfortunately paddq is sse2
-      ; emulate 48 bit precision for mmx2 instead
-    mova        m0, m2
-    mova        m1, m3
-    punpcklwd   m2, m6
-    punpcklwd   m3, m6
-    punpckhwd   m0, m6
-    punpckhwd   m1, m6
-    paddd       m3, m2
-    paddd       m1, m0
-    paddd       m4, m3
-    paddd       m5, m1
-%endif
+    punpckhdq   m0, m2, m5 ; using HADDD would remove the mmsize/32 part from the
+    punpckhdq   m1, m3, m5 ; equation above, putting the width limit at 8208
+    punpckldq   m2, m5
+    punpckldq   m3, m5
+    paddq       m0, m1
+    paddq       m2, m3
+    paddq       m4, m0
+    paddq       m4, m2
    add         r0, r1
    add         r2, r3
    dec        r5d
    jg .loopy
-    mov         r3, r6m
-    mov         r4, r7m
+    mov         r0, r6m
+    mov         r1, r7m
 %if mmsize == 32
    vextracti128 xm0, m4, 1
    paddq      xm4, xm0
 %endif
-%if mmsize >= 16
-    movq      [r3], xm4
-    movhps    [r4], xm4
-%else ; fixup for mmx2
-    SBUTTERFLY dq, 4, 5, 0
-    mova        m0, m4
-    psrld       m4, 16
-    paddd       m5, m4
-    pslld       m0, 16
-    SBUTTERFLY dq, 0, 5, 4
-    psrlq       m0, 16
-    psrlq       m5, 16
-    movq      [r3], m0
-    movq      [r4], m5
-%endif
+    movq      [r0], xm4
+    movhps    [r1], xm4
    RET
 %endmacro ; SSD_NV12
-%endif ; HIGH_BIT_DEPTH

-%if HIGH_BIT_DEPTH == 0
+%else ; !HIGH_BIT_DEPTH
 ;-----------------------------------------------------------------------------
 ; void pixel_ssd_nv12_core( uint8_t *pixuv1, intptr_t stride1, uint8_t *pixuv2, intptr_t stride2,
 ;                           int width, int height, uint64_t *ssd_u, uint64_t *ssd_v )
@ -643,12 +618,12 @@ cglobal pixel_ssd_nv12_core, 6,7
    add    r4d, r4d
    add     r0, r4
    add     r2, r4
+    neg     r4
    pxor    m3, m3
    pxor    m4, m4
    mova    m5, [pw_00ff]
 .loopy:
    mov     r6, r4
-    neg     r6
 .loopx:
 %if mmsize == 32 ; only 16-byte alignment is guaranteed
    movu    m2, [r0+r6]
@ -686,21 +661,27 @@ cglobal pixel_ssd_nv12_core, 6,7
    add     r2, r3
    dec    r5d
    jg .loopy
-    mov     r3, r6m
-    mov     r4, r7m
-    HADDD   m3, m0
-    HADDD   m4, m0
-    pxor   xm0, xm0
-    punpckldq xm3, xm0
-    punpckldq xm4, xm0
-    movq  [r3], xm3
-    movq  [r4], xm4
+    mov     r0, r6m
+    mov     r1, r7m
+%if cpuflag(ssse3)
+    phaddd  m3, m4
+%else
+    SBUTTERFLY qdq, 3, 4, 0
+    paddd   m3, m4
+%endif
+%if mmsize == 32
+    vextracti128 xm4, m3, 1
+    paddd  xm3, xm4
+%endif
+    psllq  xm4, xm3, 32
+    paddd  xm3, xm4
+    psrlq  xm3, 32
+    movq  [r0], xm3
+    movhps [r1], xm3
    RET
 %endmacro ; SSD_NV12
 %endif ; !HIGH_BIT_DEPTH

-INIT_MMX mmx2
-SSD_NV12
 INIT_XMM sse2
 SSD_NV12
 INIT_XMM avx
@ -4614,67 +4595,82 @@ cglobal intra_sad_x9_8x8, 5,7,8
 ;-----------------------------------------------------------------------------
 %macro SSIM_ITER 1
 %if HIGH_BIT_DEPTH
-    movdqu    m5, [r0+(%1&1)*r1]
-    movdqu    m6, [r2+(%1&1)*r3]
+    movu      m4, [r0+(%1&1)*r1]
+    movu      m5, [r2+(%1&1)*r3]
+%elif cpuflag(avx)
+    pmovzxbw  m4, [r0+(%1&1)*r1]
+    pmovzxbw  m5, [r2+(%1&1)*r3]
 %else
-    movq      m5, [r0+(%1&1)*r1]
-    movq      m6, [r2+(%1&1)*r3]
-    punpcklbw m5, m0
-    punpcklbw m6, m0
+    movq      m4, [r0+(%1&1)*r1]
+    movq      m5, [r2+(%1&1)*r3]
+    punpcklbw m4, m7
+    punpcklbw m5, m7
 %endif
 %if %1==1
    lea       r0, [r0+r1*2]
    lea       r2, [r2+r3*2]
 %endif
-%if %1==0
-    movdqa    m1, m5
-    movdqa    m2, m6
+%if %1 == 0 && cpuflag(avx)
+    SWAP       0, 4
+    SWAP       1, 5
+    pmaddwd   m4, m0, m0
+    pmaddwd   m5, m1, m1
+    pmaddwd   m6, m0, m1
 %else
+%if %1 == 0
+    mova      m0, m4
+    mova      m1, m5
+%else
+    paddw     m0, m4
    paddw     m1, m5
-    paddw     m2, m6
 %endif
-    pmaddwd   m7, m5, m6
+    pmaddwd   m6, m4, m5
+    pmaddwd   m4, m4
    pmaddwd   m5, m5
-    pmaddwd   m6, m6
-    ACCUM  paddd, 3, 5, %1
-    ACCUM  paddd, 4, 7, %1
-    paddd     m3, m6
+%endif
+    ACCUM  paddd, 2, 4, %1
+    ACCUM  paddd, 3, 6, %1
+    paddd     m2, m5
 %endmacro

 %macro SSIM 0
-cglobal pixel_ssim_4x4x2_core, 4,4,8
+%if HIGH_BIT_DEPTH
+cglobal pixel_ssim_4x4x2_core, 4,4,7
    FIX_STRIDES r1, r3
-    pxor      m0, m0
+%else
+cglobal pixel_ssim_4x4x2_core, 4,4,7+notcpuflag(avx)
+%if notcpuflag(avx)
+    pxor      m7, m7
+%endif
+%endif
    SSIM_ITER 0
    SSIM_ITER 1
    SSIM_ITER 2
    SSIM_ITER 3
-    ; PHADDW m1, m2
-    ; PHADDD m3, m4
-    movdqa    m7, [pw_1]
-    pshufd    m5, m3, q2301
-    pmaddwd   m1, m7
-    pmaddwd   m2, m7
-    pshufd    m6, m4, q2301
-    packssdw  m1, m2
-    paddd     m3, m5
-    pshufd    m1, m1, q3120
-    paddd     m4, m6
-    pmaddwd   m1, m7
-    punpckhdq m5, m3, m4
-    punpckldq m3, m4
-
 %if UNIX64
-    %define t0 r4
+    DECLARE_REG_TMP 4
 %else
-    %define t0 rax
+    DECLARE_REG_TMP 0
    mov       t0, r4mp
 %endif
-
-    movq      [t0+ 0], m1
-    movq      [t0+ 8], m3
-    movhps    [t0+16], m1
-    movq      [t0+24], m5
+%if cpuflag(ssse3)
+    phaddw    m0, m1
+    pmaddwd   m0, [pw_1]
+    phaddd    m2, m3
+%else
+    mova      m4, [pw_1]
+    pmaddwd   m0, m4
+    pmaddwd   m1, m4
+    packssdw  m0, m1
+    shufps    m1, m2, m3, q2020
+    shufps    m2, m3, q3131
+    pmaddwd   m0, m4
+    paddd     m2, m1
+%endif
+    shufps    m1, m0, m2, q2020
+    shufps    m0, m2, q3131
+    mova    [t0], m1
+    mova [t0+16], m0
    RET

 ;-----------------------------------------------------------------------------
--- a/library/src/main/libenc/jni/libx264/common/x86/pixel.h
+++ b/library/src/main/libenc/jni/libx264/common/x86/pixel.h
@ -145,9 +145,6 @@ int x264_intra_sad_x9_8x8_sse4  ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, u
 int x264_intra_sad_x9_8x8_avx   ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
 int x264_intra_sad_x9_8x8_avx2  ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );

-void x264_pixel_ssd_nv12_core_mmx2( pixel *pixuv1, intptr_t stride1,
-                                    pixel *pixuv2, intptr_t stride2, int width,
-                                    int height, uint64_t *ssd_u, uint64_t *ssd_v );
 void x264_pixel_ssd_nv12_core_sse2( pixel *pixuv1, intptr_t stride1,
                                    pixel *pixuv2, intptr_t stride2, int width,
                                    int height, uint64_t *ssd_u, uint64_t *ssd_v );
--- a/library/src/main/libenc/jni/libx264/common/x86/predict-a.asm
+++ b/library/src/main/libenc/jni/libx264/common/x86/predict-a.asm
@ -2092,63 +2092,28 @@ PREDICT_16x16_H
 %endif

 ;-----------------------------------------------------------------------------
-; void predict_16x16_dc_core( pixel *src, int i_dc_left )
+; void predict_16x16_dc( pixel *src )
 ;-----------------------------------------------------------------------------
-%macro PRED16x16_DC_MMX 2
-%if HIGH_BIT_DEPTH
-    mova       m0, [r0 - FDEC_STRIDEB+ 0]
-    paddw      m0, [r0 - FDEC_STRIDEB+ 8]
-    paddw      m0, [r0 - FDEC_STRIDEB+16]
-    paddw      m0, [r0 - FDEC_STRIDEB+24]
-    HADDW      m0, m1
-    paddw      m0, %1
-    psrlw      m0, %2
-    SPLATW     m0, m0
-    STORE16    m0, m0, m0, m0
-%else ; !HIGH_BIT_DEPTH
-    pxor        m0, m0
-    pxor        m1, m1
-    psadbw      m0, [r0 - FDEC_STRIDE]
-    psadbw      m1, [r0 - FDEC_STRIDE + 8]
-    paddusw     m0, m1
-    paddusw     m0, %1
-    psrlw       m0, %2                      ; dc
-    pshufw      m0, m0, 0
-    packuswb    m0, m0                      ; dc in bytes
-    STORE16     m0, m0
-%endif
-%endmacro
-
-INIT_MMX mmx2
-cglobal predict_16x16_dc_core, 1,2
-%if ARCH_X86_64
-    movd         m6, r1d
-    PRED16x16_DC_MMX m6, 5
+%if WIN64
+DECLARE_REG_TMP 6 ; Reduces code size due to fewer REX prefixes
 %else
-    PRED16x16_DC_MMX r1m, 5
+DECLARE_REG_TMP 3
 %endif
-    RET

-INIT_MMX mmx2
-cglobal predict_16x16_dc_top, 1,2
-    PRED16x16_DC_MMX [pw_8], 4
-    RET
-
-INIT_MMX mmx2
-%if HIGH_BIT_DEPTH
-cglobal predict_16x16_dc_left_core, 1,2
-    movd       m0, r1m
-    SPLATW     m0, m0
-    STORE16    m0, m0, m0, m0
-    RET
-%else ; !HIGH_BIT_DEPTH
-cglobal predict_16x16_dc_left_core, 1,1
-    movd       m0, r1m
-    pshufw     m0, m0, 0
-    packuswb   m0, m0
-    STORE16    m0, m0
+INIT_XMM
+; Returns the sum of the 16 left-column pixels as two partial sums, r1d (even rows) + r2d (odd rows); the caller adds them
+cglobal predict_16x16_dc_left_internal, 0,4
+    movzx r1d, pixel [r0-SIZEOF_PIXEL]                  ; row 0: the pixel immediately left of [r0]
+    movzx r2d, pixel [r0+FDEC_STRIDEB-SIZEOF_PIXEL]     ; row 1: same column, one FDEC_STRIDEB further
+%assign i 2*FDEC_STRIDEB
+%rep 7
+    movzx t0d, pixel [r0+i-SIZEOF_PIXEL]                ; next even row (rows 2,4,..,14 over the 7 repeats)
+    add   r1d, t0d                                      ; accumulate even rows in r1d
+    movzx t0d, pixel [r0+i+FDEC_STRIDEB-SIZEOF_PIXEL]   ; next odd row (rows 3,5,..,15 over the 7 repeats)
+    add   r2d, t0d                                      ; accumulate odd rows in r2d
+%assign i i+2*FDEC_STRIDEB
+%endrep
    RET
-%endif

 %macro PRED16x16_DC 2
 %if HIGH_BIT_DEPTH
@ -2176,9 +2141,11 @@ cglobal predict_16x16_dc_left_core, 1,1
 %endif
 %endmacro

-%macro PREDICT_16x16_DC_CORE 0
-cglobal predict_16x16_dc_core, 2,2,4
-    movd         xm3, r1m
+%macro PREDICT_16x16_DC 0
+cglobal predict_16x16_dc, 1,3
+    call predict_16x16_dc_left_internal
+    lea          r1d, [r1+r2+16]
+    movd         xm3, r1d
    PRED16x16_DC xm3, 5
    RET

@ -2186,8 +2153,11 @@ cglobal predict_16x16_dc_top, 1,2
    PRED16x16_DC [pw_8], 4
    RET

-cglobal predict_16x16_dc_left_core, 1,2
-    movd      xm0, r1m
+cglobal predict_16x16_dc_left, 1,3
+    call predict_16x16_dc_left_internal
+    lea       r1d, [r1+r2+8]
+    shr       r1d, 4
+    movd      xm0, r1d
    SPLATW     m0, xm0
 %if HIGH_BIT_DEPTH && mmsize == 16
    STORE16    m0, m0
@ -2201,11 +2171,11 @@ cglobal predict_16x16_dc_left_core, 1,2
 %endmacro

 INIT_XMM sse2
-PREDICT_16x16_DC_CORE
+PREDICT_16x16_DC
 %if HIGH_BIT_DEPTH
 INIT_YMM avx2
-PREDICT_16x16_DC_CORE
+PREDICT_16x16_DC
 %else
 INIT_XMM avx2
-PREDICT_16x16_DC_CORE
+PREDICT_16x16_DC
 %endif
--- a/library/src/main/libenc/jni/libx264/common/x86/predict-c.c
+++ b/library/src/main/libenc/jni/libx264/common/x86/predict-c.c
@ -29,38 +29,6 @@
 #include "predict.h"
 #include "pixel.h"

-#define PREDICT_16x16_DC(name)\
-void x264_predict_16x16_dc_##name( pixel *src )\
-{\
-    uint32_t dc = 16;\
-    for( int i = 0; i < 16; i += 2 )\
-    {\
-        dc += src[-1 + i * FDEC_STRIDE];\
-        dc += src[-1 + (i+1) * FDEC_STRIDE];\
-    }\
-    x264_predict_16x16_dc_core_##name( src, dc );\
-}
-
-PREDICT_16x16_DC( mmx2 )
-PREDICT_16x16_DC( sse2 )
-PREDICT_16x16_DC( avx2 )
-
-#define PREDICT_16x16_DC_LEFT(name)\
-static void x264_predict_16x16_dc_left_##name( pixel *src )\
-{\
-    uint32_t dc = 8;\
-    for( int i = 0; i < 16; i += 2 )\
-    {\
-        dc += src[-1 + i * FDEC_STRIDE];\
-        dc += src[-1 + (i+1) * FDEC_STRIDE];\
-    }\
-    x264_predict_16x16_dc_left_core_##name( src, dc>>4 );\
-}
-
-PREDICT_16x16_DC_LEFT( mmx2 )
-PREDICT_16x16_DC_LEFT( sse2 )
-PREDICT_16x16_DC_LEFT( avx2 )
-
 #define PREDICT_P_SUM(j,i)\
    H += i * ( src[j+i - FDEC_STRIDE ]  - src[j-i - FDEC_STRIDE ] );\
    V += i * ( src[(j+i)*FDEC_STRIDE -1] - src[(j-i)*FDEC_STRIDE -1] );
@ -347,9 +315,6 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
 {
    if( !(cpu&X264_CPU_MMX2) )
        return;
-    pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_mmx2;
-    pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_mmx2;
-    pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmx2;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_mmx2;
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_mmx2;
 #if HIGH_BIT_DEPTH
--- a/library/src/main/libenc/jni/libx264/common/x86/predict.h
+++ b/library/src/main/libenc/jni/libx264/common/x86/predict.h
@ -40,15 +40,10 @@ void x264_predict_16x16_h_mmx2( pixel *src );
 void x264_predict_16x16_h_sse2( uint16_t *src );
 void x264_predict_16x16_h_ssse3( uint8_t *src );
 void x264_predict_16x16_h_avx2( uint16_t *src );
-void x264_predict_16x16_dc_mmx2( pixel *src );
 void x264_predict_16x16_dc_sse2( pixel *src );
-void x264_predict_16x16_dc_core_mmx2( pixel *src, int i_dc_left );
-void x264_predict_16x16_dc_core_sse2( pixel *src, int i_dc_left );
-void x264_predict_16x16_dc_core_avx2( pixel *src, int i_dc_left );
-void x264_predict_16x16_dc_left_core_mmx2( pixel *src, int i_dc_left );
-void x264_predict_16x16_dc_left_core_sse2( pixel *src, int i_dc_left );
-void x264_predict_16x16_dc_left_core_avx2( pixel *src, int i_dc_left );
-void x264_predict_16x16_dc_top_mmx2( pixel *src );
+void x264_predict_16x16_dc_avx2( pixel *src );
+void x264_predict_16x16_dc_left_sse2( pixel *src );
+void x264_predict_16x16_dc_left_avx2( pixel *src );
 void x264_predict_16x16_dc_top_sse2( pixel *src );
 void x264_predict_16x16_dc_top_avx2( pixel *src );
 void x264_predict_16x16_p_core_mmx2( uint8_t *src, int i00, int b, int c );
--- a/library/src/main/libenc/jni/libx264/common/x86/trellis-64.asm
+++ b/library/src/main/libenc/jni/libx264/common/x86/trellis-64.asm
@ -53,14 +53,14 @@

 SECTION_RODATA

-pd_8: times 4 dd 8
 pd_m16: times 4 dd -16
-pd_0123: dd 0, 1, 2, 3
-pd_4567: dd 4, 5, 6, 7
 sq_1: dq 1, 0
 pq_128: times 2 dq 128
 pq_ffffffff: times 2 dq 0xffffffff

+cextern pd_8
+cextern pd_0123
+cextern pd_4567
 cextern cabac_entropy
 cextern cabac_transition
 cextern cabac_size_unary
--- a/library/src/main/libenc/jni/libx264/configure
+++ b/library/src/main/libenc/jni/libx264/configure
@ -997,7 +997,9 @@ fi
 if [ "$thread" = "posix" ]; then
    LDFLAGS="$LDFLAGS $libpthread"
    define HAVE_POSIXTHREAD
+    if [ "$SYS" = "LINUX" ] && cc_check sched.h "-D_GNU_SOURCE -Werror" "cpu_set_t p_aff; return CPU_COUNT(&p_aff);" ; then
        define HAVE_CPU_COUNT
+    fi
 fi
 [ "$thread" != "no" ] && define HAVE_THREAD

--- a/library/src/main/libenc/jni/libx264/encoder/cabac.c
+++ b/library/src/main/libenc/jni/libx264/encoder/cabac.c
@ -863,7 +863,7 @@ static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_ca
        x264_cabac_encode_bypass( cb, 0 ); // sign
    }

-    for( int i = last-1 ; i >= 0; i-- )
+    for( int i = last-1; i >= 0; i-- )
    {
        if( l[i] )
        {
--- a/library/src/main/libenc/jni/libx264/encoder/macroblock.c
+++ b/library/src/main/libenc/jni/libx264/encoder/macroblock.c
@ -708,7 +708,7 @@ static ALWAYS_INLINE void x264_macroblock_encode_internal( x264_t *h, int plane_
        }
        for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
        {
-            for( int i = (p == 0 && h->mb.i_skip_intra) ? 3 : 0 ; i < 4; i++ )
+            for( int i = (p == 0 && h->mb.i_skip_intra) ? 3 : 0; i < 4; i++ )
            {
                int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
                x264_mb_encode_i8x8( h, p, i, i_qp, i_mode, NULL, 1 );
@ -733,7 +733,7 @@ static ALWAYS_INLINE void x264_macroblock_encode_internal( x264_t *h, int plane_
        }
        for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
        {
-            for( int i = (p == 0 && h->mb.i_skip_intra) ? 15 : 0 ; i < 16; i++ )
+            for( int i = (p == 0 && h->mb.i_skip_intra) ? 15 : 0; i < 16; i++ )
            {
                pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[i]];
                int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
--- a/library/src/main/libenc/jni/libx264/encoder/ratecontrol.c
+++ b/library/src/main/libenc/jni/libx264/encoder/ratecontrol.c
@ -786,7 +786,7 @@ int x264_ratecontrol_new( x264_t *h )
        x264_reduce_fraction64( &num, &denom );
        rc->hrd_multiply_denom = 90000 / num;

-        double bits_required = log2( 90000 / rc->hrd_multiply_denom )
+        double bits_required = log2( num )
                             + log2( h->sps->vui.i_time_scale )
                             + log2( h->sps->vui.hrd.i_cpb_size_unscaled );
        if( bits_required >= 63 )
--- a/library/src/main/libenc/jni/libx264/extras/avisynth_c.h
+++ b/library/src/main/libenc/jni/libx264/extras/avisynth_c.h
@ -54,12 +54,12 @@
 #define AVSC_INLINE static __inline

 #ifdef AVISYNTH_C_EXPORTS
-#  define AVSC_EXPORT EXTERN_C
-#  define AVSC_API(ret, name) EXTERN_C __declspec(dllexport) ret AVSC_CC name
+#  define AVSC_EXPORT __declspec(dllexport)
+#  define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
 #else
-#  define AVSC_EXPORT EXTERN_C __declspec(dllexport)
+#  define AVSC_EXPORT __declspec(dllimport)
 #  ifndef AVSC_NO_DECLSPEC
-#    define AVSC_API(ret, name) EXTERN_C __declspec(dllimport) ret AVSC_CC name
+#    define AVSC_API(ret, name) EXTERN_C AVSC_EXPORT ret AVSC_CC name
 #  else
 #    define AVSC_API(ret, name) typedef ret (AVSC_CC *name##_func)
 #  endif
@ -78,8 +78,8 @@ typedef __int64 INT64;
 // Constants
 //

-#ifndef __AVISYNTH_H__
-enum { AVISYNTH_INTERFACE_VERSION = 4 };
+#ifndef __AVISYNTH_6_H__
+enum { AVISYNTH_INTERFACE_VERSION = 6 };
 #endif

 enum {AVS_SAMPLE_INT8  = 1<<0,
@ -111,8 +111,8 @@ enum {AVS_CS_BGR = 1<<28,
      AVS_CS_PLANAR = 1<<31,

      AVS_CS_SHIFT_SUB_WIDTH   = 0,
-      AVS_CS_SHIFT_SUB_HEIGHT  = 1 << 3,
-      AVS_CS_SHIFT_SAMPLE_BITS = 1 << 4,
+      AVS_CS_SHIFT_SUB_HEIGHT  = 8,
+      AVS_CS_SHIFT_SAMPLE_BITS = 16,

      AVS_CS_SUB_WIDTH_MASK    = 7 << AVS_CS_SHIFT_SUB_WIDTH,
      AVS_CS_SUB_WIDTH_1       = 3 << AVS_CS_SHIFT_SUB_WIDTH, // YV24
@ -179,12 +179,56 @@ enum {  //SUBTYPES
  AVS_FILTER_OUTPUT_TYPE_DIFFERENT=4};

 enum {
-  AVS_CACHE_NOTHING=0,
-  AVS_CACHE_RANGE=1,
-  AVS_CACHE_ALL=2,
-  AVS_CACHE_AUDIO=3,
-  AVS_CACHE_AUDIO_NONE=4,
-  AVS_CACHE_AUDIO_AUTO=5
+  // New 2.6 explicitly defined cache hints.
+  AVS_CACHE_NOTHING=10, // Do not cache video.
+  AVS_CACHE_WINDOW=11, // Hard protect up to X frames within a range of X from the current frame N.
+  AVS_CACHE_GENERIC=12, // LRU cache up to X frames.
+  AVS_CACHE_FORCE_GENERIC=13, // LRU cache up to X frames, override any previous CACHE_WINDOW.
+
+  AVS_CACHE_GET_POLICY=30, // Get the current policy.
+  AVS_CACHE_GET_WINDOW=31, // Get the current window h_span.
+  AVS_CACHE_GET_RANGE=32, // Get the current generic frame range.
+
+  AVS_CACHE_AUDIO=50, // Explicitly do cache audio, X byte cache.
+  AVS_CACHE_AUDIO_NOTHING=51, // Explicitly do not cache audio.
+  AVS_CACHE_AUDIO_NONE=52, // Audio cache off (auto mode), X byte initial cache.
+  AVS_CACHE_AUDIO_AUTO=53, // Audio cache on (auto mode), X byte initial cache.
+
+  AVS_CACHE_GET_AUDIO_POLICY=70, // Get the current audio policy.
+  AVS_CACHE_GET_AUDIO_SIZE=71, // Get the current audio cache size.
+
+  AVS_CACHE_PREFETCH_FRAME=100, // Queue request to prefetch frame N.
+  AVS_CACHE_PREFETCH_GO=101, // Action video prefetches.
+
+  AVS_CACHE_PREFETCH_AUDIO_BEGIN=120, // Begin queue request transaction to prefetch audio (take critical section).
+  AVS_CACHE_PREFETCH_AUDIO_STARTLO=121, // Set low 32 bits of start.
+  AVS_CACHE_PREFETCH_AUDIO_STARTHI=122, // Set high 32 bits of start.
+  AVS_CACHE_PREFETCH_AUDIO_COUNT=123, // Set low 32 bits of length.
+  AVS_CACHE_PREFETCH_AUDIO_COMMIT=124, // Enqueue request transaction to prefetch audio (release critical section).
+  AVS_CACHE_PREFETCH_AUDIO_GO=125, // Action audio prefetches.
+
+  AVS_CACHE_GETCHILD_CACHE_MODE=200, // Cache ask Child for desired video cache mode.
+  AVS_CACHE_GETCHILD_CACHE_SIZE=201, // Cache ask Child for desired video cache size.
+  AVS_CACHE_GETCHILD_AUDIO_MODE=202, // Cache ask Child for desired audio cache mode.
+  AVS_CACHE_GETCHILD_AUDIO_SIZE=203, // Cache ask Child for desired audio cache size.
+
+  AVS_CACHE_GETCHILD_COST=220, // Cache ask Child for estimated processing cost.
+    AVS_CACHE_COST_ZERO=221, // Child response of zero cost (ptr arithmetic only).
+    AVS_CACHE_COST_UNIT=222, // Child response of unit cost (less than or equal to 1 full frame blit).
+    AVS_CACHE_COST_LOW=223, // Child response of light cost. (Fast)
+    AVS_CACHE_COST_MED=224, // Child response of medium cost. (Real time)
+    AVS_CACHE_COST_HI=225, // Child response of heavy cost. (Slow)
+
+  AVS_CACHE_GETCHILD_THREAD_MODE=240, // Cache ask Child for thread safety.
+    AVS_CACHE_THREAD_UNSAFE=241, // Only 1 thread allowed for all instances. 2.5 filters default!
+    AVS_CACHE_THREAD_CLASS=242, // Only 1 thread allowed for each instance. 2.6 filters default!
+    AVS_CACHE_THREAD_SAFE=243, //  Allow all threads in any instance.
+    AVS_CACHE_THREAD_OWN=244, // Safe but limit to 1 thread, internally threaded.
+
+  AVS_CACHE_GETCHILD_ACCESS_COST=260, // Cache ask Child for preferred access pattern.
+    AVS_CACHE_ACCESS_RAND=261, // Filter is access order agnostic.
+    AVS_CACHE_ACCESS_SEQ0=262, // Filter prefers sequential access (low cost)
+    AVS_CACHE_ACCESS_SEQ1=263, // Filter needs sequential access (high cost)
  };

 #define AVS_FRAME_ALIGN 16
@ -226,10 +270,10 @@ AVSC_INLINE int avs_is_rgb(const AVS_VideoInfo * p)
        { return !!(p->pixel_type&AVS_CS_BGR); }

 AVSC_INLINE int avs_is_rgb24(const AVS_VideoInfo * p)
-        { return (p->pixel_type&AVS_CS_BGR24)==AVS_CS_BGR24; } // Clear out additional properties
+        { return ((p->pixel_type&AVS_CS_BGR24)==AVS_CS_BGR24) && ((p->pixel_type & AVS_CS_SAMPLE_BITS_MASK) == AVS_CS_SAMPLE_BITS_8); }

 AVSC_INLINE int avs_is_rgb32(const AVS_VideoInfo * p)
-        { return (p->pixel_type & AVS_CS_BGR32) == AVS_CS_BGR32 ; }
+        { return ((p->pixel_type&AVS_CS_BGR32)==AVS_CS_BGR32) && ((p->pixel_type & AVS_CS_SAMPLE_BITS_MASK) == AVS_CS_SAMPLE_BITS_8); }

 AVSC_INLINE int avs_is_yuv(const AVS_VideoInfo * p)
        { return !!(p->pixel_type&AVS_CS_YUV ); }
@ -237,6 +281,17 @@ AVSC_INLINE int avs_is_yuv(const AVS_VideoInfo * p)
 AVSC_INLINE int avs_is_yuy2(const AVS_VideoInfo * p)
        { return (p->pixel_type & AVS_CS_YUY2) == AVS_CS_YUY2; }

+AVSC_API(int, avs_is_yv24)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yv16)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yv12)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yv411)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_y8)(const AVS_VideoInfo * p);
+
+#ifdef AVSC_NO_DECLSPEC
 AVSC_INLINE int avs_is_yv24(const AVS_VideoInfo * p)
        { return (p->pixel_type & AVS_CS_PLANAR_MASK) == (AVS_CS_YV24  & AVS_CS_PLANAR_FILTER); }

@ -251,15 +306,57 @@ AVSC_INLINE int avs_is_yv411(const AVS_VideoInfo * p)

 AVSC_INLINE int avs_is_y8(const AVS_VideoInfo * p)
        { return (p->pixel_type & AVS_CS_PLANAR_MASK) == (AVS_CS_Y8    & AVS_CS_PLANAR_FILTER); }
+#endif
+
+#if 1 // AviSynth+ extension
+AVSC_API(int, avs_is_rgb48)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_rgb64)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yuv444p16)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yuv422p16)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yuv420p16)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_y16)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yuv444ps)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yuv422ps)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yuv420ps)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_y32)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_444)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_422)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_420)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_y)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_yuva)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_planar_rgb)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_is_planar_rgba)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_num_components)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_component_size)(const AVS_VideoInfo * p);
+
+AVSC_API(int, avs_bits_per_component)(const AVS_VideoInfo * p);
+#endif

 AVSC_INLINE int avs_is_property(const AVS_VideoInfo * p, int property)
-        { return ((p->pixel_type & property)==property ); }
+        { return ((p->image_type & property)==property ); }

 AVSC_INLINE int avs_is_planar(const AVS_VideoInfo * p)
        { return !!(p->pixel_type & AVS_CS_PLANAR); }

-AVSC_INLINE int avs_is_color_space(const AVS_VideoInfo * p, int c_space)
-        { return avs_is_planar(p) ? ((p->pixel_type & AVS_CS_PLANAR_MASK) == (c_space & AVS_CS_PLANAR_FILTER)) : ((p->pixel_type & c_space) == c_space); }
+AVSC_API(int, avs_is_color_space)(const AVS_VideoInfo * p, int c_space);

 AVSC_INLINE int avs_is_field_based(const AVS_VideoInfo * p)
        { return !!(p->image_type & AVS_IT_FIELDBASED); }
@ -273,25 +370,23 @@ AVSC_INLINE int avs_is_bff(const AVS_VideoInfo * p)
 AVSC_INLINE int avs_is_tff(const AVS_VideoInfo * p)
        { return !!(p->image_type & AVS_IT_TFF); }

-AVSC_INLINE int avs_bits_per_pixel(const AVS_VideoInfo * p) 
-{ 
-  switch (p->pixel_type) {
-      case AVS_CS_BGR24: return 24;
-      case AVS_CS_BGR32: return 32;
-      case AVS_CS_YUY2:  return 16;
-      case AVS_CS_YV12:
-      case AVS_CS_I420:  return 12;
-      default:           return 0;
-    }
-}
-AVSC_INLINE int avs_bytes_from_pixels(const AVS_VideoInfo * p, int pixels) 
-        { return pixels * (avs_bits_per_pixel(p)>>3); }   // Will work on planar images, but will return only luma planes
+AVSC_API(int, avs_get_plane_width_subsampling)(const AVS_VideoInfo * p, int plane);
+
+AVSC_API(int, avs_get_plane_height_subsampling)(const AVS_VideoInfo * p, int plane);
+
+
+AVSC_API(int, avs_bits_per_pixel)(const AVS_VideoInfo * p);

+AVSC_API(int, avs_bytes_from_pixels)(const AVS_VideoInfo * p, int pixels);
+
+AVSC_API(int, avs_row_size_p)(const AVS_VideoInfo * p, int plane);
+
+#ifndef AVSC_NO_DECLSPEC
 AVSC_INLINE int avs_row_size(const AVS_VideoInfo * p)
-        { return avs_bytes_from_pixels(p,p->width); }  // Also only returns first plane on planar images
+        { return avs_row_size_p(p, 0); }
+#endif

-AVSC_INLINE int avs_bmp_size(const AVS_VideoInfo * vi)                
-        { if (avs_is_planar(vi)) {int p = vi->height * ((avs_row_size(vi)+3) & ~3); p+=p>>1; return p;  } return vi->height * ((avs_row_size(vi)+3) & ~3); }
+AVSC_API(int, avs_bmp_size)(const AVS_VideoInfo * vi);

 AVSC_INLINE int avs_samples_per_second(const AVS_VideoInfo * p)
        { return p->audio_samples_per_second; }
@ -390,89 +485,55 @@ typedef struct AVS_VideoFrame {
 } AVS_VideoFrame;

 // Access functions for AVS_VideoFrame
-AVSC_INLINE int avs_get_pitch(const AVS_VideoFrame * p) {
-        return p->pitch;}
+AVSC_API(int, avs_get_pitch_p)(const AVS_VideoFrame * p, int plane);

+#ifdef AVSC_NO_DECLSPEC
 AVSC_INLINE int avs_get_pitch_p(const AVS_VideoFrame * p, int plane) {
        switch (plane) {
-  case AVS_PLANAR_U: case AVS_PLANAR_V: return p->pitchUV;}
-  return p->pitch;}
+          case AVS_PLANAR_U:
+          case AVS_PLANAR_V:
+            return p->pitchUV;
+        }
+        return p->pitch;
+}
+#endif
+
+AVSC_INLINE int avs_get_pitch(const AVS_VideoFrame * p) {
+        return avs_get_pitch_p(p, 0);}
+
+AVSC_API(int, avs_get_row_size_p)(const AVS_VideoFrame * p, int plane);

 AVSC_INLINE int avs_get_row_size(const AVS_VideoFrame * p) {
        return p->row_size; }

-AVSC_INLINE int avs_get_row_size_p(const AVS_VideoFrame * p, int plane) { 
-        int r;
-    switch (plane) {
-    case AVS_PLANAR_U: case AVS_PLANAR_V: 
-                if (p->pitchUV) return p->row_sizeUV; 
-                else            return 0;
-    case AVS_PLANAR_U_ALIGNED: case AVS_PLANAR_V_ALIGNED: 
-                if (p->pitchUV) { 
-                        r = (p->row_sizeUV+AVS_FRAME_ALIGN-1)&(~(AVS_FRAME_ALIGN-1)); // Aligned rowsize
-                        if (r < p->pitchUV) 
-                                return r; 
-                        return p->row_sizeUV;
-                } else return 0;
-    case AVS_PLANAR_Y_ALIGNED:
-                r = (p->row_size+AVS_FRAME_ALIGN-1)&(~(AVS_FRAME_ALIGN-1)); // Aligned rowsize
-                if (r <= p->pitch) 
-                        return r; 
-                return p->row_size;
-    }
-    return p->row_size;
-}
+AVSC_API(int, avs_get_height_p)(const AVS_VideoFrame * p, int plane);

 AVSC_INLINE int avs_get_height(const AVS_VideoFrame * p) {
        return p->height;}

-AVSC_INLINE int avs_get_height_p(const AVS_VideoFrame * p, int plane) {
-        switch (plane) {
-                case AVS_PLANAR_U: case AVS_PLANAR_V: 
-                        if (p->pitchUV) return p->heightUV;
-                        return 0;
-        }
-        return p->height;}
-
-AVSC_INLINE const BYTE* avs_get_read_ptr(const AVS_VideoFrame * p) {
-        return p->vfb->data + p->offset;}
+AVSC_API(const BYTE *, avs_get_read_ptr_p)(const AVS_VideoFrame * p, int plane);

-AVSC_INLINE const BYTE* avs_get_read_ptr_p(const AVS_VideoFrame * p, int plane) 
-{
+#ifdef AVSC_NO_DECLSPEC
+AVSC_INLINE const BYTE* avs_get_read_ptr_p(const AVS_VideoFrame * p, int plane) {
        switch (plane) {
          case AVS_PLANAR_U: return p->vfb->data + p->offsetU;
          case AVS_PLANAR_V: return p->vfb->data + p->offsetV;
-                default:           return p->vfb->data + p->offset;}
+          default:           return p->vfb->data + p->offset;
+        }
 }
+#endif

-AVSC_INLINE int avs_is_writable(const AVS_VideoFrame * p) {
-        return (p->refcount == 1 && p->vfb->refcount == 1);}
+AVSC_INLINE const BYTE* avs_get_read_ptr(const AVS_VideoFrame * p) {
+        return avs_get_read_ptr_p(p, 0);}

-AVSC_INLINE BYTE* avs_get_write_ptr(const AVS_VideoFrame * p) 
-{
-        if (avs_is_writable(p)) {
-                ++p->vfb->sequence_number;
-                return p->vfb->data + p->offset;
-        } else
-                return 0;
-}
+AVSC_API(int, avs_is_writable)(const AVS_VideoFrame * p);

-AVSC_INLINE BYTE* avs_get_write_ptr_p(const AVS_VideoFrame * p, int plane) 
-{
-        if (plane==AVS_PLANAR_Y && avs_is_writable(p)) {
-                ++p->vfb->sequence_number;
-                return p->vfb->data + p->offset;
-        } else if (plane==AVS_PLANAR_Y) {
-                return 0;
-        } else {
-                switch (plane) {
-                        case AVS_PLANAR_U: return p->vfb->data + p->offsetU;
-                        case AVS_PLANAR_V: return p->vfb->data + p->offsetV;
-                        default:       return p->vfb->data + p->offset;
-                }
-        }
-}
+AVSC_API(BYTE *, avs_get_write_ptr_p)(const AVS_VideoFrame * p, int plane);

+#ifndef AVSC_NO_DECLSPEC
+AVSC_INLINE BYTE* avs_get_write_ptr(const AVS_VideoFrame * p) {
+        return avs_get_write_ptr_p(p, 0);}
+#endif

 AVSC_API(void, avs_release_video_frame)(AVS_VideoFrame *);
 // makes a shallow copy of a video frame
@ -569,7 +630,7 @@ AVSC_INLINE AVS_Value avs_new_value_clip(AVS_Clip * v0)
        { AVS_Value v; avs_set_to_clip(&v, v0); return v; }
 #endif
 AVSC_INLINE AVS_Value avs_new_value_array(AVS_Value * v0, int size)
-        { AVS_Value v; v.type = 'a'; v.d.array = v0; v.array_size = size; return v; }
+        { AVS_Value v; v.type = 'a'; v.d.array = v0; v.array_size = (short)size; return v; }

 /////////////////////////////////////////////////////////////////////
 //
@ -657,9 +718,13 @@ enum {
  AVS_CPUF_SSSE3      = 0x200,   //  Core 2
  AVS_CPUF_SSE4       = 0x400,   //  Penryn, Wolfdale, Yorkfield
  AVS_CPUF_SSE4_1     = 0x400,
-  AVS_CPUF_SSE4_2     = 0x800,   //  Nehalem
+//AVS_CPUF_AVX        = 0x800,   //  Sandy Bridge, Bulldozer
+  AVS_CPUF_SSE4_2    = 0x1000,   //  Nehalem
+//AVS_CPUF_AVX2      = 0x2000,   //  Haswell
+//AVS_CPUF_AVX512    = 0x4000,   //  Knights Landing
 };

+
 AVSC_API(const char *, avs_get_error)(AVS_ScriptEnvironment *); // return 0 if no error

 AVSC_API(long, avs_get_cpu_flags)(AVS_ScriptEnvironment *);
@ -772,7 +837,6 @@ struct AVS_Library {
  AVSC_DECLARE_FUNC(avs_function_exists);
  AVSC_DECLARE_FUNC(avs_get_audio);
  AVSC_DECLARE_FUNC(avs_get_cpu_flags);
-  AVSC_DECLARE_FUNC(avs_get_error);
  AVSC_DECLARE_FUNC(avs_get_frame);
  AVSC_DECLARE_FUNC(avs_get_parity);
  AVSC_DECLARE_FUNC(avs_get_var);
@ -797,6 +861,27 @@ struct AVS_Library {
  AVSC_DECLARE_FUNC(avs_subframe_planar);
  AVSC_DECLARE_FUNC(avs_take_clip);
  AVSC_DECLARE_FUNC(avs_vsprintf);
+
+  AVSC_DECLARE_FUNC(avs_get_error);
+  AVSC_DECLARE_FUNC(avs_is_yv24);
+  AVSC_DECLARE_FUNC(avs_is_yv16);
+  AVSC_DECLARE_FUNC(avs_is_yv12);
+  AVSC_DECLARE_FUNC(avs_is_yv411);
+  AVSC_DECLARE_FUNC(avs_is_y8);
+  AVSC_DECLARE_FUNC(avs_is_color_space);
+
+  AVSC_DECLARE_FUNC(avs_get_plane_width_subsampling);
+  AVSC_DECLARE_FUNC(avs_get_plane_height_subsampling);
+  AVSC_DECLARE_FUNC(avs_bits_per_pixel);
+  AVSC_DECLARE_FUNC(avs_bytes_from_pixels);
+  AVSC_DECLARE_FUNC(avs_row_size_p);
+  AVSC_DECLARE_FUNC(avs_bmp_size);
+  AVSC_DECLARE_FUNC(avs_get_pitch_p);
+  AVSC_DECLARE_FUNC(avs_get_row_size_p);
+  AVSC_DECLARE_FUNC(avs_get_height_p);
+  AVSC_DECLARE_FUNC(avs_get_read_ptr_p);
+  AVSC_DECLARE_FUNC(avs_is_writable);
+  AVSC_DECLARE_FUNC(avs_get_write_ptr_p);
 };

 #undef AVSC_DECLARE_FUNC
@ -831,7 +916,6 @@ AVSC_INLINE AVS_Library * avs_load_library() {
  AVSC_LOAD_FUNC(avs_function_exists);
  AVSC_LOAD_FUNC(avs_get_audio);
  AVSC_LOAD_FUNC(avs_get_cpu_flags);
-  AVSC_LOAD_FUNC(avs_get_error);
  AVSC_LOAD_FUNC(avs_get_frame);
  AVSC_LOAD_FUNC(avs_get_parity);
  AVSC_LOAD_FUNC(avs_get_var);
@ -857,6 +941,27 @@ AVSC_INLINE AVS_Library * avs_load_library() {
  AVSC_LOAD_FUNC(avs_take_clip);
  AVSC_LOAD_FUNC(avs_vsprintf);

+  AVSC_LOAD_FUNC(avs_get_error);
+  AVSC_LOAD_FUNC(avs_is_yv24);
+  AVSC_LOAD_FUNC(avs_is_yv16);
+  AVSC_LOAD_FUNC(avs_is_yv12);
+  AVSC_LOAD_FUNC(avs_is_yv411);
+  AVSC_LOAD_FUNC(avs_is_y8);
+  AVSC_LOAD_FUNC(avs_is_color_space);
+
+  AVSC_LOAD_FUNC(avs_get_plane_width_subsampling);
+  AVSC_LOAD_FUNC(avs_get_plane_height_subsampling);
+  AVSC_LOAD_FUNC(avs_bits_per_pixel);
+  AVSC_LOAD_FUNC(avs_bytes_from_pixels);
+  AVSC_LOAD_FUNC(avs_row_size_p);
+  AVSC_LOAD_FUNC(avs_bmp_size);
+  AVSC_LOAD_FUNC(avs_get_pitch_p);
+  AVSC_LOAD_FUNC(avs_get_row_size_p);
+  AVSC_LOAD_FUNC(avs_get_height_p);
+  AVSC_LOAD_FUNC(avs_get_read_ptr_p);
+  AVSC_LOAD_FUNC(avs_is_writable);
+  AVSC_LOAD_FUNC(avs_get_write_ptr_p);
+
 #undef __AVSC_STRINGIFY
 #undef AVSC_STRINGIFY
 #undef AVSC_LOAD_FUNC
--- a/library/src/main/libenc/jni/libx264/filters/filters.c
+++ b/library/src/main/libenc/jni/libx264/filters/filters.c
@ -48,14 +48,14 @@ char **x264_split_options( const char *opt_str, const char * const *options )
            while( *option && (strlen( *option ) != length || strncmp( opt, *option, length )) )
                option++;

-            RETURN_IF_ERROR( !*option, "Invalid option '%.*s'\n", length, opt )
+            RETURN_IF_ERROR( !*option, "Invalid option '%.*s'\n", length, opt );
            found_named = 1;
            length += strcspn( opt + length, "," );
        }
        else
        {
-            RETURN_IF_ERROR( opt_count >= options_count, "Too many options given\n" )
-            RETURN_IF_ERROR( found_named, "Ordered option given after named\n" )
+            RETURN_IF_ERROR( opt_count >= options_count, "Too many options given\n" );
+            RETURN_IF_ERROR( found_named, "Ordered option given after named\n" );
            size += strlen( options[opt_count] ) + 1;
        }
        opt_count++;
@ -65,7 +65,7 @@ char **x264_split_options( const char *opt_str, const char * const *options )
    int offset = 2 * (opt_count+1) * sizeof(char*);
    size += offset + (opt - opt_str);
    char **opts = calloc( 1, size );
-    RETURN_IF_ERROR( !opts, "malloc failed\n" )
+    RETURN_IF_ERROR( !opts, "malloc failed\n" );

 #define insert_opt( src, length )\
 do {\
--- a/library/src/main/libenc/jni/libx264/filters/video/crop.c
+++ b/library/src/main/libenc/jni/libx264/filters/video/crop.c
@ -52,18 +52,18 @@ static int handle_opts( crop_hnd_t *h, video_info_t *info, char **opts, const ch
    for( int i = 0; i < 4; i++ )
    {
        char *opt = x264_get_option( optlist[i], opts );
-        FAIL_IF_ERROR( !opt, "%s crop value not specified\n", optlist[i] )
+        FAIL_IF_ERROR( !opt, "%s crop value not specified\n", optlist[i] );
        h->dims[i] = x264_otoi( opt, -1 );
-        FAIL_IF_ERROR( h->dims[i] < 0, "%s crop value `%s' is less than 0\n", optlist[i], opt )
+        FAIL_IF_ERROR( h->dims[i] < 0, "%s crop value `%s' is less than 0\n", optlist[i], opt );
        int dim_mod = i&1 ? (h->csp->mod_height << info->interlaced) : h->csp->mod_width;
-        FAIL_IF_ERROR( h->dims[i] % dim_mod, "%s crop value `%s' is not a multiple of %d\n", optlist[i], opt, dim_mod )
+        FAIL_IF_ERROR( h->dims[i] % dim_mod, "%s crop value `%s' is not a multiple of %d\n", optlist[i], opt, dim_mod );
    }
    return 0;
 }

 static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x264_param_t *param, char *opt_string )
 {
-    FAIL_IF_ERROR( x264_cli_csp_is_invalid( info->csp ), "invalid csp %d\n", info->csp )
+    FAIL_IF_ERROR( x264_cli_csp_is_invalid( info->csp ), "invalid csp %d\n", info->csp );
    crop_hnd_t *h = calloc( 1, sizeof(crop_hnd_t) );
    if( !h )
        return -1;
@ -81,7 +81,7 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2

    h->dims[2] = info->width  - h->dims[0] - h->dims[2];
    h->dims[3] = info->height - h->dims[1] - h->dims[3];
-    FAIL_IF_ERROR( h->dims[2] <= 0 || h->dims[3] <= 0, "invalid output resolution %dx%d\n", h->dims[2], h->dims[3] )
+    FAIL_IF_ERROR( h->dims[2] <= 0 || h->dims[3] <= 0, "invalid output resolution %dx%d\n", h->dims[2], h->dims[3] );

    if( info->width != h->dims[2] || info->height != h->dims[3] )
        x264_cli_log( NAME, X264_LOG_INFO, "cropping to %dx%d\n", h->dims[2], h->dims[3] );
--- a/library/src/main/libenc/jni/libx264/filters/video/depth.c
+++ b/library/src/main/libenc/jni/libx264/filters/video/depth.c
@ -217,13 +217,13 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
            ret = 1;
    }

-    FAIL_IF_ERROR( bit_depth != X264_BIT_DEPTH, "this build supports only bit depth %d\n", X264_BIT_DEPTH )
-    FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" )
+    FAIL_IF_ERROR( bit_depth != X264_BIT_DEPTH, "this build supports only bit depth %d\n", X264_BIT_DEPTH );
+    FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" );

    /* only add the filter to the chain if it's needed */
    if( change_fmt || bit_depth != 8 * x264_cli_csp_depth_factor( csp ) )
    {
-        FAIL_IF_ERROR( !depth_filter_csp_is_supported(csp), "unsupported colorspace.\n" )
+        FAIL_IF_ERROR( !depth_filter_csp_is_supported(csp), "unsupported colorspace.\n" );
        depth_hnd_t *h = x264_malloc( sizeof(depth_hnd_t) + (info->width+1)*sizeof(int16_t) );

        if( !h )
--- a/library/src/main/libenc/jni/libx264/filters/video/internal.c
+++ b/library/src/main/libenc/jni/libx264/filters/video/internal.c
@ -39,7 +39,7 @@ void x264_cli_plane_copy( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int
 int x264_cli_pic_copy( cli_pic_t *out, cli_pic_t *in )
 {
    int csp = in->img.csp & X264_CSP_MASK;
-    FAIL_IF_ERROR( x264_cli_csp_is_invalid( in->img.csp ), "invalid colorspace arg %d\n", in->img.csp )
+    FAIL_IF_ERROR( x264_cli_csp_is_invalid( in->img.csp ), "invalid colorspace arg %d\n", in->img.csp );
    FAIL_IF_ERROR( in->img.csp != out->img.csp || in->img.height != out->img.height
                || in->img.width != out->img.width, "incompatible frame properties\n" );
    /* copy data */
--- a/library/src/main/libenc/jni/libx264/filters/video/resize.c
+++ b/library/src/main/libenc/jni/libx264/filters/video/resize.c
@ -265,7 +265,7 @@ static int handle_opts( const char * const *optlist, char **opts, video_info_t *
    {
        FAIL_IF_ERROR( 2 != sscanf( str_sar, "%u:%u", &out_sar_w, &out_sar_h ) &&
                       2 != sscanf( str_sar, "%u/%u", &out_sar_w, &out_sar_h ),
-                       "invalid sar `%s'\n", str_sar )
+                       "invalid sar `%s'\n", str_sar );
    }
    else
        out_sar_w = out_sar_h = 1;
@ -275,19 +275,19 @@ static int handle_opts( const char * const *optlist, char **opts, video_info_t *
        if( !strcasecmp( fittobox, "both" ) )
        {
            FAIL_IF_ERROR( width <= 0 || height <= 0, "invalid box resolution %sx%s\n",
-                           x264_otos( str_width, "<unset>" ), x264_otos( str_height, "<unset>" ) )
+                           x264_otos( str_width, "<unset>" ), x264_otos( str_height, "<unset>" ) );
        }
        else if( !strcasecmp( fittobox, "width" ) )
        {
-            FAIL_IF_ERROR( width <= 0, "invalid box width `%s'\n", x264_otos( str_width, "<unset>" ) )
+            FAIL_IF_ERROR( width <= 0, "invalid box width `%s'\n", x264_otos( str_width, "<unset>" ) );
            height = INT_MAX;
        }
        else if( !strcasecmp( fittobox, "height" ) )
        {
-            FAIL_IF_ERROR( height <= 0, "invalid box height `%s'\n", x264_otos( str_height, "<unset>" ) )
+            FAIL_IF_ERROR( height <= 0, "invalid box height `%s'\n", x264_otos( str_height, "<unset>" ) );
            width = INT_MAX;
        }
-        else FAIL_IF_ERROR( 1, "invalid fittobox mode `%s'\n", fittobox )
+        else FAIL_IF_ERROR( 1, "invalid fittobox mode `%s'\n", fittobox );

        /* maximally fit the new coded resolution to the box */
        const x264_cli_csp_t *csp = x264_cli_get_csp( h->dst_csp );
@ -313,7 +313,7 @@ static int handle_opts( const char * const *optlist, char **opts, video_info_t *
        if( str_width || str_height )
        {
            FAIL_IF_ERROR( width <= 0 || height <= 0, "invalid resolution %sx%s\n",
-                           x264_otos( str_width, "<unset>" ), x264_otos( str_height, "<unset>" ) )
+                           x264_otos( str_width, "<unset>" ), x264_otos( str_height, "<unset>" ) );
            if( !str_sar ) /* res only -> adjust sar */
            {
                /* new_sar = (new_h * old_w * old_sar_w) / (old_h * new_w * old_sar_h) */
@ -403,7 +403,7 @@ static int check_resizer( resizer_hnd_t *h, cli_pic_t *in )
            return -1;
        h->buffer_allocated = 1;
    }
-    FAIL_IF_ERROR( x264_init_sws_context( h ), "swscale init failed\n" )
+    FAIL_IF_ERROR( x264_init_sws_context( h ), "swscale init failed\n" );
    return 0;
 }

@ -440,7 +440,7 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
            h->variable_input = 1;
            h->dst_csp = pick_closest_supported_csp( info->csp );
            FAIL_IF_ERROR( h->dst_csp == X264_CSP_NONE,
-                           "filter get invalid input pixel format %d (colorspace %d)\n", convert_csp_to_pix_fmt( info->csp ), info->csp )
+                           "filter get invalid input pixel format %d (colorspace %d)\n", convert_csp_to_pix_fmt( info->csp ), info->csp );
        }
        else
        {
@ -479,17 +479,17 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
    FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE,
                   "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ),
                   info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
-    FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) )
+    FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) );
    FAIL_IF_ERROR( h->dst.pix_fmt == AV_PIX_FMT_NONE && dst_pix_fmt_inv != AV_PIX_FMT_NONE,
                   "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ),
                   h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
-    FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )
+    FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) );
    FAIL_IF_ERROR( h->dst.height != info->height && info->interlaced,
-                   "swscale is not compatible with interlaced vertical resizing\n" )
+                   "swscale is not compatible with interlaced vertical resizing\n" );
    /* confirm that the desired resolution meets the colorspace requirements */
    const x264_cli_csp_t *csp = x264_cli_get_csp( h->dst_csp );
    FAIL_IF_ERROR( h->dst.width % csp->mod_width || h->dst.height % csp->mod_height,
-                   "resolution %dx%d is not compliant with colorspace %s\n", h->dst.width, h->dst.height, csp->name )
+                   "resolution %dx%d is not compliant with colorspace %s\n", h->dst.width, h->dst.height, csp->name );

    if( h->dst.width != info->width || h->dst.height != info->height )
        x264_cli_log( NAME, X264_LOG_INFO, "resizing to %dx%d\n", h->dst.width, h->dst.height );
@ -580,7 +580,7 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
    }

    /* pass if nothing needs to be done, otherwise fail */
-    FAIL_IF_ERROR( ret, "not compiled with swscale support\n" )
+    FAIL_IF_ERROR( ret, "not compiled with swscale support\n" );
    return 0;
 }

--- a/library/src/main/libenc/jni/libx264/filters/video/select_every.c
+++ b/library/src/main/libenc/jni/libx264/filters/video/select_every.c
@ -67,16 +67,16 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
        int val = x264_otoi( tok, -1 );
        if( p )
        {
-            FAIL_IF_ERROR( val <= 0, "invalid step `%s'\n", tok )
+            FAIL_IF_ERROR( val <= 0, "invalid step `%s'\n", tok );
            h->step_size = val;
            continue;
        }
-        FAIL_IF_ERROR( val < 0 || val >= h->step_size, "invalid offset `%s'\n", tok )
-        FAIL_IF_ERROR( h->pattern_len >= MAX_PATTERN_SIZE, "max pattern size %d reached\n", MAX_PATTERN_SIZE )
+        FAIL_IF_ERROR( val < 0 || val >= h->step_size, "invalid offset `%s'\n", tok );
+        FAIL_IF_ERROR( h->pattern_len >= MAX_PATTERN_SIZE, "max pattern size %d reached\n", MAX_PATTERN_SIZE );
        offsets[h->pattern_len++] = val;
    }
-    FAIL_IF_ERROR( !h->step_size, "no step size provided\n" )
-    FAIL_IF_ERROR( !h->pattern_len, "no offsets supplied\n" )
+    FAIL_IF_ERROR( !h->step_size, "no step size provided\n" );
+    FAIL_IF_ERROR( !h->pattern_len, "no offsets supplied\n" );

    h->pattern = malloc( h->pattern_len * sizeof(int) );
    if( !h->pattern )
--- a/library/src/main/libenc/jni/libx264/input/avs.c
+++ b/library/src/main/libenc/jni/libx264/input/avs.c
@ -4,6 +4,7 @@
 * Copyright (C) 2009-2016 x264 project
 *
 * Authors: Steven Walters <kemuri9@gmail.com>
+ *          Anton Mitrofanov <BugMaster@narod.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@ -34,7 +35,6 @@
 #define avs_close dlclose
 #define avs_address dlsym
 #else
-#include <windows.h>
 #define avs_open() LoadLibraryW( L"avisynth" )
 #define avs_close FreeLibrary
 #define avs_address GetProcAddress
@ -58,14 +58,6 @@
 #include <libavutil/pixfmt.h>
 #endif

-/* AvxSynth doesn't have yv24, yv16, yv411, or y8, so disable them. */
-#if USE_AVXSYNTH
-#define avs_is_yv24( vi ) 0
-#define avs_is_yv16( vi ) 0
-#define avs_is_yv411( vi ) 0
-#define avs_is_y8( vi ) 0
-#endif
-
 /* maximum size of the sequence of filters to try on non script files */
 #define AVS_MAX_SEQUENCE 5

@ -76,6 +68,14 @@
        goto fail;\
 }

+#define LOAD_AVS_FUNC_ALIAS(name, alias, continue_on_fail)\
+{\
+    if( !h->func.name )\
+        h->func.name = (void*)avs_address( h->library, alias );\
+    if( !continue_on_fail && !h->func.name )\
+        goto fail;\
+}
+
 typedef struct
 {
    AVS_Clip *clip;
@ -96,6 +96,29 @@ typedef struct
        AVSC_DECLARE_FUNC( avs_release_value );
        AVSC_DECLARE_FUNC( avs_release_video_frame );
        AVSC_DECLARE_FUNC( avs_take_clip );
+#if !USE_AVXSYNTH
+        // AviSynth+ extension
+        AVSC_DECLARE_FUNC( avs_is_rgb48 );
+        AVSC_DECLARE_FUNC( avs_is_rgb64 );
+        AVSC_DECLARE_FUNC( avs_is_yuv444p16 );
+        AVSC_DECLARE_FUNC( avs_is_yuv422p16 );
+        AVSC_DECLARE_FUNC( avs_is_yuv420p16 );
+        AVSC_DECLARE_FUNC( avs_is_y16 );
+        AVSC_DECLARE_FUNC( avs_is_yuv444ps );
+        AVSC_DECLARE_FUNC( avs_is_yuv422ps );
+        AVSC_DECLARE_FUNC( avs_is_yuv420ps );
+        AVSC_DECLARE_FUNC( avs_is_y32 );
+        AVSC_DECLARE_FUNC( avs_is_444 );
+        AVSC_DECLARE_FUNC( avs_is_422 );
+        AVSC_DECLARE_FUNC( avs_is_420 );
+        AVSC_DECLARE_FUNC( avs_is_y );
+        AVSC_DECLARE_FUNC( avs_is_yuva );
+        AVSC_DECLARE_FUNC( avs_is_planar_rgb );
+        AVSC_DECLARE_FUNC( avs_is_planar_rgba );
+        AVSC_DECLARE_FUNC( avs_num_components );
+        AVSC_DECLARE_FUNC( avs_component_size );
+        AVSC_DECLARE_FUNC( avs_bits_per_component );
+#endif
    } func;
 } avs_hnd_t;

@ -117,12 +140,66 @@ static int x264_avs_load_library( avs_hnd_t *h )
    LOAD_AVS_FUNC( avs_release_value, 0 );
    LOAD_AVS_FUNC( avs_release_video_frame, 0 );
    LOAD_AVS_FUNC( avs_take_clip, 0 );
+#if !USE_AVXSYNTH
+    // AviSynth+ extension
+    LOAD_AVS_FUNC( avs_is_rgb48, 1 );
+    LOAD_AVS_FUNC_ALIAS( avs_is_rgb48, "_avs_is_rgb48@4", 1 );
+    LOAD_AVS_FUNC( avs_is_rgb64, 1 );
+    LOAD_AVS_FUNC_ALIAS( avs_is_rgb64, "_avs_is_rgb64@4", 1 );
+    LOAD_AVS_FUNC( avs_is_yuv444p16, 1 );
+    LOAD_AVS_FUNC( avs_is_yuv422p16, 1 );
+    LOAD_AVS_FUNC( avs_is_yuv420p16, 1 );
+    LOAD_AVS_FUNC( avs_is_y16, 1 );
+    LOAD_AVS_FUNC( avs_is_yuv444ps, 1 );
+    LOAD_AVS_FUNC( avs_is_yuv422ps, 1 );
+    LOAD_AVS_FUNC( avs_is_yuv420ps, 1 );
+    LOAD_AVS_FUNC( avs_is_y32, 1 );
+    LOAD_AVS_FUNC( avs_is_444, 1 );
+    LOAD_AVS_FUNC( avs_is_422, 1 );
+    LOAD_AVS_FUNC( avs_is_420, 1 );
+    LOAD_AVS_FUNC( avs_is_y, 1 );
+    LOAD_AVS_FUNC( avs_is_yuva, 1 );
+    LOAD_AVS_FUNC( avs_is_planar_rgb, 1 );
+    LOAD_AVS_FUNC( avs_is_planar_rgba, 1 );
+    LOAD_AVS_FUNC( avs_num_components, 1 );
+    LOAD_AVS_FUNC( avs_component_size, 1 );
+    LOAD_AVS_FUNC( avs_bits_per_component, 1 );
+#endif
    return 0;
 fail:
    avs_close( h->library );
+    h->library = NULL;
    return -1;
 }

+/* AvxSynth doesn't have yv24, yv16, yv411, or y8, so disable them. */
+#if USE_AVXSYNTH
+#define avs_is_yv24( vi ) (0)
+#define avs_is_yv16( vi ) (0)
+#define avs_is_yv411( vi ) (0)
+#define avs_is_y8( vi ) (0)
+/* AvxSynth doesn't support AviSynth+ pixel types. */
+#define AVS_IS_AVISYNTHPLUS (0)
+#define AVS_IS_420( vi ) (0)
+#define AVS_IS_422( vi ) (0)
+#define AVS_IS_444( vi ) (0)
+#define AVS_IS_RGB48( vi ) (0)
+#define AVS_IS_RGB64( vi ) (0)
+#define AVS_IS_YUV420P16( vi ) (0)
+#define AVS_IS_YUV422P16( vi ) (0)
+#define AVS_IS_YUV444P16( vi ) (0)
+#else
+#define AVS_IS_AVISYNTHPLUS (h->func.avs_is_420 && h->func.avs_is_422 && h->func.avs_is_444)
+#define AVS_IS_420( vi ) (h->func.avs_is_420 ? h->func.avs_is_420( vi ) : avs_is_yv12( vi ))
+#define AVS_IS_422( vi ) (h->func.avs_is_422 ? h->func.avs_is_422( vi ) : avs_is_yv16( vi ))
+#define AVS_IS_444( vi ) (h->func.avs_is_444 ? h->func.avs_is_444( vi ) : avs_is_yv24( vi ))
+#define AVS_IS_RGB48( vi ) (h->func.avs_is_rgb48 && h->func.avs_is_rgb48( vi ))
+#define AVS_IS_RGB64( vi ) (h->func.avs_is_rgb64 && h->func.avs_is_rgb64( vi ))
+#define AVS_IS_YUV420P16( vi ) (h->func.avs_is_yuv420p16 && h->func.avs_is_yuv420p16( vi ))
+#define AVS_IS_YUV422P16( vi ) (h->func.avs_is_yuv422p16 && h->func.avs_is_yuv422p16( vi ))
+#define AVS_IS_YUV444P16( vi ) (h->func.avs_is_yuv444p16 && h->func.avs_is_yuv444p16( vi ))
+#endif
+
 /* generate a filter sequence to try based on the filename extension */
 static void avs_build_filter_sequence( char *filename_ext, const char *filter[AVS_MAX_SEQUENCE+1] )
 {
@ -160,9 +237,9 @@ static float get_avs_version( avs_hnd_t *h )
 #if USE_AVXSYNTH
    return 2.58f;
 #else
-    FAIL_IF_ERROR( !h->func.avs_function_exists( h->env, "VersionNumber" ), "VersionNumber does not exist\n" )
+    FAIL_IF_ERROR( !h->func.avs_function_exists( h->env, "VersionNumber" ), "VersionNumber does not exist\n" );
    AVS_Value ver = h->func.avs_invoke( h->env, "VersionNumber", avs_new_value_array( NULL, 0 ), NULL );
-    FAIL_IF_ERROR( avs_is_error( ver ), "unable to determine avisynth version: %s\n", avs_as_error( ver ) )
+    FAIL_IF_ERROR( avs_is_error( ver ), "unable to determine avisynth version: %s\n", avs_as_error( ver ) );
    FAIL_IF_ERROR( !avs_is_float( ver ), "VersionNumber did not return a float value\n" );
    float ret = avs_as_float( ver );
    h->func.avs_release_value( ver );
@ -179,10 +256,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    fclose( fh );
    FAIL_IF_ERROR( !b_regular, "AVS input is incompatible with non-regular file `%s'\n", psz_filename );

-    avs_hnd_t *h = malloc( sizeof(avs_hnd_t) );
+    avs_hnd_t *h = calloc( 1, sizeof(avs_hnd_t) );
    if( !h )
        return -1;
-    FAIL_IF_ERROR( x264_avs_load_library( h ), "failed to load avisynth\n" )
+    FAIL_IF_ERROR( x264_avs_load_library( h ), "failed to load avisynth\n" );
    h->env = h->func.avs_create_script_environment( AVS_INTERFACE_25 );
    if( h->func.avs_get_error )
    {
@ -209,7 +286,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    if( !strcasecmp( filename_ext, "avs" ) )
    {
        res = h->func.avs_invoke( h->env, "Import", arg, NULL );
-        FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_string( res ) )
+        FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_string( res ) );
        /* check if the user is using a multi-threaded script and apply distributor if necessary.
           adapted from avisynth's vfw interface */
        AVS_Value mt_test = h->func.avs_invoke( h->env, "GetMTMode", avs_new_value_bool( 0 ), NULL );
@ -249,18 +326,18 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
            }
            x264_cli_printf( X264_LOG_INFO, "failed\n" );
        }
-        FAIL_IF_ERROR( !filter[i], "unable to find source filter to open `%s'\n", psz_filename )
+        FAIL_IF_ERROR( !filter[i], "unable to find source filter to open `%s'\n", psz_filename );
    }
-    FAIL_IF_ERROR( !avs_is_clip( res ), "`%s' didn't return a video clip\n", psz_filename )
+    FAIL_IF_ERROR( !avs_is_clip( res ), "`%s' didn't return a video clip\n", psz_filename );
    h->clip = h->func.avs_take_clip( res, h->env );
    const AVS_VideoInfo *vi = h->func.avs_get_video_info( h->clip );
-    FAIL_IF_ERROR( !avs_has_video( vi ), "`%s' has no video data\n", psz_filename )
+    FAIL_IF_ERROR( !avs_has_video( vi ), "`%s' has no video data\n", psz_filename );
    /* if the clip is made of fields instead of frames, call weave to make them frames */
    if( avs_is_field_based( vi ) )
    {
        x264_cli_log( "avs", X264_LOG_WARNING, "detected fieldbased (separated) input, weaving to frames\n" );
        AVS_Value tmp = h->func.avs_invoke( h->env, "Weave", res, NULL );
-        FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames\n" )
+        FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames\n" );
        res = update_clip( h, &vi, tmp, res );
        info->interlaced = 1;
        info->tff = avs_is_tff( vi );
@ -268,22 +345,35 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
 #if !HAVE_SWSCALE
    /* if swscale is not available, convert the CSP if necessary */
    FAIL_IF_ERROR( avs_version < 2.6f && (opt->output_csp == X264_CSP_I422 || opt->output_csp == X264_CSP_I444),
-                   "avisynth >= 2.6 is required for i422/i444 output\n" )
-    if( (opt->output_csp == X264_CSP_I420 && !avs_is_yv12( vi )) || (opt->output_csp == X264_CSP_I422 && !avs_is_yv16( vi )) ||
-        (opt->output_csp == X264_CSP_I444 && !avs_is_yv24( vi )) || (opt->output_csp == X264_CSP_RGB && !avs_is_rgb( vi )) )
+                   "avisynth >= 2.6 is required for i422/i444 output\n" );
+    if( (opt->output_csp == X264_CSP_I420 && !AVS_IS_420( vi )) ||
+        (opt->output_csp == X264_CSP_I422 && !AVS_IS_422( vi )) ||
+        (opt->output_csp == X264_CSP_I444 && !AVS_IS_444( vi )) ||
+        (opt->output_csp == X264_CSP_RGB && !avs_is_rgb( vi )) )
    {
-
-        const char *csp = opt->output_csp == X264_CSP_I420 ? "YV12" :
+        const char *csp;
+        if( AVS_IS_AVISYNTHPLUS )
+        {
+            csp = opt->output_csp == X264_CSP_I420 ? "YUV420" :
+                  opt->output_csp == X264_CSP_I422 ? "YUV422" :
+                  opt->output_csp == X264_CSP_I444 ? "YUV444" :
+                  "RGB";
+        }
+        else
+        {
+            csp = opt->output_csp == X264_CSP_I420 ? "YV12" :
                  opt->output_csp == X264_CSP_I422 ? "YV16" :
-                          opt->output_csp == X264_CSP_I444 ? "YV24" : "RGB";
+                  opt->output_csp == X264_CSP_I444 ? "YV24" :
+                  "RGB";
+        }
        x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to %s\n", csp );
        FAIL_IF_ERROR( opt->output_csp < X264_CSP_I444 && (vi->width&1),
-                       "input clip width not divisible by 2 (%dx%d)\n", vi->width, vi->height )
+                       "input clip width not divisible by 2 (%dx%d)\n", vi->width, vi->height );
        FAIL_IF_ERROR( opt->output_csp == X264_CSP_I420 && info->interlaced && (vi->height&3),
-                       "input clip height not divisible by 4 (%dx%d)\n", vi->width, vi->height )
+                       "input clip height not divisible by 4 (%dx%d)\n", vi->width, vi->height );
        FAIL_IF_ERROR( (opt->output_csp == X264_CSP_I420 || info->interlaced) && (vi->height&1),
-                       "input clip height not divisible by 2 (%dx%d)\n", vi->width, vi->height )
-        char conv_func[14];
+                       "input clip height not divisible by 2 (%dx%d)\n", vi->width, vi->height );
+        char conv_func[16];
        snprintf( conv_func, sizeof(conv_func), "ConvertTo%s", csp );
        char matrix[7] = "";
        int arg_count = 2;
@ -303,7 +393,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
        arg_arr[1] = avs_new_value_bool( info->interlaced );
        arg_arr[2] = avs_new_value_string( matrix );
        AVS_Value res2 = h->func.avs_invoke( h->env, conv_func, avs_new_value_array( arg_arr, arg_count ), arg_name );
-        FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to %s\n", csp )
+        FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to %s\n", csp );
        res = update_clip( h, &vi, res2, res );
    }
    /* if swscale is not available, change the range if necessary. This only applies to YUV-based CSPs however */
@ -316,7 +406,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
        arg_arr[1] = avs_new_value_string( levels );
        const char *arg_name[] = { NULL, "levels" };
        AVS_Value res2 = h->func.avs_invoke( h->env, "ColorYUV", avs_new_value_array( arg_arr, 2 ), arg_name );
-        FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert range: %s\n", avs_as_error( res2 ) )
+        FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert range: %s\n", avs_as_error( res2 ) );
        res = update_clip( h, &vi, res2, res );
        // notification that the input range has changed to the desired one
        opt->input_range = opt->output_range;
@ -331,14 +421,24 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    info->fps_den = vi->fps_denominator;
    h->num_frames = info->num_frames = vi->num_frames;
    info->thread_safe = 1;
-    if( avs_is_rgb32( vi ) )
+    if( AVS_IS_RGB64( vi ) )
+        info->csp = X264_CSP_BGRA | X264_CSP_VFLIP | X264_CSP_HIGH_DEPTH;
+    else if( avs_is_rgb32( vi ) )
        info->csp = X264_CSP_BGRA | X264_CSP_VFLIP;
+    else if( AVS_IS_RGB48( vi ) )
+        info->csp = X264_CSP_BGR | X264_CSP_VFLIP | X264_CSP_HIGH_DEPTH;
    else if( avs_is_rgb24( vi ) )
        info->csp = X264_CSP_BGR | X264_CSP_VFLIP;
+    else if( AVS_IS_YUV444P16( vi ) )
+        info->csp = X264_CSP_I444 | X264_CSP_HIGH_DEPTH;
    else if( avs_is_yv24( vi ) )
        info->csp = X264_CSP_I444;
+    else if( AVS_IS_YUV422P16( vi ) )
+        info->csp = X264_CSP_I422 | X264_CSP_HIGH_DEPTH;
    else if( avs_is_yv16( vi ) )
        info->csp = X264_CSP_I422;
+    else if( AVS_IS_YUV420P16( vi ) )
+        info->csp = X264_CSP_I420 | X264_CSP_HIGH_DEPTH;
    else if( avs_is_yv12( vi ) )
        info->csp = X264_CSP_I420;
 #if HAVE_SWSCALE
@ -350,7 +450,11 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
        info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER;
 #endif
    else
-        info->csp = X264_CSP_NONE;
+    {
+        AVS_Value pixel_type = h->func.avs_invoke( h->env, "PixelType", res, NULL );
+        const char *pixel_type_name = avs_is_string( pixel_type ) ? avs_as_string( pixel_type ) : "unknown";
+        FAIL_IF_ERROR( 1, "not supported pixel type: %s\n", pixel_type_name );
+    }
    info->vfr = 0;

    *p_handle = h;
@ -382,7 +486,7 @@ static int read_frame( cli_pic_t *pic, hnd_t handle, int i_frame )
        return -1;
    AVS_VideoFrame *frm = pic->opaque = h->func.avs_get_frame( h->clip, i_frame );
    const char *err = h->func.avs_clip_get_error( h->clip );
-    FAIL_IF_ERROR( err, "%s occurred while reading frame %d\n", err, i_frame )
+    FAIL_IF_ERROR( err, "%s occurred while reading frame %d\n", err, i_frame );
    for( int i = 0; i < pic->img.planes; i++ )
    {
        /* explicitly cast away the const attribute to avoid a warning */
@ -407,9 +511,11 @@ static void picture_clean( cli_pic_t *pic, hnd_t handle )
 static int close_file( hnd_t handle )
 {
    avs_hnd_t *h = handle;
+    if( h->func.avs_release_clip && h->clip )
        h->func.avs_release_clip( h->clip );
-    if( h->func.avs_delete_script_environment )
+    if( h->func.avs_delete_script_environment && h->env )
        h->func.avs_delete_script_environment( h->env );
+    if( h->library )
        avs_close( h->library );
    free( h );
    return 0;
--- a/library/src/main/libenc/jni/libx264/input/ffms.c
+++ b/library/src/main/libenc/jni/libx264/input/ffms.c
@ -33,10 +33,6 @@
 #include <libavcodec/avcodec.h>
 #include <libswscale/swscale.h>

-#ifdef _WIN32
-#include <windows.h>
-#endif
-
 #define PROGRESS_LENGTH 36

 typedef struct
@ -106,14 +102,14 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    if( !idx )
    {
        FFMS_Indexer *indexer = FFMS_CreateIndexer( psz_filename, &e );
-        FAIL_IF_ERROR( !indexer, "could not create indexer\n" )
+        FAIL_IF_ERROR( !indexer, "could not create indexer\n" );

        if( opt->progress )
            FFMS_SetProgressCallback( indexer, update_progress, &h->time );

        idx = FFMS_DoIndexing2( indexer, FFMS_IEH_ABORT, &e );
        fprintf( stderr, "%*c", PROGRESS_LENGTH+1, '\r' );
-        FAIL_IF_ERROR( !idx, "could not create index\n" )
+        FAIL_IF_ERROR( !idx, "could not create index\n" );

        if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
            x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
@ -124,8 +120,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
        h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
    FFMS_DestroyIndex( idx );

-    FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
-    FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
+    FAIL_IF_ERROR( trackno < 0, "could not find video track\n" );
+    FAIL_IF_ERROR( !h->video_source, "could not create video source\n" );

    const FFMS_VideoProperties *videop = FFMS_GetVideoProperties( h->video_source );
    info->num_frames   = h->num_frames = videop->NumFrames;
@ -138,7 +134,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    info->thread_safe  = 0;

    const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, 0, &e );
-    FAIL_IF_ERROR( !frame, "could not read frame 0\n" )
+    FAIL_IF_ERROR( !frame, "could not read frame 0\n" );

    info->fullrange  = 0;
    info->width      = frame->EncodedWidth;
@ -189,7 +185,7 @@ static int read_frame( cli_pic_t *pic, hnd_t handle, int i_frame )
    FFMS_ErrorInfo e;
    e.BufferSize = 0;
    const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, i_frame, &e );
-    FAIL_IF_ERROR( !frame, "could not read frame %d \n", i_frame )
+    FAIL_IF_ERROR( !frame, "could not read frame %d \n", i_frame );

    memcpy( pic->img.stride, frame->Linesize, sizeof(pic->img.stride) );
    memcpy( pic->img.plane, frame->Data, sizeof(pic->img.plane) );
@ -198,7 +194,7 @@ static int read_frame( cli_pic_t *pic, hnd_t handle, int i_frame )
    {
        const FFMS_FrameInfo *info = FFMS_GetFrameInfo( h->track, i_frame );
        FAIL_IF_ERROR( info->PTS == AV_NOPTS_VALUE, "invalid timestamp. "
-                       "Use --force-cfr and specify a framerate with --fps\n" )
+                       "Use --force-cfr and specify a framerate with --fps\n" );

        pic->pts = info->PTS >> h->reduce_pts;
        pic->duration = 0;
--- a/library/src/main/libenc/jni/libx264/input/input.c
+++ b/library/src/main/libenc/jni/libx264/input/input.c
@ -28,7 +28,6 @@

 #ifdef _WIN32
 #include <io.h>
-#include <windows.h>
 #elif HAVE_MMAP
 #include <sys/mman.h>
 #include <unistd.h>
@ -154,6 +153,8 @@ int x264_cli_mmap_init( cli_mmap_t *h, FILE *fh )
        SYSTEM_INFO si;
        GetSystemInfo( &si );
        h->align_mask = si.dwAllocationGranularity - 1;
+        h->prefetch_virtual_memory = (void*)GetProcAddress( GetModuleHandleW( L"kernel32.dll" ), "PrefetchVirtualMemory" );
+        h->process_handle = GetCurrentProcess();
        h->map_handle = CreateFileMappingW( osfhandle, NULL, PAGE_READONLY, 0, 0, NULL );
        return !h->map_handle;
    }
@ -173,9 +174,16 @@ void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, size_t size )
    size   += align;
 #ifdef _WIN32
    uint8_t *base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, size );
-    /* TODO: Would PrefetchVirtualMemory() (only available on Win8+) be beneficial? */
    if( base )
+    {
+        /* PrefetchVirtualMemory() is only available on Windows 8 and newer. */
+        if( h->prefetch_virtual_memory )
+        {
+            struct { void *addr; size_t size; } mem_range = { base, size };
+            h->prefetch_virtual_memory( h->process_handle, 1, &mem_range, 0 );
+        }
        return base + align;
+    }
 #else
    uint8_t *base = mmap( NULL, size, PROT_READ, MAP_PRIVATE, h->fd, offset );
    if( base != MAP_FAILED )
--- a/library/src/main/libenc/jni/libx264/input/input.h
+++ b/library/src/main/libenc/jni/libx264/input/input.h
@ -30,6 +30,10 @@

 #include "x264cli.h"

+#ifdef _WIN32
+#include <windows.h>
+#endif
+
 /* options that are used by only some demuxers */
 typedef struct
 {
@ -135,7 +139,9 @@ typedef struct
 {
    int align_mask;
 #ifdef _WIN32
-    void *map_handle;
+    BOOL (WINAPI *prefetch_virtual_memory)( HANDLE, ULONG_PTR, PVOID, ULONG );
+    HANDLE process_handle;
+    HANDLE map_handle;
 #elif HAVE_MMAP
    int fd;
 #endif
--- a/library/src/main/libenc/jni/libx264/input/lavf.c
+++ b/library/src/main/libenc/jni/libx264/input/lavf.c
@ -168,15 +168,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    if( opt->format )
        FAIL_IF_ERROR( !(format = av_find_input_format( opt->format )), "unknown file format: %s\n", opt->format );

-    FAIL_IF_ERROR( avformat_open_input( &h->lavf, psz_filename, format, &options ), "could not open input file\n" )
+    FAIL_IF_ERROR( avformat_open_input( &h->lavf, psz_filename, format, &options ), "could not open input file\n" );
    if( options )
        av_dict_free( &options );
-    FAIL_IF_ERROR( avformat_find_stream_info( h->lavf, NULL ) < 0, "could not find input stream info\n" )
+    FAIL_IF_ERROR( avformat_find_stream_info( h->lavf, NULL ) < 0, "could not find input stream info\n" );

    int i = 0;
    while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != AVMEDIA_TYPE_VIDEO )
        i++;
-    FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" )
+    FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" );
    h->stream_id       = i;
    h->next_frame      = 0;
    AVCodecContext *c  = h->lavf->streams[i]->codec;
@ -188,13 +188,13 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    info->thread_safe  = 0;
    h->vfr_input       = info->vfr;
    FAIL_IF_ERROR( avcodec_open2( c, avcodec_find_decoder( c->codec_id ), NULL ),
-                   "could not find decoder for video stream\n" )
+                   "could not find decoder for video stream\n" );

    /* prefetch the first frame and set/confirm flags */
    h->first_pic = malloc( sizeof(cli_pic_t) );
    FAIL_IF_ERROR( !h->first_pic || lavf_input.picture_alloc( h->first_pic, h, X264_CSP_OTHER, info->width, info->height ),
-                   "malloc failed\n" )
-    else if( read_frame_internal( h->first_pic, h, 0, info ) )
+                   "malloc failed\n" );
+    if( read_frame_internal( h->first_pic, h, 0, info ) )
        return -1;

    info->width      = c->width;
--- a/library/src/main/libenc/jni/libx264/input/raw.c
+++ b/library/src/main/libenc/jni/libx264/input/raw.c
@ -54,7 +54,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    }
    else
        sscanf( opt->resolution, "%dx%d", &info->width, &info->height );
-    FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" )
+    FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" );
    if( opt->colorspace )
    {
        for( info->csp = X264_CSP_CLI_MAX-1; info->csp > X264_CSP_NONE; info->csp-- )
--- a/library/src/main/libenc/jni/libx264/input/thread.c
+++ b/library/src/main/libenc/jni/libx264/input/thread.c
@ -49,7 +49,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
 {
    thread_hnd_t *h = malloc( sizeof(thread_hnd_t) );
    FAIL_IF_ERR( !h || cli_input.picture_alloc( &h->pic, *p_handle, info->csp, info->width, info->height ),
-                 "x264", "malloc failed\n" )
+                 "x264", "malloc failed\n" );
    h->input = cli_input;
    h->p_handle = *p_handle;
    h->next_frame = -1;
--- a/library/src/main/libenc/jni/libx264/input/timecode.c
+++ b/library/src/main/libenc/jni/libx264/input/timecode.c
@ -61,7 +61,7 @@ static double correct_fps( double fps, timecode_hnd_t *h )
        fps_den = i * h->timebase_num;
        fps_num = round( fps_den * fps_sig ) * exponent;
        FAIL_IF_ERROR( fps_num > UINT32_MAX, "tcfile fps correction failed.\n"
-                       "                  Specify an appropriate timebase manually or remake tcfile.\n" )
+                       "                  Specify an appropriate timebase manually or remake tcfile.\n" );
        if( fabs( ((double)fps_num / fps_den) / exponent - fps_sig ) < DOUBLE_EPSILON )
            break;
        ++i;
@ -87,7 +87,7 @@ static int try_mkv_timebase_den( double *fpss, timecode_hnd_t *h, int loop_num )
        fps_den = round( MKV_TIMEBASE_DEN / fps_sig ) / exponent;
        h->timebase_num = fps_den && h->timebase_num ? gcd( h->timebase_num, fps_den ) : fps_den;
        FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || !h->timebase_num, "automatic timebase generation failed.\n"
-                       "                  Specify timebase manually.\n" )
+                       "                  Specify timebase manually.\n" );
    }
    return 0;
 }
@ -100,7 +100,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
    double *fpss = NULL;

    ret = fscanf( tcfile_in, "# timecode format v%d", &tcfv );
-    FAIL_IF_ERROR( ret != 1 || (tcfv != 1 && tcfv != 2), "unsupported timecode format\n" )
+    FAIL_IF_ERROR( ret != 1 || (tcfv != 1 && tcfv != 2), "unsupported timecode format\n" );
 #define NO_TIMECODE_LINE (buff[0] == '#' || buff[0] == '\n' || buff[0] == '\r')
    if( tcfv == 1 )
    {
@ -115,10 +115,10 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
            if( NO_TIMECODE_LINE )
                continue;
            FAIL_IF_ERROR( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1,
-                           "tcfile parsing error: assumed fps not found\n" )
+                           "tcfile parsing error: assumed fps not found\n" );
            break;
        }
-        FAIL_IF_ERROR( h->assume_fps <= 0, "invalid assumed fps %.6f\n", h->assume_fps )
+        FAIL_IF_ERROR( h->assume_fps <= 0, "invalid assumed fps %.6f\n", h->assume_fps );

        file_pos = ftell( tcfile_in );
        h->stored_pts_num = 0;
@ -131,9 +131,9 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
                continue;
            }
            ret = sscanf( buff, "%d,%d,%lf", &start, &end, &seq_fps );
-            FAIL_IF_ERROR( ret != 3 && ret != EOF, "invalid input tcfile\n" )
+            FAIL_IF_ERROR( ret != 3 && ret != EOF, "invalid input tcfile\n" );
            FAIL_IF_ERROR( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0,
-                           "invalid input tcfile at line %d: %s\n", num, buff )
+                           "invalid input tcfile at line %d: %s\n", num, buff );
            prev_start = start;
            prev_end = end;
            if( h->auto_timebase_den || h->auto_timebase_num )
@ -234,7 +234,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
            h->stored_pts_num++;
        }
        timecodes_num = h->stored_pts_num;
-        FAIL_IF_ERROR( !timecodes_num, "input tcfile doesn't have any timecodes!\n" )
+        FAIL_IF_ERROR( !timecodes_num, "input tcfile doesn't have any timecodes!\n" );
        fseek( tcfile_in, file_pos, SEEK_SET );

        timecodes = malloc( timecodes_num * sizeof(double) );
@ -246,7 +246,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
        {
            ret = sscanf( buff, "%lf", &timecodes[0] );
            timecodes[0] *= 1e-3;         /* Timecode format v2 is expressed in milliseconds. */
-            FAIL_IF_ERROR( ret != 1, "invalid input tcfile for frame 0\n" )
+            FAIL_IF_ERROR( ret != 1, "invalid input tcfile for frame 0\n" );
            for( num = 1; num < timecodes_num && fgets( buff, sizeof(buff), tcfile_in ) != NULL; )
            {
                if( NO_TIMECODE_LINE )
@ -254,11 +254,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
                ret = sscanf( buff, "%lf", &timecodes[num] );
                timecodes[num] *= 1e-3;         /* Timecode format v2 is expressed in milliseconds. */
                FAIL_IF_ERROR( ret != 1 || timecodes[num] <= timecodes[num - 1],
-                               "invalid input tcfile for frame %d\n", num )
+                               "invalid input tcfile for frame %d\n", num );
                ++num;
            }
        }
-        FAIL_IF_ERROR( num < timecodes_num, "failed to read input tcfile for frame %d", num )
+        FAIL_IF_ERROR( num < timecodes_num, "failed to read input tcfile for frame %d", num );

        if( timecodes_num == 1 )
            h->timebase_den = info->fps_num;
@ -314,7 +314,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
        x264_cli_log( "timecode", X264_LOG_INFO, "automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
    }
    else FAIL_IF_ERROR( h->timebase_den > UINT32_MAX || !h->timebase_den, "automatic timebase generation failed.\n"
-                        "                  Specify an appropriate timebase manually.\n" )
+                        "                  Specify an appropriate timebase manually.\n" );

    h->pts = malloc( h->stored_pts_num * sizeof(int64_t) );
    if( !h->pts )
@ -322,7 +322,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
    for( num = 0; num < h->stored_pts_num; num++ )
    {
        h->pts[num] = timecodes[num] * ((double)h->timebase_den / h->timebase_num) + 0.5;
-        FAIL_IF_ERROR( num > 0 && h->pts[num] <= h->pts[num - 1], "invalid timebase or timecode for frame %d\n", num )
+        FAIL_IF_ERROR( num > 0 && h->pts[num] <= h->pts[num - 1], "invalid timebase or timecode for frame %d\n", num );
    }

    free( timecodes );
@ -344,7 +344,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
    int ret = 0;
    FILE *tcfile_in;
    timecode_hnd_t *h = malloc( sizeof(timecode_hnd_t) );
-    FAIL_IF_ERROR( !h, "malloc failed\n" )
+    FAIL_IF_ERROR( !h, "malloc failed\n" );
    h->input = cli_input;
    h->p_handle = *p_handle;
    h->pts = NULL;
@ -357,7 +357,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
            h->timebase_den = 0; /* set later by auto timebase generation */
        }
        FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX,
-                       "timebase you specified exceeds H.264 maximum\n" )
+                       "timebase you specified exceeds H.264 maximum\n" );
    }
    h->auto_timebase_num = !ret;
    h->auto_timebase_den = ret < 2;
@ -367,8 +367,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
        h->timebase_den = 0;             /* set later by auto timebase generation */

    tcfile_in = x264_fopen( psz_filename, "rb" );
-    FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
-    else if( !x264_is_regular_file( tcfile_in ) )
+    FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename );
+    if( !x264_is_regular_file( tcfile_in ) )
    {
        x264_cli_log( "timecode", X264_LOG_ERROR, "tcfile input incompatible with non-regular file `%s'\n", psz_filename );
        fclose( tcfile_in );
--- a/library/src/main/libenc/jni/libx264/input/y4m.c
+++ b/library/src/main/libenc/jni/libx264/input/y4m.c
@ -99,8 +99,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
            break;
        }
    }
-    FAIL_IF_ERROR( strncmp( header, Y4M_MAGIC, sizeof(Y4M_MAGIC)-1 ), "bad sequence header magic\n" )
-    FAIL_IF_ERROR( i == MAX_YUV4_HEADER, "bad sequence header length\n" )
+    FAIL_IF_ERROR( strncmp( header, Y4M_MAGIC, sizeof(Y4M_MAGIC)-1 ), "bad sequence header magic\n" );
+    FAIL_IF_ERROR( i == MAX_YUV4_HEADER, "bad sequence header length\n" );

    /* Scan properties */
    header_end = &header[i+1]; /* Include space */
@ -187,7 +187,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
        h->bit_depth  = 8;
    }

-    FAIL_IF_ERROR( colorspace <= X264_CSP_NONE || colorspace >= X264_CSP_MAX, "colorspace unhandled\n" )
+    FAIL_IF_ERROR( colorspace <= X264_CSP_NONE || colorspace >= X264_CSP_MAX, "colorspace unhandled\n" );
    FAIL_IF_ERROR( h->bit_depth < 8 || h->bit_depth > 16, "unsupported bit depth `%d'\n", h->bit_depth );

    info->thread_safe = 1;
@ -215,7 +215,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
        int len = 1;
        while( len <= MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' )
            len++;
-        FAIL_IF_ERROR( len > MAX_FRAME_HEADER || len < sizeof(Y4M_FRAME_MAGIC), "bad frame header length\n" )
+        FAIL_IF_ERROR( len > MAX_FRAME_HEADER || len < sizeof(Y4M_FRAME_MAGIC), "bad frame header length\n" );
        h->frame_header_len = len;
        h->frame_size += len;

@ -252,7 +252,7 @@ static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc )
         * produces y4m files with variable-length frame headers so just error out if that happens. */
        while( i <= h->frame_header_len && header[i-1] != '\n' )
            i++;
-        FAIL_IF_ERROR( i != h->frame_header_len, "bad frame header length\n" )
+        FAIL_IF_ERROR( i != h->frame_header_len, "bad frame header length\n" );
    }
    else
    {
@ -261,9 +261,9 @@ static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc )
            return -1;
        while( i <= MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' )
            i++;
-        FAIL_IF_ERROR( i > MAX_FRAME_HEADER, "bad frame header length\n" )
+        FAIL_IF_ERROR( i > MAX_FRAME_HEADER, "bad frame header length\n" );
    }
-    FAIL_IF_ERROR( memcmp( header, Y4M_FRAME_MAGIC, slen ), "bad frame header magic\n" )
+    FAIL_IF_ERROR( memcmp( header, Y4M_FRAME_MAGIC, slen ), "bad frame header magic\n" );

    for( i = 0; i < pic->img.planes; i++ )
    {
--- a/library/src/main/libenc/jni/libx264/output/matroska_ebml.c
+++ b/library/src/main/libenc/jni/libx264/output/matroska_ebml.c
@ -245,7 +245,7 @@ static int mk_write_bin( mk_context *c, unsigned id, const void *data, unsigned
 {
    CHECK( mk_write_id( c, id ) );
    CHECK( mk_write_size( c, size ) );
-    CHECK( mk_append_context_data( c, data, size ) ) ;
+    CHECK( mk_append_context_data( c, data, size ) );
    return 0;
 }

--- a/library/src/main/libenc/jni/libx264/output/mp4.c
+++ b/library/src/main/libenc/jni/libx264/output/mp4.c
@ -171,7 +171,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt
        return -1;
    int b_regular = x264_is_regular_file( fh );
    fclose( fh );
-    FAIL_IF_ERR( !b_regular, "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
+    FAIL_IF_ERR( !b_regular, "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename );

    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
    if( !p_mp4 )
@ -180,7 +180,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt
 #ifdef _WIN32
    /* GPAC doesn't support Unicode filenames. */
    char ansi_filename[MAX_PATH];
-    FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" )
+    FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" );
    p_mp4->p_file = gf_isom_open( ansi_filename, GF_ISOM_OPEN_WRITE, NULL );
 #else
    p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL );
@ -210,7 +210,7 @@ static int set_param( hnd_t handle, x264_param_t *p_param )

    p_mp4->i_time_res = (uint64_t)p_param->i_timebase_den * p_mp4->i_dts_compress_multiplier;
    p_mp4->i_time_inc = (uint64_t)p_param->i_timebase_num * p_mp4->i_dts_compress_multiplier;
-    FAIL_IF_ERR( p_mp4->i_time_res > UINT32_MAX, "mp4", "MP4 media timescale %"PRIu64" exceeds maximum\n", p_mp4->i_time_res )
+    FAIL_IF_ERR( p_mp4->i_time_res > UINT32_MAX, "mp4", "MP4 media timescale %"PRIu64" exceeds maximum\n", p_mp4->i_time_res );

    p_mp4->i_track = gf_isom_new_track( p_mp4->p_file, 0, GF_ISOM_MEDIA_VISUAL,
                                        p_mp4->i_time_res );
@ -230,7 +230,7 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
        uint64_t dh = p_param->i_height << 16;
        double sar = (double)p_param->vui.i_sar_width / p_param->vui.i_sar_height;
        if( sar > 1.0 )
-            dw *= sar ;
+            dw *= sar;
        else
            dh /= sar;
        gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height );
--- a/library/src/main/libenc/jni/libx264/output/mp4_lsmash.c
+++ b/library/src/main/libenc/jni/libx264/output/mp4_lsmash.c
@ -41,19 +41,25 @@

 /* For close_file() */
 #define MP4_LOG_IF_ERR( cond, ... )\
-if( cond )\
+do\
 {\
+    if( cond )\
+    {\
        MP4_LOG_ERROR( __VA_ARGS__ );\
-}
+    }\
+} while( 0 )

 /* For open_file() */
 #define MP4_FAIL_IF_ERR_EX( cond, ... )\
-if( cond )\
+do\
 {\
+    if( cond )\
+    {\
        remove_mp4_hnd( p_mp4 );\
        MP4_LOG_ERROR( __VA_ARGS__ );\
        return -1;\
-}
+    }\
+} while( 0 )

 /*******************/

--- a/library/src/main/libenc/jni/libx264/tools/checkasm.c
+++ b/library/src/main/libenc/jni/libx264/tools/checkasm.c
@ -29,6 +29,10 @@
 #include "common/common.h"
 #include "common/cpu.h"

+#ifdef _WIN32
+#include <windows.h>
+#endif
+
 // GCC doesn't align stack variables on ARM, so use .bss
 #if ARCH_ARM
 #undef ALIGNED_16
@ -1021,8 +1025,8 @@ static int check_dct( int cpu_ref, int cpu_new )
    x264_zigzag_function_t zigzag_ref[2];
    x264_zigzag_function_t zigzag_asm[2];

-    ALIGNED_16( dctcoef level1[64] );
-    ALIGNED_16( dctcoef level2[64] );
+    ALIGNED_ARRAY_16( dctcoef, level1,[64] );
+    ALIGNED_ARRAY_16( dctcoef, level2,[64] );

 #define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
    if( zigzag_asm[interlace].name != zigzag_ref[interlace].name ) \
@ -2197,7 +2201,7 @@ static int check_quant( int cpu_ref, int cpu_new )
                int dmf = h->dequant4_mf[CQM_4IC][qpdc%6][0] << qpdc/6; \
                if( dmf > 32*64 ) \
                    continue; \
-                for( int i = 16; ; i <<= 1 ) \
+                for( int i = 16;; i <<= 1 ) \
                { \
                    int res_c, res_asm; \
                    int max = X264_MIN( i, PIXEL_MAX*16 ); \
--- a/library/src/main/libenc/jni/libx264/x264.c
+++ b/library/src/main/libenc/jni/libx264/x264.c
@ -355,10 +355,10 @@ int main( int argc, char **argv )
    cli_opt_t opt = {0};
    int ret = 0;

-    FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" )
+    FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" );

 #ifdef _WIN32
-    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" )
+    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" );

    GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
    _setmode( _fileno( stdin ),  _O_BINARY );
@ -1256,7 +1256,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
            cli_input = raw_input;
        }

-        FAIL_IF_ERROR( !(*p_handle), "could not open input file `%s' via any method!\n", filename )
+        FAIL_IF_ERROR( !(*p_handle), "could not open input file `%s' via any method!\n", filename );
    }
    strcpy( used_demuxer, module );

@ -1432,17 +1432,17 @@ static int parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
                output_filename = optarg;
                break;
            case OPT_MUXER:
-                FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg )
+                FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg );
                break;
            case OPT_DEMUXER:
-                FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg )
+                FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg );
                break;
            case OPT_INDEX:
                input_opt.index_file = optarg;
                break;
            case OPT_QPFILE:
                opt->qpfile = x264_fopen( optarg, "rb" );
-                FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
+                FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg );
                if( !x264_is_regular_file( opt->qpfile ) )
                {
                    x264_cli_log( "x264", X264_LOG_ERROR, "qpfile incompatible with non-regular file `%s'\n", optarg );
@ -1493,13 +1493,13 @@ static int parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
                break;
            case OPT_TCFILE_OUT:
                opt->tcfile_out = x264_fopen( optarg, "wb" );
-                FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
+                FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg );
                break;
            case OPT_TIMEBASE:
                input_opt.timebase = optarg;
                break;
            case OPT_PULLDOWN:
-                FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg )
+                FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg );
                break;
            case OPT_VIDEO_FILTER:
                vid_filters = optarg;
@ -1520,7 +1520,7 @@ static int parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
                output_opt.use_dts_compress = 1;
                break;
            case OPT_OUTPUT_CSP:
-                FAIL_IF_ERROR( parse_enum_value( optarg, output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg )
+                FAIL_IF_ERROR( parse_enum_value( optarg, output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg );
                // correct the parsed value to the libx264 csp value
 #if X264_CHROMA_FORMAT
                static const uint8_t output_csp_fix[] = { X264_CHROMA_FORMAT, X264_CSP_RGB };
@ -1530,7 +1530,7 @@ static int parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
                param->i_csp = output_csp = output_csp_fix[output_csp];
                break;
            case OPT_INPUT_RANGE:
-                FAIL_IF_ERROR( parse_enum_value( optarg, range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg )
+                FAIL_IF_ERROR( parse_enum_value( optarg, range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg );
                input_opt.input_range += RANGE_AUTO;
                break;
            case OPT_RANGE:
@ -1577,11 +1577,11 @@ generic_option:

    /* Get the file name */
    FAIL_IF_ERROR( optind > argc - 1 || !output_filename, "No %s file. Run x264 --help for a list of options.\n",
-                   optind > argc - 1 ? "input" : "output" )
+                   optind > argc - 1 ? "input" : "output" );

    if( select_output( muxer, output_filename, param ) )
        return -1;
-    FAIL_IF_ERROR( cli_output.open_file( output_filename, &opt->hout, &output_opt ), "could not open output file `%s'\n", output_filename )
+    FAIL_IF_ERROR( cli_output.open_file( output_filename, &opt->hout, &output_opt ), "could not open output file `%s'\n", output_filename );

    input_filename = argv[optind++];
    video_info_t info = {0};
@ -1609,7 +1609,7 @@ generic_option:
        return -1;

    FAIL_IF_ERROR( !opt->hin && cli_input.open_file( input_filename, &opt->hin, &info, &input_opt ),
-                   "could not open input file `%s'\n", input_filename )
+                   "could not open input file `%s'\n", input_filename );

    x264_reduce_fraction( &info.sar_width, &info.sar_height );
    x264_reduce_fraction( &info.fps_num, &info.fps_den );
@ -1619,11 +1619,11 @@ generic_option:

    if( tcfile_name )
    {
-        FAIL_IF_ERROR( b_user_fps, "--fps + --tcfile-in is incompatible.\n" )
-        FAIL_IF_ERROR( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ), "timecode input failed\n" )
+        FAIL_IF_ERROR( b_user_fps, "--fps + --tcfile-in is incompatible.\n" );
+        FAIL_IF_ERROR( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ), "timecode input failed\n" );
        cli_input = timecode_input;
    }
-    else FAIL_IF_ERROR( !info.vfr && input_opt.timebase, "--timebase is incompatible with cfr input\n" )
+    else FAIL_IF_ERROR( !info.vfr && input_opt.timebase, "--timebase is incompatible with cfr input\n" );

    /* init threaded input while the information about the input video is unaltered by filtering */
 #if HAVE_THREAD
@ -1660,14 +1660,14 @@ generic_option:
        uint64_t i_user_timebase_num;
        uint64_t i_user_timebase_den;
        int ret = sscanf( input_opt.timebase, "%"SCNu64"/%"SCNu64, &i_user_timebase_num, &i_user_timebase_den );
-        FAIL_IF_ERROR( !ret, "invalid argument: timebase = %s\n", input_opt.timebase )
-        else if( ret == 1 )
+        FAIL_IF_ERROR( !ret, "invalid argument: timebase = %s\n", input_opt.timebase );
+        if( ret == 1 )
        {
            i_user_timebase_num = info.timebase_num;
            i_user_timebase_den = strtoul( input_opt.timebase, NULL, 10 );
        }
        FAIL_IF_ERROR( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX,
-                       "timebase you specified exceeds H.264 maximum\n" )
+                       "timebase you specified exceeds H.264 maximum\n" );
        opt->timebase_convert_multiplier = ((double)i_user_timebase_den / info.timebase_den)
                                         * ((double)info.timebase_num / i_user_timebase_num);
        info.timebase_num = i_user_timebase_num;
@ -1719,7 +1719,7 @@ generic_option:
        if( input_opt.output_range == RANGE_AUTO )
            param->vui.b_fullrange = RANGE_PC;
        /* otherwise fail if they specified tv */
-        FAIL_IF_ERROR( !param->vui.b_fullrange, "RGB must be PC range" )
+        FAIL_IF_ERROR( !param->vui.b_fullrange, "RGB must be PC range" );
    }

    /* Automatically reduce reference frame count to match the user's target level
@ -1840,12 +1840,15 @@ static void convert_cli_to_lib_pic( x264_picture_t *lib, cli_pic_t *cli )
 }

 #define FAIL_IF_ERROR2( cond, ... )\
-if( cond )\
+do\
 {\
+    if( cond )\
+    {\
        x264_cli_log( "x264", X264_LOG_ERROR, __VA_ARGS__ );\
        retval = -1;\
        goto fail;\
-}
+    }\
+} while( 0 )

 static int encode( x264_param_t *param, cli_opt_t *opt )
 {
@ -1881,7 +1884,7 @@ static int encode( x264_param_t *param, cli_opt_t *opt )
        pulldown = &pulldown_values[opt->i_pulldown];
        param->i_timebase_num = param->i_fps_den;
        FAIL_IF_ERROR2( fmod( param->i_fps_num * pulldown->fps_factor, 1 ),
-                        "unsupported framerate for chosen pulldown\n" )
+                        "unsupported framerate for chosen pulldown\n" );
        param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
    }

@ -1896,7 +1899,7 @@ static int encode( x264_param_t *param, cli_opt_t *opt )

    /* ticks/frame = ticks/second / frames/second */
    ticks_per_frame = (int64_t)param->i_timebase_den * param->i_fps_den / param->i_timebase_num / param->i_fps_num;
-    FAIL_IF_ERROR2( ticks_per_frame < 1 && !param->b_vfr_input, "ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame )
+    FAIL_IF_ERROR2( ticks_per_frame < 1 && !param->b_vfr_input, "ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame );
    ticks_per_frame = X264_MAX( ticks_per_frame, 1 );

    if( !param->b_repeat_headers )
@ -1905,7 +1908,7 @@ static int encode( x264_param_t *param, cli_opt_t *opt )
        x264_nal_t *headers;
        int i_nal;

-        FAIL_IF_ERROR2( x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" )
+        FAIL_IF_ERROR2( x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" );
        FAIL_IF_ERROR2( (i_file = cli_output.write_headers( opt->hout, headers )) < 0, "error writing headers to output file\n" );
    }

--- a/library/src/main/libenc/jni/libx264/x264_config.h
+++ b/library/src/main/libenc/jni/libx264/x264_config.h
@ -1,6 +0,0 @@
-#define X264_BIT_DEPTH     8
-#define X264_GPL           1
-#define X264_INTERLACED    1
-#define X264_CHROMA_FORMAT 0
-#define X264_VERSION ""
-#define X264_POINTVER "0.148.x"
--- a/library/src/main/libenc/jni/libx264/x264cli.h
+++ b/library/src/main/libenc/jni/libx264/x264cli.h
@ -71,11 +71,14 @@ int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int
 #endif

 #define RETURN_IF_ERR( cond, name, ret, ... )\
-if( cond )\
+do\
 {\
+    if( cond )\
+    {\
        x264_cli_log( name, X264_LOG_ERROR, __VA_ARGS__ );\
        return ret;\
-}
+    }\
+} while( 0 )

 #define FAIL_IF_ERR( cond, name, ... ) RETURN_IF_ERR( cond, name, -1, __VA_ARGS__ )

--- a/library/src/main/libenc/jni/libx264/x264res.rc
+++ b/library/src/main/libenc/jni/libx264/x264res.rc
@ -0,0 +1,82 @@
+/*****************************************************************************
+ * x264res.rc: windows resource file
+ *****************************************************************************
+ * Copyright (C) 2012-2016 x264 project
+ *
+ * Authors: Henrik Gramner <henrik@gramner.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include <windows.h>
+#include <stdint.h>
+#include "x264.h"
+
+#ifndef X264_REV
+#define X264_REV 0
+#define X264_REV_DIFF 0
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+VS_VERSION_INFO VERSIONINFO
+FILEVERSION     0, X264_BUILD, X264_REV, X264_REV_DIFF
+PRODUCTVERSION  0, X264_BUILD, X264_REV, X264_REV_DIFF
+FILEFLAGSMASK   VS_FFI_FILEFLAGSMASK
+#ifdef DEBUG
+FILEFLAGS       VS_FF_DEBUG
+#else
+FILEFLAGS       0
+#endif
+FILEOS          VOS_NT_WINDOWS32 /* Identical for x86-64 */
+#ifdef DLL
+FILETYPE        VFT_DLL
+#else
+FILETYPE        VFT_APP
+#endif
+FILESUBTYPE     VFT2_UNKNOWN
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904B0"
+        BEGIN
+            VALUE "CompanyName",      "x264 project"
+#ifdef DLL
+            VALUE "FileDescription",  "H.264 (MPEG-4 AVC) encoder library"
+#else
+            VALUE "FileDescription",  "H.264 (MPEG-4 AVC) encoder"
+#endif
+            VALUE "FileVersion",      X264_POINTVER
+            VALUE "InternalName",     "x264"
+            VALUE "LegalCopyright",   "Copyright (C) 2003-2016 x264 project"
+#ifdef DLL
+            VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll"
+#else
+            VALUE "OriginalFilename", "x264.exe"
+#endif
+            VALUE "ProductName",      "x264"
+            VALUE "ProductVersion",   X264_POINTVER
+        END
+    END
+
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x0409, 0x04B0 /* U.S. English (Unicode) */
+    END
+END