RGBA >> YUV420SP
- C/C++ 朴素实现版
/**
 * Scalar reference conversion from a packed 4-byte-per-pixel image to
 * YUV420SP with V/U interleaved chroma (the ordering the original notes
 * call NV21).
 *
 * Input layout, as read by this function: for every pixel, byte 0 is used
 * as blue, byte 1 as green, byte 2 as red; byte 3 is skipped.
 *
 * Output layout, as written by this function:
 *   - width * height luma bytes starting at yuv420sp[0];
 *   - followed, starting at yuv420sp[width * height], by one (V, U) byte
 *     pair for every pixel whose row index and column index are both even.
 *
 * @param yuv420sp destination buffer; must be large enough for the luma
 *                 plane plus the chroma pairs described above.
 * @param argb     source pixels, 4 bytes each, width * height of them.
 * @param width    image width in pixels.
 * @param height   image height in pixels.
 *
 * Nothing is written when a pointer is NULL or a dimension is not positive.
 */
void encodeYUV420SP_CPU(unsigned char *__restrict__ yuv420sp,
                        unsigned char *__restrict__ argb,
                        int width, int height)
{
    if (!yuv420sp || !argb || width <= 0 || height <= 0) {
        return;
    }

    int yIndex = 0;
    int uvIndex = width * height; /* chroma starts right after the luma plane */

    for (int j = 0; j < height; j++) {
        const int chroma_row = (j % 2 == 0);

        for (int i = 0; i < width; i++) {
            const int R = argb[2];
            const int G = argb[1];
            const int B = argb[0];
            argb += 4;

            /* Fixed-point RGB -> Y with 8 fractional bits; +128 rounds. */
            yuv420sp[yIndex++] =
                (unsigned char)(((66 * R + 129 * G + 25 * B + 128) >> 8) + 16);

            /* One V/U pair per 2x2 pixel group, sampled at its top-left pixel. */
            if (chroma_row && i % 2 == 0) {
                /*
                 * The chroma sums can be negative. Adding the +128 output
                 * bias as (128 << 8) BEFORE the shift keeps the shifted
                 * value non-negative, so the result no longer relies on the
                 * implementation-defined right shift of a negative int; it
                 * equals ((sum + 128) >> 8) + 128 with an arithmetic shift.
                 */
                const int V = (112 * R - 94 * G - 18 * B + 128 + (128 << 8)) >> 8;
                const int U = (-38 * R - 74 * G + 112 * B + 128 + (128 << 8)) >> 8;

                yuv420sp[uvIndex++] = (unsigned char)V;
                yuv420sp[uvIndex++] = (unsigned char)U;
            }
        }
    }
}
- Neon 实现版(Github上找的)
/**
 * NEON-intrinsics conversion from a packed 4-byte-per-pixel image to
 * YUV420SP with V/U interleaved chroma (the ordering the original notes
 * call NV21).
 *
 * Input layout, as read by this function: for every pixel, byte 0 is used
 * as blue, byte 1 as green, byte 2 as red; byte 3 is loaded but unused.
 *
 * Output layout, as written by this function:
 *   - width * height luma bytes starting at yuv420sp[0];
 *   - followed, starting at yuv420sp[width * height], by (V, U) byte pairs
 *     produced on even rows only, one pair per two horizontal pixels.
 *
 * Each row is processed in blocks of 16 pixels with NEON; the remaining
 * (width % 16) pixels of the row are handled by a scalar tail loop using
 * the same fixed-point formulas.
 *
 * @param yuv420sp destination buffer; must be large enough for the luma
 *                 plane plus the chroma pairs described above.
 * @param argb     source pixels, 4 bytes each, width * height of them.
 * @param width    image width in pixels.
 * @param height   image height in pixels.
 *
 * Nothing is written when a pointer is NULL or a dimension is not positive
 * (this also avoids left-shifting a negative width in the tail-loop start).
 */
void encodeYUV420SP_NEON_Intrinsics(unsigned char *__restrict__ yuv420sp,
                                    unsigned char *__restrict__ argb,
                                    int width, int height)
{
    if (!yuv420sp || !argb || width <= 0 || height <= 0) {
        return;
    }

    /* Rounding term (+128) added before the >> 8 of each fixed-point sum. */
    const uint16x8_t y_round = vdupq_n_u16(128);
    const int16x8_t uv_round = vdupq_n_s16(128);
    /*
     * Output bias for U/V. Adding -128 with wrap-around in int8 and then
     * reinterpreting as uint8 is the same as adding +128 in uint8.
     * Written as -128 because +128 is not representable in int8_t.
     */
    const int8x8_t uv_bias = vdup_n_s8(-128);
    const uint8x16_t y_offset = vdupq_n_u8(16);
    /* Keeps the low byte of every 16-bit lane (unsigned to match vandq_u16). */
    const uint16x8_t low_byte_mask = vdupq_n_u16(255);

    /* Y = ((66 R + 129 G + 25 B + 128) >> 8) + 16 */
    const uint8x8_t y_coef_r = vdup_n_u8(66);
    const uint8x8_t y_coef_g = vdup_n_u8(129);
    const uint8x8_t y_coef_b = vdup_n_u8(25);

    /* U = ((-38 R - 74 G + 112 B + 128) >> 8) + 128 */
    const int16x8_t u_coef_r = vdupq_n_s16(-38);
    const int16x8_t u_coef_g = vdupq_n_s16(-74);
    const int16x8_t u_coef_b = vdupq_n_s16(112);

    /* V = ((112 R - 94 G - 18 B + 128) >> 8) + 128 */
    const int16x8_t v_coef_r = vdupq_n_s16(112);
    const int16x8_t v_coef_g = vdupq_n_s16(-94);
    const int16x8_t v_coef_b = vdupq_n_s16(-18);

    const int blocks = width >> 4; /* number of full 16-pixel blocks per row */
    int yIndex = 0;
    int uvIndex = width * height;  /* chroma starts right after the luma plane */

    for (int j = 0; j < height; j++) {
        const int chroma_row = (j % 2 == 0);

        for (int blk = 0; blk < blocks; blk++) {
            /* De-interleaving load: val[0]=byte 0 (B), val[1]=G, val[2]=R. */
            const uint8x16x4_t px = vld4q_u8(argb);
            argb += 4 * 16;

            /* Luma for all 16 pixels, as two widened 8-lane halves. */
            uint16x8_t y_lo = vmull_u8(vget_low_u8(px.val[2]), y_coef_r);
            uint16x8_t y_hi = vmull_u8(vget_high_u8(px.val[2]), y_coef_r);
            y_lo = vmlal_u8(y_lo, vget_low_u8(px.val[1]), y_coef_g);
            y_hi = vmlal_u8(y_hi, vget_high_u8(px.val[1]), y_coef_g);
            y_lo = vmlal_u8(y_lo, vget_low_u8(px.val[0]), y_coef_b);
            y_hi = vmlal_u8(y_hi, vget_high_u8(px.val[0]), y_coef_b);
            y_lo = vaddq_u16(y_lo, y_round);
            y_hi = vaddq_u16(y_hi, y_round);

            uint8x16_t y = vcombine_u8(vqshrn_n_u16(y_lo, 8),
                                       vqshrn_n_u16(y_hi, 8));
            y = vaddq_u8(y, y_offset);
            vst1q_u8(yuv420sp + yIndex, y);
            yIndex += 16;

            /* Chroma is only produced on even rows. */
            if (chroma_row) {
                /*
                 * View each channel as 8 x 16-bit lanes and keep the low
                 * byte of each lane: this selects one of every two adjacent
                 * pixels (the even-indexed one on a little-endian lane
                 * layout -- NOTE(review): confirm target endianness) and
                 * zero-extends it to 16 bits in a single AND.
                 */
                const int16x8_t r16 = vreinterpretq_s16_u16(
                    vandq_u16(vreinterpretq_u16_u8(px.val[2]), low_byte_mask));
                const int16x8_t g16 = vreinterpretq_s16_u16(
                    vandq_u16(vreinterpretq_u16_u8(px.val[1]), low_byte_mask));
                const int16x8_t b16 = vreinterpretq_s16_u16(
                    vandq_u16(vreinterpretq_u16_u8(px.val[0]), low_byte_mask));

                int16x8_t u = vmulq_s16(r16, u_coef_r);
                int16x8_t v = vmulq_s16(r16, v_coef_r);
                u = vmlaq_s16(u, g16, u_coef_g);
                v = vmlaq_s16(v, g16, v_coef_g);
                u = vmlaq_s16(u, b16, u_coef_b);
                v = vmlaq_s16(v, b16, v_coef_b);
                u = vaddq_s16(u, uv_round);
                v = vaddq_s16(v, uv_round);

                /* val[0] = V, val[1] = U; vst2 interleaves them as V,U,V,U... */
                uint8x8x2_t vu;
                vu.val[0] = vreinterpret_u8_s8(
                    vadd_s8(vqshrn_n_s16(v, 8), uv_bias));
                vu.val[1] = vreinterpret_u8_s8(
                    vadd_s8(vqshrn_n_s16(u, 8), uv_bias));
                vst2_u8(yuv420sp + uvIndex, vu);
                uvIndex += 2 * 8;
            }
        }

        /* Scalar tail: the last (width % 16) pixels of this row. */
        for (int i = blocks << 4; i < width; i++) {
            const int R = argb[2];
            const int G = argb[1];
            const int B = argb[0];
            argb += 4;

            yuv420sp[yIndex++] =
                (unsigned char)(((66 * R + 129 * G + 25 * B + 128) >> 8) + 16);

            if (chroma_row && i % 2 == 0) {
                /*
                 * The +128 output bias is folded in as (128 << 8) before
                 * the shift so the shifted value is never negative; this
                 * equals ((sum + 128) >> 8) + 128 with an arithmetic shift
                 * but does not depend on implementation-defined behavior.
                 */
                const int V = (112 * R - 94 * G - 18 * B + 128 + (128 << 8)) >> 8;
                const int U = (-38 * R - 74 * G + 112 * B + 128 + (128 << 8)) >> 8;

                yuv420sp[uvIndex++] = (unsigned char)V;
                yuv420sp[uvIndex++] = (unsigned char)U;
            }
        }
    }
}
通过 objdump 生成 so 库的反汇编
命令行如下:
>J:\Programs\Android\sdk\ndk-bundle\toolchains\aarch64-linux-android-4.9\prebuilt\windows-x86_64\bin\aarch64-linux-android-objdump.exe -d libnative-lib.so > objdump_d.txt
一. arm-linux-objdump
用于显示二进制目标文件的信息,最常见的用途是查看反汇编代码
二. 常用选项:
1. -b bfdname:指定目标文件的格式
2. --disassemble 或者 -d:反汇编可执行段
3. --disassemble-all 或者 -D:反汇编所有段
4. -EB、-EL:指定字节序(大端 / 小端)
5. --file-headers 或者 -f:显示文件的整体头部摘要信息
6. --section-headers、--headers 或者 -h:显示目标文件中各个段的头部摘要信息
7. --info 或者 -i:显示支持的目标文件格式和 CPU 架构
8. --section=name 或者 -j name:只显示指定 section 的信息
9. --architecture=machine 或者 -m machine:指定反汇编目标文件时使用的架构
三. 示例
arm-linux-objdump -D elf_file > dis_file 或者
arm-linux-objdump -D -b binary -m arm bin_file > dis_file
- C/C++ 实现版:
000410dc <_Z18encodeYUV420SP_CPUPhS_ii>: 410dc: b5b0 push {r4, r5, r7, lr} 410de: af02 add r7, sp, #8 410e0: b08f sub sp, #60 ; 0x3c 410e2: 469c mov ip, r3 410e4: 4696 mov lr, r2 410e6: 460c mov r4, r1 410e8: 4605 mov r5, r0 410ea: 900e str r0, [sp, #56] ; 0x38 410ec: 910d str r1, [sp, #52] ; 0x34 410ee: 920c str r2, [sp, #48] ; 0x30 410f0: 930b str r3, [sp, #44] ; 0x2c 410f2: 980c ldr r0, [sp, #48] ; 0x30 410f4: 990b ldr r1, [sp, #44] ; 0x2c 410f6: 4348 muls r0, r1 410f8: 900a str r0, [sp, #40] ; 0x28 410fa: 2000 movs r0, #0 410fc: 9009 str r0, [sp, #36] ; 0x24 410fe: 990a ldr r1, [sp, #40] ; 0x28 41100: 9108 str r1, [sp, #32] 41102: 9007 str r0, [sp, #28] 41104: f8cd c00c str.w ip, [sp, #12] 41108: f8cd e008 str.w lr, [sp, #8] 4110c: 9401 str r4, [sp, #4] 4110e: 9500 str r5, [sp, #0] 41110: e7ff b.n 41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36> 41112: 9807 ldr r0, [sp, #28] 41114: 990b ldr r1, [sp, #44] ; 0x2c 41116: 4288 cmp r0, r1 41118: f280 808c bge.w 41234 <_Z18encodeYUV420SP_CPUPhS_ii+0x158> 4111c: e7ff b.n 4111e <_Z18encodeYUV420SP_CPUPhS_ii+0x42> 4111e: 2000 movs r0, #0 41120: 9006 str r0, [sp, #24] 41122: e7ff b.n 41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48> 41124: 9806 ldr r0, [sp, #24] 41126: 990c ldr r1, [sp, #48] ; 0x30 41128: 4288 cmp r0, r1 4112a: da7e bge.n 4122a <_Z18encodeYUV420SP_CPUPhS_ii+0x14e> 4112c: e7ff b.n 4112e <_Z18encodeYUV420SP_CPUPhS_ii+0x52> 4112e: 980d ldr r0, [sp, #52] ; 0x34 41130: 7880 ldrb r0, [r0, #2] 41132: f807 0c2d strb.w r0, [r7, #-45] 41136: 980d ldr r0, [sp, #52] ; 0x34 41138: 7840 ldrb r0, [r0, #1] 4113a: f807 0c2e strb.w r0, [r7, #-46] 4113e: 980d ldr r0, [sp, #52] ; 0x34 41140: 7800 ldrb r0, [r0, #0] 41142: f807 0c2f strb.w r0, [r7, #-47] 41146: 980d ldr r0, [sp, #52] ; 0x34 41148: 3004 adds r0, #4 4114a: 900d str r0, [sp, #52] ; 0x34 4114c: f817 0c2d ldrb.w r0, [r7, #-45] 41150: eb00 1040 add.w r0, r0, r0, lsl #5 41154: f817 1c2e ldrb.w r1, [r7, #-46] 41158: eb01 11c1 add.w r1, r1, r1, lsl #7 4115c: eb01 0040 add.w r0, r1, 
r0, lsl #1 41160: f817 1c2f ldrb.w r1, [r7, #-47] 41164: 2219 movs r2, #25 41166: fb11 0002 smlabb r0, r1, r2, r0 4116a: 3080 adds r0, #128 ; 0x80 4116c: 2110 movs r1, #16 4116e: eb01 2010 add.w r0, r1, r0, lsr #8 41172: f88d 0014 strb.w r0, [sp, #20] 41176: f817 0c2d ldrb.w r0, [r7, #-45] 4117a: f06f 0125 mvn.w r1, #37 ; 0x25 4117e: fb10 f001 smulbb r0, r0, r1 41182: f817 1c2e ldrb.w r1, [r7, #-46] 41186: 224a movs r2, #74 ; 0x4a 41188: fb01 0012 mls r0, r1, r2, r0 4118c: f817 1c2f ldrb.w r1, [r7, #-47] 41190: ebc1 01c1 rsb r1, r1, r1, lsl #3 41194: eb00 1001 add.w r0, r0, r1, lsl #4 41198: 3080 adds r0, #128 ; 0x80 4119a: 2180 movs r1, #128 ; 0x80 4119c: eb01 2010 add.w r0, r1, r0, lsr #8 411a0: f807 0c31 strb.w r0, [r7, #-49] 411a4: f817 0c2d ldrb.w r0, [r7, #-45] 411a8: ebc0 00c0 rsb r0, r0, r0, lsl #3 411ac: f817 2c2e ldrb.w r2, [r7, #-46] 411b0: 235e movs r3, #94 ; 0x5e 411b2: fb12 f203 smulbb r2, r2, r3 411b6: ebc2 1000 rsb r0, r2, r0, lsl #4 411ba: f817 2c2f ldrb.w r2, [r7, #-47] 411be: eb02 02c2 add.w r2, r2, r2, lsl #3 411c2: eba0 0042 sub.w r0, r0, r2, lsl #1 411c6: 3080 adds r0, #128 ; 0x80 411c8: eb01 2010 add.w r0, r1, r0, lsr #8 411cc: f807 0c32 strb.w r0, [r7, #-50] 411d0: f89d 0014 ldrb.w r0, [sp, #20] 411d4: 990e ldr r1, [sp, #56] ; 0x38 411d6: 9a09 ldr r2, [sp, #36] ; 0x24 411d8: 1c53 adds r3, r2, #1 411da: 9309 str r3, [sp, #36] ; 0x24 411dc: 5488 strb r0, [r1, r2] 411de: 9807 ldr r0, [sp, #28] 411e0: eb00 71d0 add.w r1, r0, r0, lsr #31 411e4: f021 0101 bic.w r1, r1, #1 411e8: 1a40 subs r0, r0, r1 411ea: 2800 cmp r0, #0 411ec: d118 bne.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144> 411ee: e7ff b.n 411f0 <_Z18encodeYUV420SP_CPUPhS_ii+0x114> 411f0: 9806 ldr r0, [sp, #24] 411f2: eb00 71d0 add.w r1, r0, r0, lsr #31 411f6: f021 0101 bic.w r1, r1, #1 411fa: 1a40 subs r0, r0, r1 411fc: 2800 cmp r0, #0 411fe: d10f bne.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144> 41200: e7ff b.n 41202 <_Z18encodeYUV420SP_CPUPhS_ii+0x126> 41202: f817 0c32 ldrb.w r0, [r7, 
#-50] 41206: 990e ldr r1, [sp, #56] ; 0x38 41208: 9a08 ldr r2, [sp, #32] 4120a: 1c53 adds r3, r2, #1 4120c: 9308 str r3, [sp, #32] 4120e: 5488 strb r0, [r1, r2] 41210: f817 0c31 ldrb.w r0, [r7, #-49] 41214: 990e ldr r1, [sp, #56] ; 0x38 41216: 9a08 ldr r2, [sp, #32] 41218: 1c53 adds r3, r2, #1 4121a: 9308 str r3, [sp, #32] 4121c: 5488 strb r0, [r1, r2] 4121e: e7ff b.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144> 41220: e7ff b.n 41222 <_Z18encodeYUV420SP_CPUPhS_ii+0x146> 41222: 9806 ldr r0, [sp, #24] 41224: 3001 adds r0, #1 41226: 9006 str r0, [sp, #24] 41228: e77c b.n 41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48> 4122a: e7ff b.n 4122c <_Z18encodeYUV420SP_CPUPhS_ii+0x150> 4122c: 9807 ldr r0, [sp, #28] 4122e: 3001 adds r0, #1 41230: 9007 str r0, [sp, #28] 41232: e76e b.n 41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36> 41234: b00f add sp, #60 ; 0x3c 41236: bdb0 pop {r4, r5, r7, pc}
- Neon 实现版:
00041238 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii>: 41238: b5f0 push {r4, r5, r6, r7, lr} 4123a: af03 add r7, sp, #12 4123c: e92d 0b00 stmdb sp!, {r8, r9, fp} 41240: f5ad 6d35 sub.w sp, sp, #2896 ; 0xb50 41244: 466c mov r4, sp 41246: f36f 0403 bfc r4, #0, #4 4124a: 46a5 mov sp, r4 4124c: f50d 6cd6 add.w ip, sp, #1712 ; 0x6b0 41250: f10d 0e30 add.w lr, sp, #48 ; 0x30 41254: 461c mov r4, r3 41256: 4615 mov r5, r2 41258: 460e mov r6, r1 4125a: 4680 mov r8, r0 4125c: f8df 9c0c ldr.w r9, [pc, #3084] ; 41e6c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc34> 41260: 44f9 add r9, pc 41262: f8d9 9000 ldr.w r9, [r9] 41266: f8d9 9000 ldr.w r9, [r9] 4126a: f8cd 9024 str.w r9, [sp, #36] ; 0x24 4126e: 906f str r0, [sp, #444] ; 0x1bc 41270: 916e str r1, [sp, #440] ; 0x1b8 41272: 926d str r2, [sp, #436] ; 0x1b4 41274: 936c str r3, [sp, #432] ; 0x1b0 41276: 2080 movs r0, #128 ; 0x80 41278: f8ad 01ee strh.w r0, [sp, #494] ; 0x1ee 4127c: f50d 71f7 add.w r1, sp, #494 ; 0x1ee 41280: f9e1 0c7f vld1.16 {d16[]-d17[]}, [r1 :16] 41284: a970 add r1, sp, #448 ; 0x1c0 41286: f941 0aef vst1.64 {d16-d17}, [r1 :128] 4128a: f961 0aef vld1.64 {d16-d17}, [r1 :128] 4128e: a974 add r1, sp, #464 ; 0x1d0 41290: f941 0aef vst1.64 {d16-d17}, [r1 :128] 41294: f961 0aef vld1.64 {d16-d17}, [r1 :128] 41298: a968 add r1, sp, #416 ; 0x1a0 4129a: f941 0aef vst1.64 {d16-d17}, [r1 :128] 4129e: f8ad 0a4e strh.w r0, [sp, #2638] ; 0xa4e 412a2: f60d 214e addw r1, sp, #2638 ; 0xa4e 412a6: f9e1 0c7f vld1.16 {d16[]-d17[]}, [r1 :16] 412aa: f50d 6122 add.w r1, sp, #2592 ; 0xa20 412ae: f941 0aef vst1.64 {d16-d17}, [r1 :128] 412b2: f961 0aef vld1.64 {d16-d17}, [r1 :128] 412b6: f50d 6123 add.w r1, sp, #2608 ; 0xa30 412ba: f941 0aef vst1.64 {d16-d17}, [r1 :128] 412be: f961 0aef vld1.64 {d16-d17}, [r1 :128] 412c2: a964 add r1, sp, #400 ; 0x190 412c4: f941 0aef vst1.64 {d16-d17}, [r1 :128] 412c8: f88d 0a1f strb.w r0, [sp, #2591] ; 0xa1f 412cc: f60d 201f addw r0, sp, #2591 ; 0xa1f 412d0: f9e0 2c0f vld1.8 {d18[]}, [r0] 412d4: edcc 
2bd6 vstr d18, [ip, #856] ; 0x358 412d8: eddc 2bd6 vldr d18, [ip, #856] ; 0x358 412dc: edcc 2bd8 vstr d18, [ip, #864] ; 0x360 412e0: eddc 2bd8 vldr d18, [ip, #864] ; 0x360 412e4: edce 2b56 vstr d18, [lr, #344] ; 0x158 412e8: 2010 movs r0, #16 412ea: f88d 0a07 strb.w r0, [sp, #2567] ; 0xa07 412ee: f60d 2007 addw r0, sp, #2567 ; 0xa07 412f2: f9e0 0c2f vld1.8 {d16[]-d17[]}, [r0] 412f6: f50d 601e add.w r0, sp, #2528 ; 0x9e0 412fa: f940 0aef vst1.64 {d16-d17}, [r0 :128] 412fe: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41302: f50d 601f add.w r0, sp, #2544 ; 0x9f0 41306: f940 0aef vst1.64 {d16-d17}, [r0 :128] 4130a: f960 0aef vld1.64 {d16-d17}, [r0 :128] 4130e: a85c add r0, sp, #368 ; 0x170 41310: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41314: 20ff movs r0, #255 ; 0xff 41316: f8ad 09de strh.w r0, [sp, #2526] ; 0x9de 4131a: f60d 10de addw r0, sp, #2526 ; 0x9de 4131e: f9e0 0c7f vld1.16 {d16[]-d17[]}, [r0 :16] 41322: f50d 601b add.w r0, sp, #2480 ; 0x9b0 41326: f940 0aef vst1.64 {d16-d17}, [r0 :128] 4132a: f960 0aef vld1.64 {d16-d17}, [r0 :128] 4132e: f50d 601c add.w r0, sp, #2496 ; 0x9c0 41332: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41336: f960 0aef vld1.64 {d16-d17}, [r0 :128] 4133a: a858 add r0, sp, #352 ; 0x160 4133c: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41340: 986d ldr r0, [sp, #436] ; 0x1b4 41342: 996c ldr r1, [sp, #432] ; 0x1b0 41344: 4348 muls r0, r1 41346: 9057 str r0, [sp, #348] ; 0x15c 41348: 2000 movs r0, #0 4134a: 9056 str r0, [sp, #344] ; 0x158 4134c: 9957 ldr r1, [sp, #348] ; 0x15c 4134e: 9155 str r1, [sp, #340] ; 0x154 41350: 9053 str r0, [sp, #332] ; 0x14c 41352: f8cd c020 str.w ip, [sp, #32] 41356: f8cd e01c str.w lr, [sp, #28] 4135a: 9406 str r4, [sp, #24] 4135c: 9505 str r5, [sp, #20] 4135e: 9604 str r6, [sp, #16] 41360: f8cd 800c str.w r8, [sp, #12] 41364: e7ff b.n 41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e> 41366: 9853 ldr r0, [sp, #332] ; 0x14c 41368: 996c ldr r1, [sp, #432] ; 0x1b0 4136a: 4288 cmp r0, r1 4136c: f280 856d bge.w 41e4a 
<_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc12> 41370: e7ff b.n 41372 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x13a> 41372: 2000 movs r0, #0 41374: 9054 str r0, [sp, #336] ; 0x150 41376: e7ff b.n 41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140> 41378: 9854 ldr r0, [sp, #336] ; 0x150 4137a: 996d ldr r1, [sp, #436] ; 0x1b4 4137c: ebb0 1f21 cmp.w r0, r1, asr #4 41380: f280 84d5 bge.w 41d2e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaf6> 41384: e7ff b.n 41386 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x14e> 41386: 986e ldr r0, [sp, #440] ; 0x1b8 41388: f960 010d vld4.8 {d16,d18,d20,d22}, [r0]! 4138c: f960 110f vld4.8 {d17,d19,d21,d23}, [r0] 41390: ef66 81f6 vorr q12, q11, q11 41394: ef62 a1f2 vorr q13, q9, q9 41398: ef64 c1f4 vorr q14, q10, q10 4139c: ef60 e1f0 vorr q15, q8, q8 413a0: f50d 602c add.w r0, sp, #2752 ; 0xac0 413a4: f940 eacf vst1.64 {d30-d31}, [r0] 413a8: f100 0120 add.w r1, r0, #32 413ac: f941 cacf vst1.64 {d28-d29}, [r1] 413b0: 4602 mov r2, r0 413b2: f962 caed vld1.64 {d28-d29}, [r2 :128]! 413b6: f942 aacf vst1.64 {d26-d27}, [r2] 413ba: 3030 adds r0, #48 ; 0x30 413bc: f940 8acf vst1.64 {d24-d25}, [r0] 413c0: f50d 6330 add.w r3, sp, #2816 ; 0xb00 413c4: f103 0c20 add.w ip, r3, #32 413c8: f961 8aef vld1.64 {d24-d25}, [r1 :128] 413cc: f94c 8aef vst1.64 {d24-d25}, [ip :128] 413d0: 4619 mov r1, r3 413d2: f941 caed vst1.64 {d28-d29}, [r1 :128]! 
413d6: f962 8aef vld1.64 {d24-d25}, [r2 :128] 413da: f941 8aef vst1.64 {d24-d25}, [r1 :128] 413de: f103 0230 add.w r2, r3, #48 ; 0x30 413e2: f960 8aef vld1.64 {d24-d25}, [r0 :128] 413e6: f942 8aef vst1.64 {d24-d25}, [r2 :128] 413ea: 986e ldr r0, [sp, #440] ; 0x1b8 413ec: 3040 adds r0, #64 ; 0x40 413ee: 906e str r0, [sp, #440] ; 0x1b8 413f0: f96c 8aef vld1.64 {d24-d25}, [ip :128] 413f4: f50d 601a add.w r0, sp, #2464 ; 0x9a0 413f8: f940 8aef vst1.64 {d24-d25}, [r0 :128] 413fc: f960 8aef vld1.64 {d24-d25}, [r0 :128] 41400: eeb0 0b68 vmov.f64 d0, d24 41404: 9808 ldr r0, [sp, #32] 41406: ed80 0bba vstr d0, [r0, #744] ; 0x2e8 4140a: ed90 0bba vldr d0, [r0, #744] ; 0x2e8 4140e: f50d 622b add.w r2, sp, #2736 ; 0xab0 41412: f902 071d vst1.8 {d0}, [r2 :64]! 41416: f96c 8aef vld1.64 {d24-d25}, [ip :128] 4141a: f50d 6c18 add.w ip, sp, #2432 ; 0x980 4141e: f94c 8aef vst1.64 {d24-d25}, [ip :128] 41422: f96c 8aef vld1.64 {d24-d25}, [ip :128] 41426: eeb0 0b69 vmov.f64 d0, d25 4142a: ed80 0bb2 vstr d0, [r0, #712] ; 0x2c8 4142e: ed90 0bb2 vldr d0, [r0, #712] ; 0x2c8 41432: ed82 0b00 vstr d0, [r2] 41436: f961 8aef vld1.64 {d24-d25}, [r1 :128] 4143a: f50d 6c16 add.w ip, sp, #2400 ; 0x960 4143e: f94c 8aef vst1.64 {d24-d25}, [ip :128] 41442: f96c 8aef vld1.64 {d24-d25}, [ip :128] 41446: eeb0 0b68 vmov.f64 d0, d24 4144a: ed80 0baa vstr d0, [r0, #680] ; 0x2a8 4144e: ed90 0baa vldr d0, [r0, #680] ; 0x2a8 41452: f50d 6c2a add.w ip, sp, #2720 ; 0xaa0 41456: f90c 071d vst1.8 {d0}, [ip :64]! 
4145a: f961 8aef vld1.64 {d24-d25}, [r1 :128] 4145e: f50d 6114 add.w r1, sp, #2368 ; 0x940 41462: f941 8aef vst1.64 {d24-d25}, [r1 :128] 41466: f961 8aef vld1.64 {d24-d25}, [r1 :128] 4146a: eeb0 0b69 vmov.f64 d0, d25 4146e: ed80 0ba2 vstr d0, [r0, #648] ; 0x288 41472: ed90 0ba2 vldr d0, [r0, #648] ; 0x288 41476: ed8c 0b00 vstr d0, [ip] 4147a: f963 8aef vld1.64 {d24-d25}, [r3 :128] 4147e: f50d 6112 add.w r1, sp, #2336 ; 0x920 41482: f941 8aef vst1.64 {d24-d25}, [r1 :128] 41486: f961 8aef vld1.64 {d24-d25}, [r1 :128] 4148a: eeb0 0b68 vmov.f64 d0, d24 4148e: ed80 0b9a vstr d0, [r0, #616] ; 0x268 41492: ed90 0b9a vldr d0, [r0, #616] ; 0x268 41496: f50d 6129 add.w r1, sp, #2704 ; 0xa90 4149a: f901 071d vst1.8 {d0}, [r1 :64]! 4149e: f963 8aef vld1.64 {d24-d25}, [r3 :128] 414a2: f50d 6310 add.w r3, sp, #2304 ; 0x900 414a6: f943 8aef vst1.64 {d24-d25}, [r3 :128] 414aa: f963 8aef vld1.64 {d24-d25}, [r3 :128] 414ae: eeb0 0b69 vmov.f64 d0, d25 414b2: ed80 0b92 vstr d0, [r0, #584] ; 0x248 414b6: ed90 0b92 vldr d0, [r0, #584] ; 0x248 414ba: ed81 0b00 vstr d0, [r1] 414be: 2342 movs r3, #66 ; 0x42 414c0: f88d 38f7 strb.w r3, [sp, #2295] ; 0x8f7 414c4: f60d 03f7 addw r3, sp, #2295 ; 0x8f7 414c8: f9a3 0c0f vld1.8 {d0[]}, [r3] 414cc: ed80 0b8c vstr d0, [r0, #560] ; 0x230 414d0: ed90 0b8c vldr d0, [r0, #560] ; 0x230 414d4: ed80 0b8e vstr d0, [r0, #568] ; 0x238 414d8: ed90 0b8e vldr d0, [r0, #568] ; 0x238 414dc: 9b07 ldr r3, [sp, #28] 414de: ed83 0b44 vstr d0, [r3, #272] ; 0x110 414e2: f50d 6400 add.w r4, sp, #2048 ; 0x800 414e6: ed94 0bac vldr d0, [r4, #688] ; 0x2b0 414ea: ed93 1b44 vldr d1, [r3, #272] ; 0x110 414ee: ed80 0b8a vstr d0, [r0, #552] ; 0x228 414f2: ed80 1b88 vstr d1, [r0, #544] ; 0x220 414f6: ed90 0b8a vldr d0, [r0, #552] ; 0x228 414fa: ed90 1b88 vldr d1, [r0, #544] ; 0x220 414fe: ffc0 8c01 vmull.u8 q12, d0, d1 41502: f50d 6e0c add.w lr, sp, #2240 ; 0x8c0 41506: f94e 8aef vst1.64 {d24-d25}, [lr :128] 4150a: f96e 8aef vld1.64 {d24-d25}, [lr :128] 4150e: f50d 6e27 add.w 
lr, sp, #2672 ; 0xa70 41512: f94e 8aef vst1.64 {d24-d25}, [lr :128] 41516: ed92 0b00 vldr d0, [r2] 4151a: ed93 1b44 vldr d1, [r3, #272] ; 0x110 4151e: ed80 0b82 vstr d0, [r0, #520] ; 0x208 41522: ed80 1b80 vstr d1, [r0, #512] ; 0x200 41526: ed90 0b82 vldr d0, [r0, #520] ; 0x208 4152a: ed90 1b80 vldr d1, [r0, #512] ; 0x200 4152e: ffc0 8c01 vmull.u8 q12, d0, d1 41532: f50d 620a add.w r2, sp, #2208 ; 0x8a0 41536: f942 8aef vst1.64 {d24-d25}, [r2 :128] 4153a: f962 8aef vld1.64 {d24-d25}, [r2 :128] 4153e: 2281 movs r2, #129 ; 0x81 41540: f88d 289f strb.w r2, [sp, #2207] ; 0x89f 41544: f60d 029f addw r2, sp, #2207 ; 0x89f 41548: f9a2 0c0f vld1.8 {d0[]}, [r2] 4154c: ed80 0b76 vstr d0, [r0, #472] ; 0x1d8 41550: ed90 0b76 vldr d0, [r0, #472] ; 0x1d8 41554: ed80 0b78 vstr d0, [r0, #480] ; 0x1e0 41558: ed90 0b78 vldr d0, [r0, #480] ; 0x1e0 4155c: ed83 0b44 vstr d0, [r3, #272] ; 0x110 41560: 4672 mov r2, lr 41562: f962 aa6d vld1.16 {d26-d27}, [r2 :128]! 41566: f942 8aef vst1.64 {d24-d25}, [r2 :128] 4156a: ed90 0bfc vldr d0, [r0, #1008] ; 0x3f0 4156e: ed93 1b44 vldr d1, [r3, #272] ; 0x110 41572: f50d 6405 add.w r4, sp, #2128 ; 0x850 41576: f944 aaef vst1.64 {d26-d27}, [r4 :128] 4157a: ed80 0b66 vstr d0, [r0, #408] ; 0x198 4157e: ed80 1b64 vstr d1, [r0, #400] ; 0x190 41582: f964 8aef vld1.64 {d24-d25}, [r4 :128] 41586: ed90 0b66 vldr d0, [r0, #408] ; 0x198 4158a: ed90 1b64 vldr d1, [r0, #400] ; 0x190 4158e: ed80 0b74 vstr d0, [r0, #464] ; 0x1d0 41592: ed80 1b72 vstr d1, [r0, #456] ; 0x1c8 41596: ed90 0b74 vldr d0, [r0, #464] ; 0x1d0 4159a: ed90 1b72 vldr d1, [r0, #456] ; 0x1c8 4159e: ffc0 ac01 vmull.u8 q13, d0, d1 415a2: f50d 6406 add.w r4, sp, #2144 ; 0x860 415a6: f944 aaef vst1.64 {d26-d27}, [r4 :128] 415aa: f964 aaef vld1.64 {d26-d27}, [r4 :128] 415ae: ef58 88ea vadd.i16 q12, q12, q13 415b2: f50d 6403 add.w r4, sp, #2096 ; 0x830 415b6: f944 8aef vst1.64 {d24-d25}, [r4 :128] 415ba: f964 8aef vld1.64 {d24-d25}, [r4 :128] 415be: f94e 8aef vst1.64 {d24-d25}, [lr :128] 415c2: f962 
8aef vld1.64 {d24-d25}, [r2 :128] 415c6: ed9c 0b00 vldr d0, [ip] 415ca: ed93 1b44 vldr d1, [r3, #272] ; 0x110 415ce: f50d 6c00 add.w ip, sp, #2048 ; 0x800 415d2: f94c 8aef vst1.64 {d24-d25}, [ip :128] 415d6: ed80 0b52 vstr d0, [r0, #328] ; 0x148 415da: ed80 1b50 vstr d1, [r0, #320] ; 0x140 415de: f96c 8aef vld1.64 {d24-d25}, [ip :128] 415e2: ed90 0b52 vldr d0, [r0, #328] ; 0x148 415e6: ed90 1b50 vldr d1, [r0, #320] ; 0x140 415ea: ed80 0b5e vstr d0, [r0, #376] ; 0x178 415ee: ed80 1b5c vstr d1, [r0, #368] ; 0x170 415f2: ed90 0b5e vldr d0, [r0, #376] ; 0x178 415f6: ed90 1b5c vldr d1, [r0, #368] ; 0x170 415fa: ffc0 ac01 vmull.u8 q13, d0, d1 415fe: f50d 6c01 add.w ip, sp, #2064 ; 0x810 41602: f94c aaef vst1.64 {d26-d27}, [ip :128] 41606: f96c aaef vld1.64 {d26-d27}, [ip :128] 4160a: ef58 88ea vadd.i16 q12, q12, q13 4160e: f50d 6cfc add.w ip, sp, #2016 ; 0x7e0 41612: f94c 8aef vst1.64 {d24-d25}, [ip :128] 41616: f96c 8aef vld1.64 {d24-d25}, [ip :128] 4161a: f942 8aef vst1.64 {d24-d25}, [r2 :128] 4161e: f04f 0c19 mov.w ip, #25 41622: f88d c7df strb.w ip, [sp, #2015] ; 0x7df 41626: f20d 7cdf addw ip, sp, #2015 ; 0x7df 4162a: f9ac 0c0f vld1.8 {d0[]}, [ip] 4162e: ed80 0b46 vstr d0, [r0, #280] ; 0x118 41632: ed90 0b46 vldr d0, [r0, #280] ; 0x118 41636: ed80 0b48 vstr d0, [r0, #288] ; 0x120 4163a: ed90 0b48 vldr d0, [r0, #288] ; 0x120 4163e: ed83 0b44 vstr d0, [r3, #272] ; 0x110 41642: f96e 8aef vld1.64 {d24-d25}, [lr :128] 41646: ed90 0bf8 vldr d0, [r0, #992] ; 0x3e0 4164a: ed93 1b44 vldr d1, [r3, #272] ; 0x110 4164e: f50d 6cf2 add.w ip, sp, #1936 ; 0x790 41652: f94c 8aef vst1.64 {d24-d25}, [ip :128] 41656: ed80 0b36 vstr d0, [r0, #216] ; 0xd8 4165a: ed80 1b34 vstr d1, [r0, #208] ; 0xd0 4165e: f96c 8aef vld1.64 {d24-d25}, [ip :128] 41662: ed90 0b36 vldr d0, [r0, #216] ; 0xd8 41666: ed90 1b34 vldr d1, [r0, #208] ; 0xd0 4166a: ed80 0b44 vstr d0, [r0, #272] ; 0x110 4166e: ed80 1b42 vstr d1, [r0, #264] ; 0x108 41672: ed90 0b44 vldr d0, [r0, #272] ; 0x110 41676: ed90 1b42 vldr d1, 
[r0, #264] ; 0x108 4167a: ffc0 ac01 vmull.u8 q13, d0, d1 4167e: f50d 6cf4 add.w ip, sp, #1952 ; 0x7a0 41682: f94c aaef vst1.64 {d26-d27}, [ip :128] 41686: f96c aaef vld1.64 {d26-d27}, [ip :128] 4168a: ef58 88ea vadd.i16 q12, q12, q13 4168e: f50d 6cee add.w ip, sp, #1904 ; 0x770 41692: f94c 8aef vst1.64 {d24-d25}, [ip :128] 41696: f96c 8aef vld1.64 {d24-d25}, [ip :128] 4169a: f94e 8aef vst1.64 {d24-d25}, [lr :128] 4169e: f962 8aef vld1.64 {d24-d25}, [r2 :128] 416a2: ed91 0b00 vldr d0, [r1] 416a6: ed93 1b44 vldr d1, [r3, #272] ; 0x110 416aa: f50d 61e8 add.w r1, sp, #1856 ; 0x740 416ae: f941 8aef vst1.64 {d24-d25}, [r1 :128] 416b2: ed80 0b22 vstr d0, [r0, #136] ; 0x88 416b6: ed80 1b20 vstr d1, [r0, #128] ; 0x80 416ba: f961 8aef vld1.64 {d24-d25}, [r1 :128] 416be: ed90 0b22 vldr d0, [r0, #136] ; 0x88 416c2: ed90 1b20 vldr d1, [r0, #128] ; 0x80 416c6: ed80 0b2e vstr d0, [r0, #184] ; 0xb8 416ca: ed80 1b2c vstr d1, [r0, #176] ; 0xb0 416ce: ed90 0b2e vldr d0, [r0, #184] ; 0xb8 416d2: ed90 1b2c vldr d1, [r0, #176] ; 0xb0 416d6: ffc0 ac01 vmull.u8 q13, d0, d1 416da: f50d 61ea add.w r1, sp, #1872 ; 0x750 416de: f941 aaef vst1.64 {d26-d27}, [r1 :128] 416e2: f961 aaef vld1.64 {d26-d27}, [r1 :128] 416e6: ef58 88ea vadd.i16 q12, q12, q13 416ea: f50d 61e4 add.w r1, sp, #1824 ; 0x720 416ee: f941 8aef vst1.64 {d24-d25}, [r1 :128] 416f2: f961 8aef vld1.64 {d24-d25}, [r1 :128] 416f6: f942 8aef vst1.64 {d24-d25}, [r2 :128] 416fa: f96e 8aef vld1.64 {d24-d25}, [lr :128] 416fe: a968 add r1, sp, #416 ; 0x1a0 41700: f961 aaef vld1.64 {d26-d27}, [r1 :128] 41704: f50d 6ce2 add.w ip, sp, #1808 ; 0x710 41708: f94c 8aef vst1.64 {d24-d25}, [ip :128] 4170c: f50d 64e0 add.w r4, sp, #1792 ; 0x700 41710: f944 aaef vst1.64 {d26-d27}, [r4 :128] 41714: f96c 8aef vld1.64 {d24-d25}, [ip :128] 41718: f964 aaef vld1.64 {d26-d27}, [r4 :128] 4171c: ef58 88ea vadd.i16 q12, q12, q13 41720: f50d 6cde add.w ip, sp, #1776 ; 0x6f0 41724: f94c 8aef vst1.64 {d24-d25}, [ip :128] 41728: f96c 8aef vld1.64 {d24-d25}, [ip 
:128] 4172c: f94e 8aef vst1.64 {d24-d25}, [lr :128] 41730: f962 8aef vld1.64 {d24-d25}, [r2 :128] 41734: f961 aaef vld1.64 {d26-d27}, [r1 :128] 41738: f50d 61dc add.w r1, sp, #1760 ; 0x6e0 4173c: f941 8aef vst1.64 {d24-d25}, [r1 :128] 41740: f50d 6cda add.w ip, sp, #1744 ; 0x6d0 41744: f94c aaef vst1.64 {d26-d27}, [ip :128] 41748: f961 8aef vld1.64 {d24-d25}, [r1 :128] 4174c: f96c aaef vld1.64 {d26-d27}, [ip :128] 41750: ef58 88ea vadd.i16 q12, q12, q13 41754: f50d 61d8 add.w r1, sp, #1728 ; 0x6c0 41758: f941 8aef vst1.64 {d24-d25}, [r1 :128] 4175c: f961 8aef vld1.64 {d24-d25}, [r1 :128] 41760: f942 8aef vst1.64 {d24-d25}, [r2 :128] 41764: f96e 8aef vld1.64 {d24-d25}, [lr :128] 41768: a948 add r1, sp, #288 ; 0x120 4176a: f941 8aef vst1.64 {d24-d25}, [r1 :128] 4176e: f961 8aef vld1.64 {d24-d25}, [r1 :128] 41772: ff88 0938 vqshrn.u16 d0, q12, #8 41776: ed83 0b3a vstr d0, [r3, #232] ; 0xe8 4177a: ed93 0b3a vldr d0, [r3, #232] ; 0xe8 4177e: ed83 0b38 vstr d0, [r3, #224] ; 0xe0 41782: ed93 0b38 vldr d0, [r3, #224] ; 0xe0 41786: f962 8aef vld1.64 {d24-d25}, [r2 :128] 4178a: a940 add r1, sp, #256 ; 0x100 4178c: f941 8aef vst1.64 {d24-d25}, [r1 :128] 41790: f961 8aef vld1.64 {d24-d25}, [r1 :128] 41794: ff88 1938 vqshrn.u16 d1, q12, #8 41798: ed83 1b32 vstr d1, [r3, #200] ; 0xc8 4179c: ed93 1b32 vldr d1, [r3, #200] ; 0xc8 417a0: ed83 1b30 vstr d1, [r3, #192] ; 0xc0 417a4: ed93 1b30 vldr d1, [r3, #192] ; 0xc0 417a8: ed80 0b02 vstr d0, [r0, #8] 417ac: ed80 1b00 vstr d1, [r0] 417b0: ed90 0b02 vldr d0, [r0, #8] 417b4: ed90 1b00 vldr d1, [r0] 417b8: eef0 8b40 vmov.f64 d24, d0 417bc: eef0 9b41 vmov.f64 d25, d1 417c0: f50d 61d4 add.w r1, sp, #1696 ; 0x6a0 417c4: f941 8aef vst1.64 {d24-d25}, [r1 :128] 417c8: f961 8aef vld1.64 {d24-d25}, [r1 :128] 417cc: a94c add r1, sp, #304 ; 0x130 417ce: f941 8aef vst1.64 {d24-d25}, [r1 :128] 417d2: f961 8aef vld1.64 {d24-d25}, [r1 :128] 417d6: aa5c add r2, sp, #368 ; 0x170 417d8: f962 aaef vld1.64 {d26-d27}, [r2 :128] 417dc: f50d 62d2 add.w r2, 
sp, #1680 ; 0x690 417e0: f942 8aef vst1.64 {d24-d25}, [r2 :128] 417e4: f50d 6cd0 add.w ip, sp, #1664 ; 0x680 417e8: f94c aaef vst1.64 {d26-d27}, [ip :128] 417ec: f962 8aef vld1.64 {d24-d25}, [r2 :128] 417f0: f96c aaef vld1.64 {d26-d27}, [ip :128] 417f4: ef48 88ea vadd.i8 q12, q12, q13 417f8: f50d 62ce add.w r2, sp, #1648 ; 0x670 417fc: f942 8aef vst1.64 {d24-d25}, [r2 :128] 41800: f962 8aef vld1.64 {d24-d25}, [r2 :128] 41804: f941 8aef vst1.64 {d24-d25}, [r1 :128] 41808: f961 8aef vld1.64 {d24-d25}, [r1 :128] 4180c: a938 add r1, sp, #224 ; 0xe0 4180e: f941 8aef vst1.64 {d24-d25}, [r1 :128] 41812: 9a6f ldr r2, [sp, #444] ; 0x1bc 41814: f8dd c158 ldr.w ip, [sp, #344] ; 0x158 41818: 4462 add r2, ip 4181a: f961 8aef vld1.64 {d24-d25}, [r1 :128] 4181e: f942 8a0f vst1.8 {d24-d25}, [r2] 41822: 9956 ldr r1, [sp, #344] ; 0x158 41824: 3110 adds r1, #16 41826: 9156 str r1, [sp, #344] ; 0x158 41828: 9953 ldr r1, [sp, #332] ; 0x14c 4182a: eb01 72d1 add.w r2, r1, r1, lsr #31 4182e: f022 0201 bic.w r2, r2, #1 41832: 1a89 subs r1, r1, r2 41834: 2900 cmp r1, #0 41836: f040 8274 bne.w 41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea> 4183a: e7ff b.n 4183c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x604> 4183c: f64f 70da movw r0, #65498 ; 0xffda 41840: f8ad 066e strh.w r0, [sp, #1646] ; 0x66e 41844: f20d 606e addw r0, sp, #1646 ; 0x66e 41848: f9e0 0c7f vld1.16 {d16[]-d17[]}, [r0 :16] 4184c: f50d 60c8 add.w r0, sp, #1600 ; 0x640 41850: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41854: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41858: f50d 60ca add.w r0, sp, #1616 ; 0x650 4185c: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41860: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41864: a834 add r0, sp, #208 ; 0xd0 41866: f940 0aef vst1.64 {d16-d17}, [r0 :128] 4186a: 2170 movs r1, #112 ; 0x70 4186c: f8ad 163e strh.w r1, [sp, #1598] ; 0x63e 41870: f20d 623e addw r2, sp, #1598 ; 0x63e 41874: f9e2 0c7f vld1.16 {d16[]-d17[]}, [r2 :16] 41878: f50d 62c2 add.w r2, sp, #1552 ; 0x610 4187c: f942 0aef vst1.64 
{d16-d17}, [r2 :128] 41880: f962 0aef vld1.64 {d16-d17}, [r2 :128] 41884: f50d 62c4 add.w r2, sp, #1568 ; 0x620 41888: f942 0aef vst1.64 {d16-d17}, [r2 :128] 4188c: f962 0aef vld1.64 {d16-d17}, [r2 :128] 41890: aa30 add r2, sp, #192 ; 0xc0 41892: f942 0aef vst1.64 {d16-d17}, [r2 :128] 41896: f50d 6330 add.w r3, sp, #2816 ; 0xb00 4189a: f103 0c20 add.w ip, r3, #32 4189e: f96c 0aef vld1.64 {d16-d17}, [ip :128] 418a2: f50d 6cc0 add.w ip, sp, #1536 ; 0x600 418a6: f94c 0aef vst1.64 {d16-d17}, [ip :128] 418aa: f96c 0aef vld1.64 {d16-d17}, [ip :128] 418ae: f50d 6cbe add.w ip, sp, #1520 ; 0x5f0 418b2: f94c 0aef vst1.64 {d16-d17}, [ip :128] 418b6: f96c 0aef vld1.64 {d16-d17}, [ip :128] 418ba: f50d 7cb0 add.w ip, sp, #352 ; 0x160 418be: f96c 2aef vld1.64 {d18-d19}, [ip :128] 418c2: f50d 6ebc add.w lr, sp, #1504 ; 0x5e0 418c6: f94e 0aef vst1.64 {d16-d17}, [lr :128] 418ca: f50d 64ba add.w r4, sp, #1488 ; 0x5d0 418ce: f944 2aef vst1.64 {d18-d19}, [r4 :128] 418d2: f96e 0aef vld1.64 {d16-d17}, [lr :128] 418d6: f964 2aef vld1.64 {d18-d19}, [r4 :128] 418da: ef40 01f2 vand q8, q8, q9 418de: f50d 6eb8 add.w lr, sp, #1472 ; 0x5c0 418e2: f94e 0aef vst1.64 {d16-d17}, [lr :128] 418e6: f96e 0aef vld1.64 {d16-d17}, [lr :128] 418ea: f50d 6eb6 add.w lr, sp, #1456 ; 0x5b0 418ee: f94e 0aef vst1.64 {d16-d17}, [lr :128] 418f2: f96e 0aef vld1.64 {d16-d17}, [lr :128] 418f6: f50d 6eb4 add.w lr, sp, #1440 ; 0x5a0 418fa: f94e 0aef vst1.64 {d16-d17}, [lr :128] 418fe: f96e 0aef vld1.64 {d16-d17}, [lr :128] 41902: f10d 0eb0 add.w lr, sp, #176 ; 0xb0 41906: f94e 0aef vst1.64 {d16-d17}, [lr :128] 4190a: f103 0410 add.w r4, r3, #16 4190e: f964 0aef vld1.64 {d16-d17}, [r4 :128] 41912: f50d 64b2 add.w r4, sp, #1424 ; 0x590 41916: f944 0aef vst1.64 {d16-d17}, [r4 :128] 4191a: f964 0aef vld1.64 {d16-d17}, [r4 :128] 4191e: f50d 64b0 add.w r4, sp, #1408 ; 0x580 41922: f944 0aef vst1.64 {d16-d17}, [r4 :128] 41926: f964 0aef vld1.64 {d16-d17}, [r4 :128] 4192a: f96c 2aef vld1.64 {d18-d19}, [ip :128] 4192e: f50d 
64ae add.w r4, sp, #1392 ; 0x570 41932: f944 0aef vst1.64 {d16-d17}, [r4 :128] 41936: f50d 65ac add.w r5, sp, #1376 ; 0x560 4193a: f945 2aef vst1.64 {d18-d19}, [r5 :128] 4193e: f964 0aef vld1.64 {d16-d17}, [r4 :128] 41942: f965 2aef vld1.64 {d18-d19}, [r5 :128] 41946: ef40 01f2 vand q8, q8, q9 4194a: f50d 64aa add.w r4, sp, #1360 ; 0x550 4194e: f944 0aef vst1.64 {d16-d17}, [r4 :128] 41952: f964 0aef vld1.64 {d16-d17}, [r4 :128] 41956: f50d 64a8 add.w r4, sp, #1344 ; 0x540 4195a: f944 0aef vst1.64 {d16-d17}, [r4 :128] 4195e: f964 0aef vld1.64 {d16-d17}, [r4 :128] 41962: f50d 64a6 add.w r4, sp, #1328 ; 0x530 41966: f944 0aef vst1.64 {d16-d17}, [r4 :128] 4196a: f964 0aef vld1.64 {d16-d17}, [r4 :128] 4196e: ac28 add r4, sp, #160 ; 0xa0 41970: f944 0aef vst1.64 {d16-d17}, [r4 :128] 41974: f963 0aef vld1.64 {d16-d17}, [r3 :128] 41978: f50d 63a4 add.w r3, sp, #1312 ; 0x520 4197c: f943 0aef vst1.64 {d16-d17}, [r3 :128] 41980: f963 0aef vld1.64 {d16-d17}, [r3 :128] 41984: f50d 63a2 add.w r3, sp, #1296 ; 0x510 41988: f943 0aef vst1.64 {d16-d17}, [r3 :128] 4198c: f963 0aef vld1.64 {d16-d17}, [r3 :128] 41990: f96c 2aef vld1.64 {d18-d19}, [ip :128] 41994: f50d 63a0 add.w r3, sp, #1280 ; 0x500 41998: f943 0aef vst1.64 {d16-d17}, [r3 :128] 4199c: f50d 6c9e add.w ip, sp, #1264 ; 0x4f0 419a0: f94c 2aef vst1.64 {d18-d19}, [ip :128] 419a4: f963 0aef vld1.64 {d16-d17}, [r3 :128] 419a8: f96c 2aef vld1.64 {d18-d19}, [ip :128] 419ac: ef40 01f2 vand q8, q8, q9 419b0: f50d 639c add.w r3, sp, #1248 ; 0x4e0 419b4: f943 0aef vst1.64 {d16-d17}, [r3 :128] 419b8: f963 0aef vld1.64 {d16-d17}, [r3 :128] 419bc: f50d 639a add.w r3, sp, #1232 ; 0x4d0 419c0: f943 0aef vst1.64 {d16-d17}, [r3 :128] 419c4: f963 0aef vld1.64 {d16-d17}, [r3 :128] 419c8: f50d 6398 add.w r3, sp, #1216 ; 0x4c0 419cc: f943 0aef vst1.64 {d16-d17}, [r3 :128] 419d0: f963 0aef vld1.64 {d16-d17}, [r3 :128] 419d4: ab24 add r3, sp, #144 ; 0x90 419d6: f943 0aef vst1.64 {d16-d17}, [r3 :128] 419da: f96e 0aef vld1.64 {d16-d17}, [lr :128] 
419de: f960 2aef vld1.64 {d18-d19}, [r0 :128] 419e2: f50d 6c96 add.w ip, sp, #1200 ; 0x4b0 419e6: f94c 0aef vst1.64 {d16-d17}, [ip :128] 419ea: f50d 6594 add.w r5, sp, #1184 ; 0x4a0 419ee: f945 2aef vst1.64 {d18-d19}, [r5 :128] 419f2: f96c 0aef vld1.64 {d16-d17}, [ip :128] 419f6: f965 2aef vld1.64 {d18-d19}, [r5 :128] 419fa: ef50 09f2 vmul.i16 q8, q8, q9 419fe: f50d 6c92 add.w ip, sp, #1168 ; 0x490 41a02: f94c 0aef vst1.64 {d16-d17}, [ip :128] 41a06: f96c 0aef vld1.64 {d16-d17}, [ip :128] 41a0a: f10d 0c80 add.w ip, sp, #128 ; 0x80 41a0e: f94c 0aef vst1.64 {d16-d17}, [ip :128] 41a12: f96e 0aef vld1.64 {d16-d17}, [lr :128] 41a16: f962 2aef vld1.64 {d18-d19}, [r2 :128] 41a1a: f50d 6e90 add.w lr, sp, #1152 ; 0x480 41a1e: f94e 0aef vst1.64 {d16-d17}, [lr :128] 41a22: f50d 658e add.w r5, sp, #1136 ; 0x470 41a26: f945 2aef vst1.64 {d18-d19}, [r5 :128] 41a2a: f96e 0aef vld1.64 {d16-d17}, [lr :128] 41a2e: f965 2aef vld1.64 {d18-d19}, [r5 :128] 41a32: ef50 09f2 vmul.i16 q8, q8, q9 41a36: f50d 6e8c add.w lr, sp, #1120 ; 0x460 41a3a: f94e 0aef vst1.64 {d16-d17}, [lr :128] 41a3e: f96e 0aef vld1.64 {d16-d17}, [lr :128] 41a42: f10d 0e70 add.w lr, sp, #112 ; 0x70 41a46: f94e 0aef vst1.64 {d16-d17}, [lr :128] 41a4a: f64f 75b6 movw r5, #65462 ; 0xffb6 41a4e: f8ad 545e strh.w r5, [sp, #1118] ; 0x45e 41a52: f20d 455e addw r5, sp, #1118 ; 0x45e 41a56: f9e5 0c7f vld1.16 {d16[]-d17[]}, [r5 :16] 41a5a: f50d 6586 add.w r5, sp, #1072 ; 0x430 41a5e: f945 0aef vst1.64 {d16-d17}, [r5 :128] 41a62: f965 0aef vld1.64 {d16-d17}, [r5 :128] 41a66: f50d 6588 add.w r5, sp, #1088 ; 0x440 41a6a: f945 0aef vst1.64 {d16-d17}, [r5 :128] 41a6e: f965 0aef vld1.64 {d16-d17}, [r5 :128] 41a72: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41a76: f64f 75a2 movw r5, #65442 ; 0xffa2 41a7a: f8ad 542e strh.w r5, [sp, #1070] ; 0x42e 41a7e: f20d 452e addw r5, sp, #1070 ; 0x42e 41a82: f9e5 0c7f vld1.16 {d16[]-d17[]}, [r5 :16] 41a86: f50d 6580 add.w r5, sp, #1024 ; 0x400 41a8a: f945 0aef vst1.64 {d16-d17}, [r5 :128] 41a8e: 
f965 0aef vld1.64 {d16-d17}, [r5 :128] 41a92: f50d 6582 add.w r5, sp, #1040 ; 0x410 41a96: f945 0aef vst1.64 {d16-d17}, [r5 :128] 41a9a: f965 0aef vld1.64 {d16-d17}, [r5 :128] 41a9e: f942 0aef vst1.64 {d16-d17}, [r2 :128] 41aa2: f96c 0aef vld1.64 {d16-d17}, [ip :128] 41aa6: f964 2aef vld1.64 {d18-d19}, [r4 :128] 41aaa: f960 4aef vld1.64 {d20-d21}, [r0 :128] 41aae: adfc add r5, sp, #1008 ; 0x3f0 41ab0: f945 0aef vst1.64 {d16-d17}, [r5 :128] 41ab4: aef8 add r6, sp, #992 ; 0x3e0 41ab6: f946 2aef vst1.64 {d18-d19}, [r6 :128] 41aba: f50d 7874 add.w r8, sp, #976 ; 0x3d0 41abe: f948 4aef vst1.64 {d20-d21}, [r8 :128] 41ac2: f965 0aef vld1.64 {d16-d17}, [r5 :128] 41ac6: f966 2aef vld1.64 {d18-d19}, [r6 :128] 41aca: f968 4aef vld1.64 {d20-d21}, [r8 :128] 41ace: ef52 09e4 vmla.i16 q8, q9, q10 41ad2: adf0 add r5, sp, #960 ; 0x3c0 41ad4: f945 0aef vst1.64 {d16-d17}, [r5 :128] 41ad8: f965 0aef vld1.64 {d16-d17}, [r5 :128] 41adc: f94c 0aef vst1.64 {d16-d17}, [ip :128] 41ae0: f96e 0aef vld1.64 {d16-d17}, [lr :128] 41ae4: f964 2aef vld1.64 {d18-d19}, [r4 :128] 41ae8: f962 4aef vld1.64 {d20-d21}, [r2 :128] 41aec: acec add r4, sp, #944 ; 0x3b0 41aee: f944 0aef vst1.64 {d16-d17}, [r4 :128] 41af2: ade8 add r5, sp, #928 ; 0x3a0 41af4: f945 2aef vst1.64 {d18-d19}, [r5 :128] 41af8: aee4 add r6, sp, #912 ; 0x390 41afa: f946 4aef vst1.64 {d20-d21}, [r6 :128] 41afe: f964 0aef vld1.64 {d16-d17}, [r4 :128] 41b02: f965 2aef vld1.64 {d18-d19}, [r5 :128] 41b06: f966 4aef vld1.64 {d20-d21}, [r6 :128] 41b0a: ef52 09e4 vmla.i16 q8, q9, q10 41b0e: ace0 add r4, sp, #896 ; 0x380 41b10: f944 0aef vst1.64 {d16-d17}, [r4 :128] 41b14: f964 0aef vld1.64 {d16-d17}, [r4 :128] 41b18: f94e 0aef vst1.64 {d16-d17}, [lr :128] 41b1c: f8ad 137e strh.w r1, [sp, #894] ; 0x37e 41b20: f20d 317e addw r1, sp, #894 ; 0x37e 41b24: f9e1 0c7f vld1.16 {d16[]-d17[]}, [r1 :16] 41b28: a9d4 add r1, sp, #848 ; 0x350 41b2a: f941 0aef vst1.64 {d16-d17}, [r1 :128] 41b2e: f961 0aef vld1.64 {d16-d17}, [r1 :128] 41b32: a9d8 add r1, sp, 
#864 ; 0x360 41b34: f941 0aef vst1.64 {d16-d17}, [r1 :128] 41b38: f961 0aef vld1.64 {d16-d17}, [r1 :128] 41b3c: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41b40: f64f 71ee movw r1, #65518 ; 0xffee 41b44: f8ad 134e strh.w r1, [sp, #846] ; 0x34e 41b48: f20d 314e addw r1, sp, #846 ; 0x34e 41b4c: f9e1 0c7f vld1.16 {d16[]-d17[]}, [r1 :16] 41b50: a9c8 add r1, sp, #800 ; 0x320 41b52: f941 0aef vst1.64 {d16-d17}, [r1 :128] 41b56: f961 0aef vld1.64 {d16-d17}, [r1 :128] 41b5a: a9cc add r1, sp, #816 ; 0x330 41b5c: f941 0aef vst1.64 {d16-d17}, [r1 :128] 41b60: f961 0aef vld1.64 {d16-d17}, [r1 :128] 41b64: f942 0aef vst1.64 {d16-d17}, [r2 :128] 41b68: f96c 0aef vld1.64 {d16-d17}, [ip :128] 41b6c: f963 2aef vld1.64 {d18-d19}, [r3 :128] 41b70: f960 4aef vld1.64 {d20-d21}, [r0 :128] 41b74: a8c4 add r0, sp, #784 ; 0x310 41b76: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41b7a: a9c0 add r1, sp, #768 ; 0x300 41b7c: f941 2aef vst1.64 {d18-d19}, [r1 :128] 41b80: acbc add r4, sp, #752 ; 0x2f0 41b82: f944 4aef vst1.64 {d20-d21}, [r4 :128] 41b86: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41b8a: f961 2aef vld1.64 {d18-d19}, [r1 :128] 41b8e: f964 4aef vld1.64 {d20-d21}, [r4 :128] 41b92: ef52 09e4 vmla.i16 q8, q9, q10 41b96: a8b8 add r0, sp, #736 ; 0x2e0 41b98: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41b9c: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41ba0: f94c 0aef vst1.64 {d16-d17}, [ip :128] 41ba4: f96e 0aef vld1.64 {d16-d17}, [lr :128] 41ba8: f963 2aef vld1.64 {d18-d19}, [r3 :128] 41bac: f962 4aef vld1.64 {d20-d21}, [r2 :128] 41bb0: a8b4 add r0, sp, #720 ; 0x2d0 41bb2: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41bb6: a9b0 add r1, sp, #704 ; 0x2c0 41bb8: f941 2aef vst1.64 {d18-d19}, [r1 :128] 41bbc: aaac add r2, sp, #688 ; 0x2b0 41bbe: f942 4aef vst1.64 {d20-d21}, [r2 :128] 41bc2: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41bc6: f961 2aef vld1.64 {d18-d19}, [r1 :128] 41bca: f962 4aef vld1.64 {d20-d21}, [r2 :128] 41bce: ef52 09e4 vmla.i16 q8, q9, q10 41bd2: a8a8 add r0, sp, #672 ; 0x2a0 41bd4: f940 0aef vst1.64 
{d16-d17}, [r0 :128] 41bd8: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41bdc: f94e 0aef vst1.64 {d16-d17}, [lr :128] 41be0: f96c 0aef vld1.64 {d16-d17}, [ip :128] 41be4: a864 add r0, sp, #400 ; 0x190 41be6: f960 2aef vld1.64 {d18-d19}, [r0 :128] 41bea: a9a4 add r1, sp, #656 ; 0x290 41bec: f941 0aef vst1.64 {d16-d17}, [r1 :128] 41bf0: aaa0 add r2, sp, #640 ; 0x280 41bf2: f942 2aef vst1.64 {d18-d19}, [r2 :128] 41bf6: f961 0aef vld1.64 {d16-d17}, [r1 :128] 41bfa: f962 2aef vld1.64 {d18-d19}, [r2 :128] 41bfe: ef50 08e2 vadd.i16 q8, q8, q9 41c02: a99c add r1, sp, #624 ; 0x270 41c04: f941 0aef vst1.64 {d16-d17}, [r1 :128] 41c08: f961 0aef vld1.64 {d16-d17}, [r1 :128] 41c0c: f94c 0aef vst1.64 {d16-d17}, [ip :128] 41c10: f96e 0aef vld1.64 {d16-d17}, [lr :128] 41c14: f960 2aef vld1.64 {d18-d19}, [r0 :128] 41c18: a898 add r0, sp, #608 ; 0x260 41c1a: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41c1e: a994 add r1, sp, #592 ; 0x250 41c20: f941 2aef vst1.64 {d18-d19}, [r1 :128] 41c24: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41c28: f961 2aef vld1.64 {d18-d19}, [r1 :128] 41c2c: ef50 08e2 vadd.i16 q8, q8, q9 41c30: a890 add r0, sp, #576 ; 0x240 41c32: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41c36: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41c3a: f94e 0aef vst1.64 {d16-d17}, [lr :128] 41c3e: f96c 0aef vld1.64 {d16-d17}, [ip :128] 41c42: a818 add r0, sp, #96 ; 0x60 41c44: f940 0aef vst1.64 {d16-d17}, [r0 :128] 41c48: f960 0aef vld1.64 {d16-d17}, [r0 :128] 41c4c: efc8 6930 vqshrn.s16 d22, q8, #8 41c50: 9807 ldr r0, [sp, #28] 41c52: edc0 6b0a vstr d22, [r0, #40] ; 0x28 41c56: edd0 6b0a vldr d22, [r0, #40] ; 0x28 41c5a: edc0 6b08 vstr d22, [r0, #32] 41c5e: edd0 6b08 vldr d22, [r0, #32] 41c62: edd0 7b56 vldr d23, [r0, #344] ; 0x158 41c66: edc0 6b82 vstr d22, [r0, #520] ; 0x208 41c6a: edc0 7b80 vstr d23, [r0, #512] ; 0x200 41c6e: edd0 6b82 vldr d22, [r0, #520] ; 0x208 41c72: edd0 7b80 vldr d23, [r0, #512] ; 0x200 41c76: ef46 68a7 vadd.i8 d22, d22, d23 41c7a: edc0 6b7e vstr d22, [r0, #504] ; 0x1f8 
41c7e: edd0 6b7e vldr d22, [r0, #504] ; 0x1f8 41c82: edc0 6b7c vstr d22, [r0, #496] ; 0x1f0 41c86: edd0 6b7c vldr d22, [r0, #496] ; 0x1f0 41c8a: edc0 6b7a vstr d22, [r0, #488] ; 0x1e8 41c8e: edd0 6b7a vldr d22, [r0, #488] ; 0x1e8 41c92: 9908 ldr r1, [sp, #32] 41c94: edc1 6bee vstr d22, [r1, #952] ; 0x3b8 41c98: f96e 0aef vld1.64 {d16-d17}, [lr :128] 41c9c: aa10 add r2, sp, #64 ; 0x40 41c9e: f942 0aef vst1.64 {d16-d17}, [r2 :128] 41ca2: f962 0aef vld1.64 {d16-d17}, [r2 :128] 41ca6: efc8 6930 vqshrn.s16 d22, q8, #8 41caa: edc0 6b02 vstr d22, [r0, #8] 41cae: edd0 6b02 vldr d22, [r0, #8] 41cb2: edc0 6b00 vstr d22, [r0] 41cb6: edd0 6b00 vldr d22, [r0] 41cba: edd0 7b56 vldr d23, [r0, #344] ; 0x158 41cbe: edc0 6b78 vstr d22, [r0, #480] ; 0x1e0 41cc2: edc0 7b76 vstr d23, [r0, #472] ; 0x1d8 41cc6: edd0 6b78 vldr d22, [r0, #480] ; 0x1e0 41cca: edd0 7b76 vldr d23, [r0, #472] ; 0x1d8 41cce: ef46 68a7 vadd.i8 d22, d22, d23 41cd2: edc0 6b74 vstr d22, [r0, #464] ; 0x1d0 41cd6: edd0 6b74 vldr d22, [r0, #464] ; 0x1d0 41cda: edc0 6b72 vstr d22, [r0, #456] ; 0x1c8 41cde: edd0 6b72 vldr d22, [r0, #456] ; 0x1c8 41ce2: edc0 6b70 vstr d22, [r0, #448] ; 0x1c0 41ce6: edd0 6b70 vldr d22, [r0, #448] ; 0x1c0 41cea: edc1 6bec vstr d22, [r1, #944] ; 0x3b0 41cee: f50d 6226 add.w r2, sp, #2656 ; 0xa60 41cf2: f962 0acf vld1.64 {d16-d17}, [r2] 41cf6: f50d 6225 add.w r2, sp, #2640 ; 0xa50 41cfa: f942 0acf vst1.64 {d16-d17}, [r2] 41cfe: 9b6f ldr r3, [sp, #444] ; 0x1bc 41d00: f8dd c154 ldr.w ip, [sp, #340] ; 0x154 41d04: 4463 add r3, ip 41d06: f962 671d vld1.8 {d22}, [r2 :64]! 
41d0a: edd2 7b00 vldr d23, [r2] 41d0e: eef0 0b66 vmov.f64 d16, d22 41d12: eef0 1b67 vmov.f64 d17, d23 41d16: f943 080f vst2.8 {d16-d17}, [r3] 41d1a: 9a55 ldr r2, [sp, #340] ; 0x154 41d1c: 3210 adds r2, #16 41d1e: 9255 str r2, [sp, #340] ; 0x154 41d20: e7ff b.n 41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea> 41d22: e7ff b.n 41d24 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaec> 41d24: 9854 ldr r0, [sp, #336] ; 0x150 41d26: 3001 adds r0, #1 41d28: 9054 str r0, [sp, #336] ; 0x150 41d2a: f7ff bb25 b.w 41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140> 41d2e: 986d ldr r0, [sp, #436] ; 0x1b4 41d30: f020 000f bic.w r0, r0, #15 41d34: 9054 str r0, [sp, #336] ; 0x150 41d36: e7ff b.n 41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00> 41d38: 9854 ldr r0, [sp, #336] ; 0x150 41d3a: 996d ldr r1, [sp, #436] ; 0x1b4 41d3c: 4288 cmp r0, r1 41d3e: da7e bge.n 41e3e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc06> 41d40: e7ff b.n 41d42 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb0a> 41d42: 986e ldr r0, [sp, #440] ; 0x1b8 41d44: 7880 ldrb r0, [r0, #2] 41d46: f88d 002f strb.w r0, [sp, #47] ; 0x2f 41d4a: 986e ldr r0, [sp, #440] ; 0x1b8 41d4c: 7840 ldrb r0, [r0, #1] 41d4e: f88d 002e strb.w r0, [sp, #46] ; 0x2e 41d52: 986e ldr r0, [sp, #440] ; 0x1b8 41d54: 7800 ldrb r0, [r0, #0] 41d56: f88d 002d strb.w r0, [sp, #45] ; 0x2d 41d5a: 986e ldr r0, [sp, #440] ; 0x1b8 41d5c: 3004 adds r0, #4 41d5e: 906e str r0, [sp, #440] ; 0x1b8 41d60: f89d 002f ldrb.w r0, [sp, #47] ; 0x2f 41d64: eb00 1040 add.w r0, r0, r0, lsl #5 41d68: f89d 102e ldrb.w r1, [sp, #46] ; 0x2e 41d6c: eb01 11c1 add.w r1, r1, r1, lsl #7 41d70: eb01 0040 add.w r0, r1, r0, lsl #1 41d74: f89d 102d ldrb.w r1, [sp, #45] ; 0x2d 41d78: 2219 movs r2, #25 41d7a: fb11 0002 smlabb r0, r1, r2, r0 41d7e: 3080 adds r0, #128 ; 0x80 41d80: 2110 movs r1, #16 41d82: eb01 2010 add.w r0, r1, r0, lsr #8 41d86: f88d 002c strb.w r0, [sp, #44] ; 0x2c 41d8a: f89d 002f ldrb.w r0, [sp, #47] ; 0x2f 41d8e: f06f 0125 mvn.w r1, #37 ; 0x25 41d92: 
fb10 f001 smulbb r0, r0, r1 41d96: f89d 102e ldrb.w r1, [sp, #46] ; 0x2e 41d9a: 224a movs r2, #74 ; 0x4a 41d9c: fb01 0012 mls r0, r1, r2, r0 41da0: f89d 102d ldrb.w r1, [sp, #45] ; 0x2d 41da4: ebc1 01c1 rsb r1, r1, r1, lsl #3 41da8: eb00 1001 add.w r0, r0, r1, lsl #4 41dac: 3080 adds r0, #128 ; 0x80 41dae: 2180 movs r1, #128 ; 0x80 41db0: eb01 2010 add.w r0, r1, r0, lsr #8 41db4: f88d 002b strb.w r0, [sp, #43] ; 0x2b 41db8: f89d 002f ldrb.w r0, [sp, #47] ; 0x2f 41dbc: ebc0 00c0 rsb r0, r0, r0, lsl #3 41dc0: f89d 202e ldrb.w r2, [sp, #46] ; 0x2e 41dc4: 235e movs r3, #94 ; 0x5e 41dc6: fb12 f203 smulbb r2, r2, r3 41dca: ebc2 1000 rsb r0, r2, r0, lsl #4 41dce: f89d 202d ldrb.w r2, [sp, #45] ; 0x2d 41dd2: eb02 02c2 add.w r2, r2, r2, lsl #3 41dd6: eba0 0042 sub.w r0, r0, r2, lsl #1 41dda: 3080 adds r0, #128 ; 0x80 41ddc: eb01 2010 add.w r0, r1, r0, lsr #8 41de0: f88d 002a strb.w r0, [sp, #42] ; 0x2a 41de4: f89d 002c ldrb.w r0, [sp, #44] ; 0x2c 41de8: 996f ldr r1, [sp, #444] ; 0x1bc 41dea: 9a56 ldr r2, [sp, #344] ; 0x158 41dec: 1c53 adds r3, r2, #1 41dee: 9356 str r3, [sp, #344] ; 0x158 41df0: 5488 strb r0, [r1, r2] 41df2: 9853 ldr r0, [sp, #332] ; 0x14c 41df4: eb00 71d0 add.w r1, r0, r0, lsr #31 41df8: f021 0101 bic.w r1, r1, #1 41dfc: 1a40 subs r0, r0, r1 41dfe: 2800 cmp r0, #0 41e00: d118 bne.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc> 41e02: e7ff b.n 41e04 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbcc> 41e04: 9854 ldr r0, [sp, #336] ; 0x150 41e06: eb00 71d0 add.w r1, r0, r0, lsr #31 41e0a: f021 0101 bic.w r1, r1, #1 41e0e: 1a40 subs r0, r0, r1 41e10: 2800 cmp r0, #0 41e12: d10f bne.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc> 41e14: e7ff b.n 41e16 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbde> 41e16: f89d 002a ldrb.w r0, [sp, #42] ; 0x2a 41e1a: 996f ldr r1, [sp, #444] ; 0x1bc 41e1c: 9a55 ldr r2, [sp, #340] ; 0x154 41e1e: 1c53 adds r3, r2, #1 41e20: 9355 str r3, [sp, #340] ; 0x154 41e22: 5488 strb r0, [r1, r2] 41e24: f89d 002b ldrb.w r0, 
[sp, #43] ; 0x2b 41e28: 996f ldr r1, [sp, #444] ; 0x1bc 41e2a: 9a55 ldr r2, [sp, #340] ; 0x154 41e2c: 1c53 adds r3, r2, #1 41e2e: 9355 str r3, [sp, #340] ; 0x154 41e30: 5488 strb r0, [r1, r2] 41e32: e7ff b.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc> 41e34: e7ff b.n 41e36 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfe> 41e36: 9854 ldr r0, [sp, #336] ; 0x150 41e38: 3001 adds r0, #1 41e3a: 9054 str r0, [sp, #336] ; 0x150 41e3c: e77c b.n 41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00> 41e3e: e7ff b.n 41e40 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc08> 41e40: 9853 ldr r0, [sp, #332] ; 0x14c 41e42: 3001 adds r0, #1 41e44: 9053 str r0, [sp, #332] ; 0x14c 41e46: f7ff ba8e b.w 41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e> 41e4a: 4809 ldr r0, [pc, #36] ; (41e70 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc38>) 41e4c: 4478 add r0, pc 41e4e: 6800 ldr r0, [r0, #0] 41e50: 6800 ldr r0, [r0, #0] 41e52: 9909 ldr r1, [sp, #36] ; 0x24 41e54: 4288 cmp r0, r1 41e56: d106 bne.n 41e66 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc2e> 41e58: e7ff b.n 41e5a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc22> 41e5a: f1a7 0418 sub.w r4, r7, #24 41e5e: 46a5 mov sp, r4 41e60: e8bd 0b00 ldmia.w sp!, {r8, r9, fp} 41e64: bdf0 pop {r4, r5, r6, r7, pc} 41e66: f7f8 ec6e blx 3a744 <__stack_chk_fail@plt> 41e6a: bf00 nop 41e6c: 001ef0f8 .word 0x001ef0f8 41e70: 001ee50c .word 0x001ee50c
NEON 版在我的手机上是“负优化”……跑得比朴素 CPU 版还慢。从上面的反汇编可以看到,几乎每条 intrinsic 的结果都先被 vst1 写回栈、紧接着又用 vld1 读出来,这很可能是编译时没有开启优化(例如 -O0)造成的,而不一定是 NEON 算法本身的问题。看来网上给的 NEON 代码也未必靠谱,还是得亲自实践对比!
经过我的优化后,NEON 版的耗时达到了 53 ms 左右,展开(一次)版的耗时是 51 ms 左右;测试所用原图大小为 1600×1873。