mirror of git://sourceware.org/git/glibc.git
Implement x86-64 memmove with unaligned load/store and rep movsb. Support 16-byte, 32-byte and 64-byte vector register sizes. When size <= 8 times of vector register size, there is no check for address overlap between source and destination. Since overhead for overlap check is small when size > 8 times of vector register size, memcpy is an alias of memmove. A single file provides 2 implementations of memmove, one with rep movsb and the other without rep movsb. They share the same code when size is between 2 times of vector register size and REP_MOVSB_THRESHOLD which is 2KB for 16-byte vector register size and scaled up by large vector register size. Key features: 1. Use overlapping load and store to avoid branch. 2. For size <= 8 times of vector register size, load all sources into registers and store them together. 3. If there is no address overlap between source and destination, copy from both ends with 4 times of vector register size at a time. 4. If address of destination > address of source, backward copy 8 times of vector register size at a time. 5. Otherwise, forward copy 8 times of vector register size at a time. 6. Use rep movsb only for forward copy. Avoid slow backward rep movsb by falling back to backward copy 8 times of vector register size at a time. 7. Skip when address of destination == address of source. [BZ #19776] * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add memmove-sse2-unaligned-erms, memmove-avx-unaligned-erms and memmove-avx512-unaligned-erms. 
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Test __memmove_chk_avx512_unaligned_2, __memmove_chk_avx512_unaligned_erms, __memmove_chk_avx_unaligned_2, __memmove_chk_avx_unaligned_erms, __memmove_chk_sse2_unaligned_2, __memmove_chk_sse2_unaligned_erms, __memmove_avx_unaligned_2, __memmove_avx_unaligned_erms, __memmove_avx512_unaligned_2, __memmove_avx512_unaligned_erms, __memmove_erms, __memmove_sse2_unaligned_2, __memmove_sse2_unaligned_erms, __memcpy_chk_avx512_unaligned_2, __memcpy_chk_avx512_unaligned_erms, __memcpy_chk_avx_unaligned_2, __memcpy_chk_avx_unaligned_erms, __memcpy_chk_sse2_unaligned_2, __memcpy_chk_sse2_unaligned_erms, __memcpy_avx_unaligned_2, __memcpy_avx_unaligned_erms, __memcpy_avx512_unaligned_2, __memcpy_avx512_unaligned_erms, __memcpy_sse2_unaligned_2, __memcpy_sse2_unaligned_erms, __memcpy_erms, __mempcpy_chk_avx512_unaligned_2, __mempcpy_chk_avx512_unaligned_erms, __mempcpy_chk_avx_unaligned_2, __mempcpy_chk_avx_unaligned_erms, __mempcpy_chk_sse2_unaligned_2, __mempcpy_chk_sse2_unaligned_erms, __mempcpy_avx512_unaligned_2, __mempcpy_avx512_unaligned_erms, __mempcpy_avx_unaligned_2, __mempcpy_avx_unaligned_erms, __mempcpy_sse2_unaligned_2, __mempcpy_sse2_unaligned_erms and __mempcpy_erms. * sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S: New file. * sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S: Likewise. * sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S: Likewise. * sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: Likewise. |
||
|---|---|---|
| .. | ||
| Makefile | ||
| bcopy.S | ||
| ifunc-defines.sym | ||
| ifunc-impl-list.c | ||
| memcmp-sse4.S | ||
| memcmp-ssse3.S | ||
| memcmp.S | ||
| memcpy-avx-unaligned.S | ||
| memcpy-sse2-unaligned.S | ||
| memcpy-ssse3-back.S | ||
| memcpy-ssse3.S | ||
| memcpy.S | ||
| memcpy_chk.S | ||
| memmove-avx-unaligned-erms.S | ||
| memmove-avx-unaligned.S | ||
| memmove-avx512-no-vzeroupper.S | ||
| memmove-avx512-unaligned-erms.S | ||
| memmove-sse2-unaligned-erms.S | ||
| memmove-ssse3-back.S | ||
| memmove-ssse3.S | ||
| memmove-vec-unaligned-erms.S | ||
| memmove.c | ||
| memmove_chk.c | ||
| mempcpy.S | ||
| mempcpy_chk.S | ||
| memset-avx2.S | ||
| memset-avx512-no-vzeroupper.S | ||
| memset.S | ||
| memset_chk.S | ||
| sched_cpucount.c | ||
| stpcpy-sse2-unaligned.S | ||
| stpcpy-ssse3.S | ||
| stpcpy.S | ||
| stpncpy-c.c | ||
| stpncpy-sse2-unaligned.S | ||
| stpncpy-ssse3.S | ||
| stpncpy.S | ||
| strcasecmp_l-ssse3.S | ||
| strcasecmp_l.S | ||
| strcat-sse2-unaligned.S | ||
| strcat-ssse3.S | ||
| strcat.S | ||
| strchr-sse2-no-bsf.S | ||
| strchr.S | ||
| strcmp-sse2-unaligned.S | ||
| strcmp-sse42.S | ||
| strcmp-ssse3.S | ||
| strcmp.S | ||
| strcpy-sse2-unaligned.S | ||
| strcpy-ssse3.S | ||
| strcpy.S | ||
| strcspn-c.c | ||
| strcspn.S | ||
| strncase_l-ssse3.S | ||
| strncase_l.S | ||
| strncat-c.c | ||
| strncat-sse2-unaligned.S | ||
| strncat-ssse3.S | ||
| strncat.S | ||
| strncmp-ssse3.S | ||
| strncmp.S | ||
| strncpy-c.c | ||
| strncpy-sse2-unaligned.S | ||
| strncpy-ssse3.S | ||
| strncpy.S | ||
| strpbrk-c.c | ||
| strpbrk.S | ||
| strspn-c.c | ||
| strspn.S | ||
| strstr-sse2-unaligned.S | ||
| strstr.c | ||
| test-multiarch.c | ||
| varshift.c | ||
| varshift.h | ||
| wcscpy-c.c | ||
| wcscpy-ssse3.S | ||
| wcscpy.S | ||
| wmemcmp-c.c | ||
| wmemcmp-sse4.S | ||
| wmemcmp-ssse3.S | ||
| wmemcmp.S | ||