Speed up memmove on x86
author Andrew Gallatin <gallatin@google.com>
Fri, 6 Jun 2014 21:56:11 +0000 (14:56 -0700)
committer Andrew Gallatin <gallatin@google.com>
Mon, 9 Jun 2014 14:37:12 +0000 (07:37 -0700)
Take an asm bcopy (which has the same overlapping semantics
as memmove) from FreeBSD.  This speeds up netperf by anywhere
from 25% to 40% on x86_64.

Note:  I also brought in a copy for i686, but i686 does not
compile due to other problems in the tree, so I was unable to test it.

kern/arch/x86/Kbuild
kern/arch/x86/support32.S [new file with mode: 0644]
kern/arch/x86/support64.S [new file with mode: 0644]
kern/src/string.c

index 19aa14b..6c65cae 100644 (file)
@@ -21,6 +21,7 @@ obj-y                                         += pmap.o pmap$(BITS).o
 obj-y                                          += process$(BITS).o
 obj-y                                          += rdtsc_test.o
 obj-y                                          += setjmp$(BITS).o
+obj-y                                          += support$(BITS).o
 obj-y                                          += smp.o
 obj-y                                          += smp_boot.o
 obj-y                                          += smp_entry$(BITS).o
diff --git a/kern/arch/x86/support32.S b/kern/arch/x86/support32.S
new file mode 100644 (file)
index 0000000..c8981a5
--- /dev/null
@@ -0,0 +1,90 @@
+/*-
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * bcopy(src, dst, cnt): copy cnt bytes from src to dst; regions may overlap.
+ *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+ */
+
+.text
+.align 4
+.globl bcopy
+.type bcopy, @function
+bcopy:
+       pushl   %ebp                            /* set up a frame pointer */
+       movl    %esp,%ebp
+       pushl   %esi                            /* esi/edi are restored before each ret */
+       pushl   %edi
+       movl    8(%ebp),%esi                    /* esi = src (1st stack argument) */
+       movl    12(%ebp),%edi                   /* edi = dst (2nd stack argument) */
+       movl    16(%ebp),%ecx                   /* ecx = cnt (3rd stack argument) */
+
+       movl    %edi,%eax
+       subl    %esi,%eax                       /* eax = dst - src */
+       cmpl    %ecx,%eax                       /* overlapping && src < dst? */
+       jb      1f                              /* unsigned (dst - src) < cnt: go backwards */
+
+       shrl    $2,%ecx                         /* copy by 32-bit words */
+       cld                                     /* nope, copy forwards */
+       rep
+       movsl                                   /* move cnt / 4 dwords, addresses ascending */
+       movl    16(%ebp),%ecx                   /* reload the original cnt */
+       andl    $3,%ecx                         /* any bytes left? */
+       rep
+       movsb                                   /* move the trailing cnt % 4 bytes */
+       popl    %edi
+       popl    %esi
+       popl    %ebp
+       ret
+
+       .p2align 2,0x90
+1:
+       addl    %ecx,%edi                       /* copy backwards */
+       addl    %ecx,%esi
+       decl    %edi                            /* edi = dst + cnt - 1, the last dst byte */
+       decl    %esi                            /* esi = src + cnt - 1, the last src byte */
+       andl    $3,%ecx                         /* any fractional bytes? */
+       std                                     /* DF = 1: string ops now decrement esi/edi */
+       rep
+       movsb                                   /* move the top cnt % 4 bytes first */
+       movl    16(%ebp),%ecx                   /* copy remainder by 32-bit words */
+       shrl    $2,%ecx
+       subl    $3,%esi                         /* point at the low byte of the next dword down */
+       subl    $3,%edi
+       rep
+       movsl
+       popl    %edi
+       popl    %esi
+       cld                                     /* clear DF again before returning */
+       popl    %ebp
+       ret
+
+.size bcopy,.-bcopy
diff --git a/kern/arch/x86/support64.S b/kern/arch/x86/support64.S
new file mode 100644 (file)
index 0000000..25a7e4b
--- /dev/null
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+
+
+/*
+ * bcopy(src, dst, cnt): copy cnt bytes from src to dst; regions may overlap.
+ *       rdi, rsi, rdx
+ *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+ */
+.text
+.align 4
+.globl bcopy
+.type bcopy,  @function
+bcopy:
+       xchgq   %rsi,%rdi                       /* movs reads from rsi, writes to rdi: swap so rsi = src, rdi = dst */
+       movq    %rdx,%rcx                       /* rcx = cnt; rdx keeps the original for later */
+
+       movq    %rdi,%rax
+       subq    %rsi,%rax                       /* rax = dst - src */
+       cmpq    %rcx,%rax                       /* overlapping && src < dst? */
+       jb      1f                              /* unsigned (dst - src) < cnt: go backwards */
+
+       shrq    $3,%rcx                         /* copy by 64-bit words */
+       cld                                     /* nope, copy forwards */
+       rep
+       movsq                                   /* move cnt / 8 qwords, addresses ascending */
+       movq    %rdx,%rcx                       /* reload the original cnt */
+       andq    $7,%rcx                         /* any bytes left? */
+       rep
+       movsb                                   /* move the trailing cnt % 8 bytes */
+       ret
+
+       /* ALIGN_TEXT */
+1:
+       addq    %rcx,%rdi                       /* copy backwards */
+       addq    %rcx,%rsi
+       decq    %rdi                            /* rdi = dst + cnt - 1, the last dst byte */
+       decq    %rsi                            /* rsi = src + cnt - 1, the last src byte */
+       andq    $7,%rcx                         /* any fractional bytes? */
+       std                                     /* DF = 1: string ops now decrement rsi/rdi */
+       rep
+       movsb                                   /* move the top cnt % 8 bytes first */
+       movq    %rdx,%rcx                       /* copy remainder by 64-bit words */
+       shrq    $3,%rcx
+       subq    $7,%rsi                         /* point at the low byte of the next qword down */
+       subq    $7,%rdi
+       rep
+       movsq
+       cld                                     /* clear DF again before returning */
+       ret
+.size bcopy,.-bcopy
+
+
index 6142c52..427c4d5 100644 (file)
@@ -272,9 +272,17 @@ memcpy(void* dst, const void* src, size_t _n)
        return dst;
 }
 
+#ifdef CONFIG_X86
+void bcopy(const void *src, void *dst, size_t len);
+#endif
+
 void *
 memmove(void *COUNT(_n) dst, const void *COUNT(_n) src, size_t _n)
 {
+#ifdef CONFIG_X86
+       bcopy(src, dst, _n);
+       return dst;
+#else
        const char *BND(src,src+_n) s;
        char *BND(dst,dst+_n) d;
        size_t n = _n;
@@ -291,6 +299,7 @@ memmove(void *COUNT(_n) dst, const void *COUNT(_n) src, size_t _n)
                        *d++ = *s++;
 
        return dst;
+#endif
 }
 
 int