b6cbf7203b
This patch imports the unmodified current version of NetBSD libc. The NetBSD includes are in /nbsd_include, while the libc code itself is split between lib/nbsd_libc and common/lib/libc.
/*	$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $")
#endif
#define	REG_PTR		r0
#define	REG_TMP1	r1

#ifdef BZERO
# define REG_C		r2
# define REG_DST	r4
# define REG_LEN	r5
#else
# define REG_DST0	r3
# define REG_DST	r4
# define REG_C		r5
# define REG_LEN	r6
#endif
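
/*
 * Register map: per the SH ELF calling convention, arguments arrive
 * in r4..r7 and the return value goes in r0.  So memset(dst, c, len)
 * sees dst=r4, c=r5, len=r6, and bzero(dst, len) sees dst=r4, len=r5.
 * REG_DST0 (r3) preserves the original dst for memset's return value.
 */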

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
#ifdef BZERO
	mov	#0,REG_C		/* (delay slot) */
#endif

	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len */
	bt/s	done			/* if (len == 0) return; */
	add	#1,REG_DST		/* (delay slot) REG_DST = dst + 1 */
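
	/*
	 * 0..11 bytes: store backwards with pre-decrement, from
	 * dst[len-1] down to dst[0].  Each cmp/eq asks, before the
	 * store, whether the store about to execute is the last one
	 * (REG_LEN == dst+1); the bt/s then exits after that store,
	 * with the next compare harmlessly filling its delay slot.
	 */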
	/* unroll 4 loops */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0
#endif
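
/*
 * 12..27 bytes, dst possibly odd: a computed jump into an unrolled
 * table of byte stores (Duff's device).  Every SH instruction is
 * 2 bytes, so jumping to 1f - len*2 executes exactly the last len
 * stores of the table, covering dst[len-1] down to dst[0] (in the
 * bzero flavour the final store sits in the delay slot of the rts
 * just before 1f).  The 4-bit displacement of "mov.b r0,@(disp,Rn)"
 * only reaches 15, so a second base register (REG_TMP1 = dst+16)
 * serves offsets 16..31.
 */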
small:
	mov	REG_DST,r0
	tst	#1,r0			/* if dst is 2-byte aligned... */
	bt/s	small_aligned		/* ...use the 16-bit store table */
	mov	REG_DST,REG_TMP1
	shll	REG_LEN			/* REG_LEN *= 2: one 2-byte insn per byte stored */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0
	jmp	@r0			/* jump to 1f - len*2 */
	mov	REG_C,r0		/* (delay slot) */
	.align	2
	/* dst[31] .. dst[16], via REG_TMP1 = dst+16 */
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	/* dst[15] .. dst[0] */
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
#ifdef BZERO
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0
#endif

	/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif
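
	/*
	 * REG_C now holds the fill byte in both bytes of its low
	 * halfword (or 0 for bzero).  If len is odd, store the last
	 * byte first so an even count remains, then jump to 1f - len:
	 * each mov.w is a 2-byte instruction storing 2 bytes, so the
	 * entry point writes dst[len-2] first and dst[0] last.
	 */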
	mov	REG_LEN,r0
	tst	#1,r0			/* is len even? */
	bt/s	1f
	add	#-1,r0			/* (delay slot) r0 = len - 1 */
	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte, dst[len-1] */
	mov	r0,REG_LEN		/* len is now even */
1:

	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0
	jmp	@r0			/* jump to 1f - len */
	mov	REG_C,r0		/* (delay slot) */

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0
#endif
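
/*
 * 28 bytes or more.  Widen the fill byte to a full 32-bit pattern
 * (byte -> halfword -> longword via swap.w/xtrct), fix up an
 * unaligned head and tail if necessary, and do the bulk of the work
 * as aligned 4-byte stores.  REG_PTR walks down from dst + len.
 */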
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* dst 4-byte aligned? */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* (delay slot) REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* len a multiple of 4? */
	bf/s	unaligned_len		/* (delay slot is the mov #32 below) */
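
/*
 * Here both REG_DST and REG_PTR are 4-byte aligned.  The main loop
 * writes 32 bytes per iteration with eight mov.l stores, walking
 * REG_PTR down; the counter update and the exit compare are
 * interleaved between stores, and the "mov #32" doubles as the
 * delay slot of the bf/s above.
 */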
aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* if (32 > len) skip the loop */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)
9:
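
	/*
	 * 0..28 residual bytes: the same compare-ahead, pre-decrement
	 * pattern as the small byte loop above, with 4-byte stores.
	 */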
	/* fill the remaining 4*n bytes */
	cmp/eq	REG_DST,REG_PTR
	bt	9f
	add	#4,REG_DST		/* stop when one word is left */
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0
#endif
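
/*
 * dst and/or len not 4-byte aligned: store one byte and/or one
 * halfword at the front to round dst up to a 4-byte boundary, and
 * likewise at the back to round REG_PTR (= dst + len) down, then
 * rejoin the aligned loop.
 */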
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) { */
	add	#1,REG_TMP1		/* REG_TMP1 = 2 */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* (delay slot) T = !(dst & 2) */
	mov.b	REG_C,@REG_DST		/* *dst++ = c; */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/* T = !(dst & 2) */
2:					/* } */
					/* if (dst & 2) { */
	bt	4f
	mov.w	REG_C,@REG_DST		/* *(uint16_t*)dst++ = c; */
	add	#2,REG_DST
4:					/* } */

	tst	#3,REG_PTR		/* if (ptr & 3) { */
	bt/s	4f
unaligned_len:
	tst	#1,REG_PTR		/* if (ptr & 1) { */
	bt/s	2f
	tst	#2,REG_PTR		/* (delay slot) T = !(ptr & 2) */
	mov.b	REG_C,@-REG_PTR		/* *--ptr = c; */
2:					/* } */
					/* if (ptr & 2) { */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/* *--(uint16_t*)ptr = c; */
4:					/* } */
					/* } */
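
	/*
	 * Head and tail are done; the aligned middle is
	 * REG_PTR - REG_DST bytes (computed in the bra delay slot).
	 */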
	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* (delay slot) REG_LEN = ptr - dst */