/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

	.text
	.p2align	4

###############################################################################
#
# void *memset(void *p, char ch, size_t count)
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the set
#         GR4, GR7, GR8, and GR11 must be managed
#
# Register roles as used by the code below:
#   In:   GR8  = p, GR9 = ch, GR10 = count
#   Out:  GR8  = p (GR8 is never written here)
#   GR4   address tracking pointer
#   GR5   bytes still to be written
#   GR6   size of the chunk currently being considered
#   GR7   index/step register for the store-with-update instructions
#   GR12  fill pattern, widened to 16 and then 32 bits
#   GR13  copy of GR12, forming the 64-bit GR12:GR13 pair stored by cstdu
#   ICC3  result of the last executed count update; Z set means all done
#   May clobber: GR4-GR7, GR9, GR12, GR13, ICC0, ICC1, ICC3, CC5, CC7
#
# Fixup invariant: whenever a store is issued, GR4 + GR7 is the address being
# written, GR8 is the original start and GR10 the original count.  This is
# what __memset_user_error_handler below relies on.
#
###############################################################################
	.globl		memset,__memset_end
	.type		memset,@function
memset:
	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
	andi		gr9,#0xff,gr9			; keep only the low 8 bits of ch
	or.p		gr8,gr0,gr4			; GR4 = address
	beqlr		icc3,#0				; count == 0: nothing to do

	# GR7 is the index register the fault fixup adds to GR4 to find the
	# failing address.  The alignment stores below index with GR0, so GR7
	# must read as zero until the 8-byte stepping code loads it; nothing
	# before this point has written GR7
	setlos		#0,gr7

	# conditionally write a byte to 2b-align the address
	setlos.p	#1,gr6
	andicc		gr4,#1,gr0,icc0
	ckne		icc0,cc7			; CC7 = address is odd
	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a word to 4b-align the address
	andicc.p	gr4,#2,gr0,icc0
	subicc		gr5,#2,gr0,icc1
	setlos.p	#2,gr6
	ckne		icc0,cc7			; CC7 = address bit 1 is set
	slli.p		gr9,#8,gr12			; need to double up the pattern
	cknc		icc1,cc5			; CC5 = at least 2 bytes remain
	or.p		gr9,gr12,gr12
	andcr		cc7,cc5,cc7			; store only when both hold

	csth.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a dword to 8b-align the address
	andicc.p	gr4,#4,gr0,icc0
	subicc		gr5,#4,gr0,icc1
	setlos.p	#4,gr6
	ckne		icc0,cc7			; CC7 = address bit 2 is set
	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
	cknc		icc1,cc5			; CC5 = at least 4 bytes remain
	or.p		gr13,gr12,gr12
	andcr		cc7,cc5,cc7			; store only when both hold

	cst.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	or.p		gr12,gr12,gr13			; need to octuple-up the pattern

	# the address is now 8b-aligned - loop around writing 64b chunks
	# (when fewer than 8 bytes remain the address may not be 8b-aligned,
	# but then every 8-byte store below is predicated off)
	setlos		#8,gr7
	subi.p		gr4,#8,gr4			; store with update index does weird stuff
	setlos		#64,gr6

	subicc		gr5,#64,gr0,icc0
0:	cknc		icc0,cc7			; CC7 = at least 64 bytes remain
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#64,gr0,icc0		; test for another full chunk
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0
	bnc		icc0,#2,0b

	# now do 32-byte remnant
	subicc.p	gr5,#32,gr0,icc0
	setlos		#32,gr6
	cknc		icc0,cc7			; CC7 = at least 32 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	setlos		#16,gr6
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#16,gr0,icc0		; pre-compute the 16-byte test
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 16-byte remnant
	cknc		icc0,cc7			; CC7 = at least 16 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 8-byte remnant
	subicc		gr5,#8,gr0,icc1
	cknc		icc1,cc7			; CC7 = at least 8 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	setlos.p	#4,gr7
	beqlr		icc3,#0

	# now do 4-byte remnant
	# GR4 still trails the next free byte by 8; advance it by 4 so that
	# GR4 + GR7 (GR7 = 4) is the next address to write
	subicc		gr5,#4,gr0,icc0
	addi.p		gr4,#4,gr4
	cknc		icc0,cc7			; CC7 = at least 4 bytes remain
	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#2,gr0,icc1			; pre-compute the 2-byte test
	beqlr		icc3,#0

	# now do 2-byte remnant
	# same re-basing as above: GR4 + GR7 (GR7 = 2) is the next address
	setlos		#2,gr7
	addi.p		gr4,#2,gr4
	cknc		icc1,cc7			; CC7 = at least 2 bytes remain
	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#1,gr0,icc0			; pre-compute the 1-byte test
	beqlr		icc3,#0

	# now do 1-byte remnant
	# the byte store indexes with GR0, so GR4 becomes the exact address
	# and GR7 is zeroed to keep GR4 + GR7 correct for the fixup
	setlos		#0,gr7
	addi.p		gr4,#2,gr4
	cknc		icc0,cc7			; CC7 = at least 1 byte remains
	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
	bralr
__memset_end:

	.size		memset, __memset_end-memset

###############################################################################
#
# clear memory in userspace
# - return the number of bytes that could not be cleared (0 on complete success)
#
# long __memset_user(void *p, size_t count)
#
# Register roles as used by the code below:
#   In:   GR8  = p, GR9 = count
#   Out:  GR8  = 0 on the normal return path, otherwise the count computed by
#                __memset_user_error_handler
#   GR11  holds the return address for the whole call, since LR is overwritten
#         by the call to memset
#
###############################################################################
	.globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler
	.type		__memset_user,@function
__memset_user:
	movsg		lr,gr11				; GR11 = our return address

	# abuse memset to do the dirty work
	or.p		gr9,gr9,gr10			; GR10 = count (third memset argument)
	setlos		#0,gr9				; fill byte is zero
	call		memset
__memset_user_error_lr:
	jmpl.p		@(gr11,gr0)
	setlos		#0,gr8				; everything was cleared

	# deal any exception generated by memset
	# GR4  - memset's address tracking pointer
	# GR7  - memset's step value (index register for store insns)
	# GR8  - memset's original start address
	# GR10 - memset's original count
__memset_user_error_handler:
	add.p		gr4,gr7,gr4			; GR4 = address of the failed store
	add		gr8,gr10,gr8			; GR8 = one past the end of the region
	jmpl.p		@(gr11,gr0)
	sub		gr8,gr4,gr8		; we return the amount left uncleared

	.size		__memset_user, .-__memset_user