/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

	.macro ALTERNATIVE_JUMP feature,orig,alt
0:
	.byte 0xe9	/* 32bit jump */
	.long \orig-1f	/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9	/* near jump with 32bit immediate */
	.long \alt-1b	/* offset */   /* or alternatively to alt */
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad  0b
	.quad  2b
	.byte  \feature	/* when feature is set */
	.byte  5
	.byte  5
	.previous
	.endm

/* Standard copy_to_user with segment limit checking */
ENTRY(copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc   bad_to_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae  bad_to_user
	xorl %eax,%eax	/* clear zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

ENTRY(copy_user_generic)
	CFI_STARTPROC
	movl $1,%ecx	/* set zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

ENTRY(__copy_from_user_inatomic)
	CFI_STARTPROC
	xorl %ecx,%ecx	/* clear zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

/* Standard copy_from_user with segment limit checking */
ENTRY(copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc   bad_from_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae  bad_from_user
	movl $1,%ecx	/* set zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(copy_from_user)

	.section .fixup,"ax"
	/* must zero dest */
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx
	xorl %eax,%eax
	rep stosb
bad_to_user:
	movl %edx,%eax
	ret
	CFI_ENDPROC
END(bad_from_user)
	.previous
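/*
 * Illustrative only, not part of the original file: a minimal sketch of a
 * hypothetical in-kernel call site for copy_from_user above, assuming the
 * register interface used throughout this file (rdi = destination,
 * rsi = source, rdx = byte count, result in rax = bytes left uncopied,
 * 0 on success). The label .Lcaller_fault is hypothetical.
 *
 *	movq	%r12,%rdi		# kernel destination buffer
 *	movq	%r13,%rsi		# untrusted user source pointer
 *	movl	$64,%edx		# byte count
 *	call	copy_from_user
 *	testq	%rax,%rax		# nonzero: some bytes faulted and the
 *	jnz	.Lcaller_fault		# uncopied tail was zero filled
 */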
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag -- if true zero destination on error
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rcx, 0
	xorl %eax,%eax		/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz  .Lbad_alignment
.Lafter_bad_alignment:
#endif

	movq %rdx,%rcx

	movl $64,%ebx
	shrq $6,%rdx
	decq %rdx
	js   .Lhandle_tail

	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movq %r11,(%rdi)
.Ld2:	movq %r8,1*8(%rdi)
.Ld3:	movq %r9,2*8(%rdi)
.Ld4:	movq %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movq %r11,4*8(%rdi)
.Ld6:	movq %r8,5*8(%rdi)
.Ld7:	movq %r9,6*8(%rdi)
.Ld8:	movq %r10,7*8(%rdi)

	decq %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns  .Lloop

	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx
	andl $63,%ecx
	shrl $3,%ecx
	jz   .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movq %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz  .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz   .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz  .Lloop_1

	CFI_REMEMBER_STATE
.Lende:
	popq %rcx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rcx
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
	CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d
	movl %r9d,%ecx
	cmpq %r9,%rdx
	jz   .Lhandle_7
	js   .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz  .Lalign_1
	subq %r9,%rdx
	jmp  .Lafter_bad_alignment
#endif

	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e	/* Ls1-Ls4 have copied zero bytes */
	.quad .Ls2,.Ls1e
	.quad .Ls3,.Ls1e
	.quad .Ls4,.Ls1e
	.quad .Ld1,.Ls1e	/* Ld1-Ld4 have copied 0-24 bytes */
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e	/* Ls5-Ls8 have copied 32 bytes */
	.quad .Ls6,.Ls5e
	.quad .Ls7,.Ls5e
	.quad .Ls8,.Ls5e
	.quad .Ld5,.Ls5e	/* Ld5-Ld8 have copied 32-56 bytes */
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Lzero_rest
	.quad .Ld11,.Lzero_rest
#endif
	.quad .Le5,.Le_zero
	.previous

	/* eax: zero, ebx: 64 */
.Ls1e:	addl $8,%eax	/* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
.Ls2e:	addl $8,%eax
.Ls3e:	addl $8,%eax
.Ls4e:	addl $8,%eax
.Ls5e:	addl $8,%eax
.Ls6e:	addl $8,%eax
.Ls7e:	addl $8,%eax
.Ls8e:	addl $8,%eax
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi	/* correct destination with computed offset */

	shlq $6,%rdx	/* loop counter * 64 (stride length) */
	addq %rax,%rdx	/* add offset to loopcnt */
	andl $63,%ecx	/* remaining bytes */
	addq %rcx,%rdx	/* add them */
	jmp  .Lzero_rest

	/* exception on quad word loop in tail handling */
	/* ecx: loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
	cmpl $0,(%rsp)
	jz   .Le_zero
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep stosb
	/* when there is another exception while zeroing the rest just return */
.Le_zero:
	movq %rdx,%rax
	jmp  .Lende
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
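/*
 * Worked example, illustrative and not part of the original file, of the
 * fixup chain above: suppose .Ld3 faults, i.e. the third quadword store of
 * a 64-byte block. Its __ex_table entry enters the chain at .Ls3e, so six
 * "addl $8,%eax" instructions run and %eax = 48, the bytes of the current
 * block that were not stored. "addq %rbx,%rdi; subq %rax,%rdi" then advances
 * %rdi by 64 - 48 = 16, to the first byte not written, and
 * %rdx = remaining_blocks*64 + 48 + (count & 63) is the total left to copy.
 * If the zero flag saved on the stack is set, .Lzero_rest clears that tail
 * with "rep stosb"; either way .Le_zero returns the count in %eax via .Lende.
 */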
/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 * Patch in jmps to this code instead of copying it fully
 * to avoid unwanted aliasing in the exception tables.
 */

/* rdi	destination
 * rsi	source
 * rdx	count
 * ecx	zero flag
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please take these errata into account.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	movl %ecx,%r8d		/* save zero flag */
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
	jz   10f
1:	rep movsq
	movl %edx,%ecx
2:	rep movsb
9:	movl %ecx,%eax
	ret

	/* multiple of 8 byte */
10:	rep movsq
	xor %eax,%eax
	ret

	/* exception handling */
3:	lea (%rdx,%rcx,8),%rax	/* exception on quad loop */
	jmp 6f
5:	movl %ecx,%eax		/* exception on byte loop */
	/* eax: left over bytes */
6:	testl %r8d,%r8d		/* zero flag set? */
	jz 7f
	movl %eax,%ecx		/* initialize x86 loop counter */
	push %rax
	xorl %eax,%eax
8:	rep stosb		/* zero the rest */
11:	pop %rax
7:	ret
	CFI_ENDPROC
END(copy_user_generic_string)

	.section __ex_table,"a"
	.quad 1b,3b
	.quad 2b,5b
	.quad 8b,11b
	.quad 10b,3b
	.previous
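/*
 * Worked example, illustrative and not part of the original file, of the
 * string variant's fixup arithmetic: for a 29-byte copy, %ecx is loaded
 * with 29 >> 3 = 3 quadwords and %edx with 29 & 7 = 5 tail bytes. If the
 * "rep movsq" at 1: faults with %rcx = 2 quadwords still to go, the fixup
 * at 3: computes %rax = %rdx + %rcx*8 = 5 + 16 = 21 bytes left uncopied.
 * When the zero flag saved in %r8d is set, that 21-byte tail of the
 * destination is cleared with "rep stosb" before 21 is returned in %rax.
 */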