219 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			ArmAsm
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			219 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			ArmAsm
		
	
	
		
			Executable File
		
	
	
	
	
| /*
 | |
|  * Copyright (C) 2008 The Android Open Source Project
 | |
|  * All rights reserved.
 | |
|  *
 | |
|  * Redistribution and use in source and binary forms, with or without
 | |
|  * modification, are permitted provided that the following conditions
 | |
|  * are met:
 | |
|  *  * Redistributions of source code must retain the above copyright
 | |
|  *    notice, this list of conditions and the following disclaimer.
 | |
|  *  * Redistributions in binary form must reproduce the above copyright
 | |
|  *    notice, this list of conditions and the following disclaimer in
 | |
|  *    the documentation and/or other materials provided with the
 | |
|  *    distribution.
 | |
|  *
 | |
|  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | |
|  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | |
|  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 | |
|  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 | |
|  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | |
|  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 | |
|  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 | |
|  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 | |
|  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | |
|  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 | |
|  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 | |
|  * SUCH DAMAGE.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * Linkage helpers. Each one except ENDPROC is wrapped in #ifndef so a
 | |
|  * definition supplied earlier by the build takes precedence.
 | |
|  */
 | |
| 
 | |
| /* ALIGN: alignment directive emitted by ENTRY(). On ARM, GNU as treats the
 | |
|  * .align operand as a power of two, so this requests 16-byte alignment. */
 | |
| #ifndef ALIGN
 | |
| #define ALIGN .align            4
 | |
| #endif
 | |
| 
 | |
| /* ENTRY(name): export `name` as a global symbol, apply ALIGN, then place the
 | |
|  * label. It does not set the symbol type; see ENDPROC() for that. */
 | |
| #ifndef ENTRY
 | |
| #define ENTRY(name) \
 | |
| 	.globl name; \
 | |
| 	ALIGN; \
 | |
| name:
 | |
| #endif
 | |
| 
 | |
| /* WEAK(name): declare `name` as a weak symbol and place the label (no
 | |
|  * alignment is applied). */
 | |
| #ifndef WEAK
 | |
| #define WEAK(name) \
 | |
| 	.weak name; \
 | |
| name:
 | |
| #endif
 | |
| 
 | |
| /* END(name): record the symbol size as the distance from the label to the
 | |
|  * current location. Emits nothing else. */
 | |
| #ifndef END
 | |
| #define END(name) \
 | |
| 	.size name, .-name
 | |
| #endif
 | |
| 
 | |
| /* ENDPROC(name): mark `name` as a function symbol, then do what END() does. */
 | |
| #define ENDPROC(name) \
 | |
| 	.type name, %function; \
 | |
| 	END(name)
 | |
| 
 | |
| /* When the compiler driver advertises NEON, select the NEON FPU for the
 | |
|  * assembler; the NEON code path in memset below is guarded by the same macro. */
 | |
| #if defined(__ARM_NEON__)
 | |
|     .fpu    neon
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * bzero: zero a buffer by rearranging the arguments and running into memset.
 | |
|  *
 | |
|  * In:    r0 = destination, r1 = byte count
 | |
|  * Out:   whatever memset leaves in r0 (the destination pointer)
 | |
|  *
 | |
|  * There is no branch: execution continues straight into memset, which must
 | |
|  * therefore stay the very next symbol in this file. ENTRY(memset) inserts
 | |
|  * ALIGN padding between the two; the fall-through relies on the assembler
 | |
|  * filling that padding with no-ops (NOTE(review): confirm for the toolchain
 | |
|  * and section in use).
 | |
|  *
 | |
|  * Closed with ENDPROC rather than END so the symbol is typed %function,
 | |
|  * which the linker needs in order to interwork calls from Thumb code.
 | |
|  */
 | |
| ENTRY(bzero)
 | |
|         mov     r2, r1          /* count -> r2, where memset reads its length */
 | |
|         mov     r1, #0          /* fill value = 0 */
 | |
|         // Fall through to memset...
 | |
| ENDPROC(bzero)
 | |
| 
 | |
| /*
 | |
|  * memset: store the low byte of r1 into r2 consecutive bytes starting at r0.
 | |
|  *
 | |
|  * In:    r0 = destination, r1 = fill value, r2 = byte count (unsigned)
 | |
|  * Out:   r0 = original destination (pushed on entry, popped before return)
 | |
|  * Uses:  r1-r3, ip/r12 and the flags are modified. The NEON path also
 | |
|  *        writes q0/q1. The integer path uses r4-r7 and lr as extra pattern
 | |
|  *        registers and restores them from the stack before returning.
 | |
|  *
 | |
|  * Build-time switches:
 | |
|  *   __ARM_NEON__          assemble the NEON implementation first
 | |
|  *   NEON_MEMSET_DIVIDER   counts above this value branch to the integer path
 | |
|  *   NEON_UNALIGNED_ACCESS omit the 16-byte alignment prologue and the :128
 | |
|  *                         alignment hints on the wide stores
 | |
|  *
 | |
|  * Size bits are tested by shifting the count so that the bits of interest
 | |
|  * land in N and C. MOVS with a shifted operand does not write V, so only
 | |
|  * N-based (mi/pl) and C-based (cs/cc, hs/lo) conditions are used after it.
 | |
|  *
 | |
|  * NOTE(review): .fnstart below has no matching .fnend in this file, and
 | |
|  * END()/ENDPROC() emit only .size/.type. Confirm whether unwind info is
 | |
|  * actually wanted before relying on the .save annotations.
 | |
|  */
 | |
| ENTRY(memset)
 | |
| 	.fnstart
 | |
| #if defined(__ARM_NEON__)
 | |
| 
 | |
| #ifdef  NEON_MEMSET_DIVIDER
 | |
|         cmp         r2, #NEON_MEMSET_DIVIDER
 | |
|         bhi         11f                     /* large fill: use the integer path */
 | |
| #endif
 | |
|         .save       {r0}
 | |
|         stmfd       sp!, {r0}               /* keep dst for the return value */
 | |
| 
 | |
|         vdup.8      q0, r1                  /* q0 = fill byte in all 16 lanes */
 | |
| 
 | |
| #ifndef NEON_UNALIGNED_ACCESS
 | |
|         /* do we have at least 16-bytes to write (needed for alignment below) */
 | |
|         cmp         r2, #16
 | |
|         blo         3f
 | |
| 
 | |
|         /* align destination to 16 bytes for the write-buffer */
 | |
|         rsb         r3, r0, #0              /* r3 = -dst */
 | |
|         ands        r3, r3, #0xF            /* r3 = bytes up to next 16-byte boundary */
 | |
|         beq         2f                      /* already aligned */
 | |
| 
 | |
|         /* write up to 15-bytes (count in r3) */
 | |
|         sub         r2, r2, r3
 | |
|         movs        ip, r3, lsl #31         /* N = r3 bit 0, C = r3 bit 1 */
 | |
|         strmib      r1, [r0], #1            /* bit 0: one byte */
 | |
|         strcsb      r1, [r0], #1            /* bit 1: two bytes */
 | |
|         strcsb      r1, [r0], #1
 | |
|         movs        ip, r3, lsl #29         /* N = r3 bit 2, C = r3 bit 3 */
 | |
|         /* bpl, not bge: V still holds the result of the cmp above, so an
 | |
|          * N==V test would misfire for counts in 0x80000000..0x8000000F. */
 | |
|         bpl         1f
 | |
| 
 | |
|         // writes 4 bytes, 32-bits aligned
 | |
|         vst1.32     {d0[0]}, [r0, :32]!
 | |
| 1:      bcc         2f                      /* bit 3 clear: no 8-byte store */
 | |
| 
 | |
|         // writes 8 bytes, 64-bits aligned
 | |
|         vst1.8      {d0}, [r0, :64]!
 | |
| 2:
 | |
| #endif
 | |
|         /* make sure we have at least 32 bytes to write */
 | |
|         subs        r2, r2, #32             /* r2 is biased by -32 inside the loop */
 | |
|         blo         2f
 | |
|         vmov        q1, q0                  /* d0-d3 now all hold the pattern */
 | |
| 
 | |
| 1:      /* The main loop writes 32 bytes at a time */
 | |
|         subs        r2, r2, #32
 | |
| #ifndef NEON_UNALIGNED_ACCESS
 | |
|         vst1.8      {d0 - d3}, [r0, :128]!
 | |
| #else
 | |
|         vst1.8      {d0 - d3}, [r0]!
 | |
| #endif
 | |
|         bhs         1b                      /* no borrow: another 32 bytes remain */
 | |
| 
 | |
| 2:      /* less than 32 left */
 | |
|         add         r2, r2, #32             /* undo the bias: r2 = bytes remaining */
 | |
|         tst         r2, #0x10
 | |
|         beq         3f
 | |
| 
 | |
|         // writes 16 bytes, 128-bits aligned
 | |
| #ifndef NEON_UNALIGNED_ACCESS
 | |
|         vst1.8      {d0, d1}, [r0, :128]!
 | |
| #else
 | |
|         vst1.8      {d0, d1}, [r0]!
 | |
| #endif
 | |
| 3:      /* write up to 15-bytes (count in r2) */
 | |
|         movs        ip, r2, lsl #29         /* N = r2 bit 2, C = r2 bit 3 */
 | |
|         bcc         1f
 | |
|         vst1.8      {d0}, [r0]!             /* bit 3: eight bytes */
 | |
| 1:      bpl         2f                      /* N-only test; see the note on bpl above */
 | |
|         vst1.32     {d0[0]}, [r0]!          /* bit 2: four bytes */
 | |
| 2:      movs        ip, r2, lsl #31         /* N = r2 bit 0, C = r2 bit 1 */
 | |
|         strmib      r1, [r0], #1
 | |
|         strcsb      r1, [r0], #1
 | |
|         strcsb      r1, [r0], #1
 | |
|         ldmfd       sp!, {r0}               /* return value = original dst */
 | |
|         bx          lr
 | |
| 11:
 | |
| #endif
 | |
| 
 | |
|         /*
 | |
|          * Optimized memset() for ARM.
 | |
|          *
 | |
|          * memset() returns its first argument.
 | |
|          */
 | |
| 
 | |
|         /* compute the offset to align the destination to a word
 | |
|          * offset = (4-(dst&3))&3 = -dst & 3
 | |
|          */
 | |
| 
 | |
|         .save       {r0, r4-r7, lr}
 | |
|         stmfd       sp!, {r0, r4-r7, lr}
 | |
|         rsb         r3, r0, #0
 | |
|         ands        r3, r3, #3
 | |
|         cmp         r3, r2
 | |
|         movhi       r3, r2                  /* never write more than count bytes */
 | |
| 
 | |
|         /* splat r1 */
 | |
|         mov         r1, r1, lsl #24         /* fill byte in bits 31..24 */
 | |
|         orr         r1, r1, r1, lsr #8      /* ...and in bits 23..16 */
 | |
|         orr         r1, r1, r1, lsr #16     /* ...and in the low halfword */
 | |
| 
 | |
|         movs        r12, r3, lsl #31        /* N = r3 bit 0, C = r3 bit 1 */
 | |
|         strcsb      r1, [r0], #1    /* can't use strh (alignment unknown) */
 | |
|         strcsb      r1, [r0], #1
 | |
|         strmib      r1, [r0], #1
 | |
|         subs        r2, r2, r3
 | |
|         ldmlsfd     sp!, {r0, r4-r7, lr}   /* return */
 | |
|         bxls        lr                      /* taken when nothing is left to write */
 | |
| 
 | |
|         /* align the destination to a cache-line */
 | |
|         mov         r12, r1                 /* replicate the pattern for stm */
 | |
|         mov         lr, r1
 | |
|         mov         r4, r1
 | |
|         mov         r5, r1
 | |
|         mov         r6, r1
 | |
|         mov         r7, r1
 | |
| 
 | |
|         rsb         r3, r0, #0
 | |
|         ands        r3, r3, #0x1C           /* bytes (multiple of 4) to a 32-byte boundary */
 | |
|         beq         3f
 | |
|         cmp         r3, r2
 | |
|         andhi       r3, r2, #0x1C           /* clamp to the whole words available */
 | |
|         sub         r2, r2, r3
 | |
| 
 | |
|         /* conditionally writes 0 to 7 words (length in r3) */
 | |
|         movs        r3, r3, lsl #28         /* C = bit 4 (16 bytes), N = bit 3 (8 bytes) */
 | |
|         stmcsia     r0!, {r1, lr}
 | |
|         stmcsia     r0!, {r1, lr}
 | |
|         stmmiia     r0!, {r1, lr}
 | |
|         movs        r3, r3, lsl #2          /* C = original bit 2 (4 bytes) */
 | |
|         strcs       r1, [r0], #4
 | |
| 
 | |
| 3:
 | |
|         subs        r2, r2, #32             /* r2 is biased by -32 inside the loop */
 | |
|         mov         r3, r1                  /* eighth pattern register; flags untouched */
 | |
|         /* blo, not bmi: the count is unsigned. A sign test would skip the
 | |
|          * loop for counts >= 0x80000020 even though 32 or more bytes remain. */
 | |
|         blo         2f
 | |
| 1:      subs        r2, r2, #32
 | |
|         stmia       r0!, {r1,r3,r4,r5,r6,r7,r12,lr}
 | |
|         bhs         1b
 | |
| 2:      add         r2, r2, #32             /* undo the bias: r2 = bytes remaining */
 | |
| 
 | |
|         /* conditionally stores 0 to 31 bytes */
 | |
|         movs        r2, r2, lsl #28         /* C = bit 4 (16 bytes), N = bit 3 (8 bytes) */
 | |
|         stmcsia     r0!, {r1,r3,r12,lr}
 | |
|         stmmiia     r0!, {r1, lr}
 | |
|         movs        r2, r2, lsl #2          /* C = original bit 2, N = original bit 1 */
 | |
|         strcs       r1, [r0], #4
 | |
|         strmih      r1, [r0], #2
 | |
|         movs        r2, r2, lsl #2          /* C = original bit 0 */
 | |
|         strcsb      r1, [r0]
 | |
|         ldmfd       sp!, {r0, r4-r7, lr}    /* r0 = original dst */
 | |
|         bx          lr
 | |
| ENDPROC(memset)
 | 
