150 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			150 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/* Copyright (C) 2006 Free Software Foundation, Inc.
 | 
						|
 | 
						|
This file is free software; you can redistribute it and/or modify it
 | 
						|
under the terms of the GNU General Public License as published by the
 | 
						|
Free Software Foundation; either version 2, or (at your option) any
 | 
						|
later version.
 | 
						|
 | 
						|
In addition to the permissions in the GNU General Public License, the
 | 
						|
Free Software Foundation gives you unlimited permission to link the
 | 
						|
compiled version of this file into combinations with other programs,
 | 
						|
and to distribute those combinations without any restriction coming
 | 
						|
from the use of this file.  (The General Public License restrictions
 | 
						|
do apply in other respects; for example, they cover modification of
 | 
						|
the file, and distribution when not linked into a combine
 | 
						|
executable.)
 | 
						|
 | 
						|
This file is distributed in the hope that it will be useful, but
 | 
						|
WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
General Public License for more details.
 | 
						|
 | 
						|
You should have received a copy of the GNU General Public License
 | 
						|
along with this program; see the file COPYING.  If not, write to
 | 
						|
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
 | 
						|
Boston, MA 02110-1301, USA.  */
 | 
						|
 | 
						|
/* Moderately Space-optimized libgcc routines for the Renesas SH /
 | 
						|
   STMicroelectronics ST40 CPUs.
 | 
						|
   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
 | 
						|
 | 
						|
/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
 | 
						|
   sh4-200 run times:
 | 
						|
   udiv small divisor: 55 cycles
 | 
						|
   udiv large divisor: 52 cycles
 | 
						|
   sdiv small divisor, positive result: 59 cycles
 | 
						|
   sdiv large divisor, positive result: 56 cycles
 | 
						|
   sdiv small divisor, negative result: 65 cycles (*)
 | 
						|
   sdiv large divisor, negative result: 62 cycles (*)
 | 
						|
   (*): r2 is restored in the delay slot of the returning jmp and has a lingering latency
 | 
						|
        of two more cycles.  */
 | 
						|
	/* __udivsi3_i4i (also exported as __udivsi3_i4): unsigned 32-bit divide.
	   The div1 operand order used throughout (div1 r5,r4) makes r4 the
	   dividend and r5 the divisor; the quotient is assembled in r0.
	   r4 and r5 are pushed on the r15 stack and reloaded before returning.
	   r1 is overwritten with the caller's return address and the T bit is
	   not preserved.
	   NOTE(review): there is no check for a zero divisor in this code --
	   confirm what callers expect in that case.  */
	.balign 4
 | 
						|
	.global	__udivsi3_i4i
 | 
						|
	.global	__udivsi3_i4
 | 
						|
	/* __udivsi3_i4 is a second name for the same entry point.  */
	.set	__udivsi3_i4, __udivsi3_i4i
 | 
						|
	.type	__udivsi3_i4i, @function
 | 
						|
	/* The signed entry point, defined later in this file, is typed here too.  */
	.type	__sdivsi3_i4i, @function
 | 
						|
__udivsi3_i4i:
 | 
						|
	/* Keep the caller's return address in r1: the bsr calls below overwrite
	   pr, and the routine returns with jmp @r1.  */
	sts pr,r1
 | 
						|
	/* Push r4; it is reloaded just before returning.  */
	mov.l r4,@-r15
 | 
						|
	/* T = 1 when r5 equals its own zero-extended low 16 bits, i.e. the
	   divisor fits in 16 bits.  */
	extu.w r5,r0
 | 
						|
	cmp/eq r5,r0
 | 
						|
	/* r0 = dividend with its 16-bit halves swapped; r4 = upper half of the
	   dividend.  Neither instruction changes T.  */
	swap.w r4,r0
 | 
						|
	shlr16 r4
 | 
						|
	/* T = 0: the divisor is wider than 16 bits, go to large_divisor.
	   div0u is in the delay slot, so it runs on both paths; it clears the
	   M, Q and T flags to start an unsigned div1 sequence.  */
	bf/s large_divisor
 | 
						|
	div0u
 | 
						|
	/* Small divisor: push r5, then move the 16-bit divisor into the upper
	   half of r5 for the div1 steps.  */
	mov.l r5,@-r15
 | 
						|
	shll16 r5
 | 
						|
/* Also reached from the signed routine (bra sdiv_small_divisor), which
   arrives with r4/r5 pushed and r0, r4, r5 prepared the same way.  */
sdiv_small_divisor:
 | 
						|
	/* First round: 16 div1 steps (2 here + 6 in div6, done twice).  The
	   instruction after each bsr is its delay slot and executes before the
	   subroutine body.  */
	div1 r5,r4
 | 
						|
	bsr div6
 | 
						|
	div1 r5,r4
 | 
						|
	div1 r5,r4
 | 
						|
	bsr div6
 | 
						|
	div1 r5,r4
 | 
						|
	/* Shuffle 16-bit halves between r4 and r0.  xtrct takes the middle 32
	   bits of the register pair: the low half of r4 goes to the upper half
	   of r0, and the dividend's low 16 bits (kept in r0 since the swap.w
	   at entry) go to the upper half of r4.  */
	xtrct r4,r0
 | 
						|
	xtrct r0,r4
 | 
						|
	/* Second round: 16 more div1 steps (7 in div7, then 2 here + 7 in
	   div7).  The swap.w in the first bsr's delay slot exchanges the halves
	   of r4 before the stepping resumes.  */
	bsr div7
 | 
						|
	swap.w r4,r4
 | 
						|
	div1 r5,r4
 | 
						|
	bsr div7
 | 
						|
	div1 r5,r4
 | 
						|
	/* Merge the low halves produced by the two rounds into r0 (xtrct, then
	   swap.w), reload r5 and r4 from the stack, and return through r1.
	   The rotcl in the jmp delay slot shifts the T bit left by the last
	   div1 into bit 0 of r0.  */
	xtrct r4,r0
 | 
						|
	mov.l @r15+,r5
 | 
						|
	swap.w r0,r0
 | 
						|
	mov.l @r15+,r4
 | 
						|
	jmp @r1
 | 
						|
	rotcl r0
 | 
						|
/* div7 / div6: local subroutines reached with bsr.  Entering at div7 runs
   seven div1 r5,r4 steps; entering at div6 runs six.  The last step sits in
   the rts delay slot.  Both return through pr, so a caller must keep its
   own return address somewhere else.  */
div7:
 | 
						|
	div1 r5,r4
 | 
						|
div6:
 | 
						|
	/* Several instructions share a line here, separated by `;'.  */
	            div1 r5,r4; div1 r5,r4; div1 r5,r4
 | 
						|
	div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4
 | 
						|
 | 
						|
/* divx3: local subroutine reached with bsr from the .rept block under
   sdiv_large_divisor.  Runs three rotcl r0 / div1 r5,r4 pairs; the third
   div1 is in the rts delay slot.  Returns through pr.  */
divx3:
 | 
						|
	rotcl r0
 | 
						|
	div1 r5,r4
 | 
						|
	rotcl r0
 | 
						|
	div1 r5,r4
 | 
						|
	rotcl r0
 | 
						|
	rts
 | 
						|
	div1 r5,r4
 | 
						|
 | 
						|
/* large_divisor: continuation of __udivsi3_i4i for a divisor that does not
   fit in 16 bits (reached by bf/s with T = 0).  On arrival r4 has been
   pushed, r0 holds the dividend with its halves swapped, r4 holds the
   dividend's upper half, and div0u has run in the branch delay slot.  */
large_divisor:
 | 
						|
	/* Push r5 so that the two pops at the end are balanced.  */
	mov.l r5,@-r15
 | 
						|
/* The signed routine enters here (bf/s sdiv_large_divisor) with r5 already
   pushed.  */
sdiv_large_divisor:
 | 
						|
	/* r4's low 16 bits equal the low 16 bits of the swapped copy in r0, so
	   this xor clears r0's low half and leaves the dividend's low 16 bits
	   in r0's upper half.  */
	xor r4,r0
 | 
						|
	/* Four copies of: rotcl, bsr divx3, div1 (delay slot).  Together with
	   the three rotcl/div1 pairs inside divx3 that makes 16 rotcl + div1
	   pairs.  rotcl rotates r0 left through T and div1 both consumes and
	   produces T, so -- as I read it -- each rotcl feeds the next dividend
	   bit to div1 while collecting the previous quotient bit in r0.  */
	.rept 4
 | 
						|
	rotcl r0
 | 
						|
	bsr divx3
 | 
						|
	div1 r5,r4
 | 
						|
	.endr
 | 
						|
	/* Reload r5 and r4 from the stack and return through r1; the rotcl in
	   the jmp delay slot shifts the final T bit into bit 0 of r0.  */
	mov.l @r15+,r5
 | 
						|
	mov.l @r15+,r4
 | 
						|
	jmp @r1
 | 
						|
	rotcl r0
 | 
						|
 | 
						|
	/* __sdivsi3_i4i (also exported as __sdivsi3_i4 and __sdivsi3): signed
	   32-bit divide, r4 by r5, quotient in r0.  Negative operands are
	   replaced by their negation and the unsigned stepping code at
	   sdiv_small_divisor / sdiv_large_divisor is reused; that code pops
	   r5 and r4 and leaves with jmp @r1.  r1 is therefore loaded with the
	   caller's return address when the quotient needs no sign change, or
	   with the address of negate_result when it does.  On the latter path
	   r2 carries the caller's return address and its previous value is
	   parked in MACL, so MACL is overwritten.  */
	.global	__sdivsi3_i4i
 | 
						|
	.global __sdivsi3_i4
 | 
						|
	.global __sdivsi3
 | 
						|
	.set	__sdivsi3_i4, __sdivsi3_i4i
 | 
						|
	.set	__sdivsi3, __sdivsi3_i4i
 | 
						|
__sdivsi3_i4i:
 | 
						|
	/* Push r4 and r5 (the shared divide code pops them).  cmp/pz sets
	   T = (r5 >= 0); the intervening mov.l does not change T.  */
	mov.l r4,@-r15
 | 
						|
	cmp/pz r5
 | 
						|
	mov.l r5,@-r15
 | 
						|
	/* Branch on the divisor's sign.  The delay slot then sets
	   T = (r4 >= 0) for the next branch on either path.  */
	bt/s pos_divisor
 | 
						|
	cmp/pz r4
 | 
						|
	/* Negative divisor: negate it and start the 16-bit range test.  */
	neg r5,r5
 | 
						|
	extu.w r5,r0
 | 
						|
	/* Dividend >= 0 with a negative divisor: the quotient must be negated.
	   The delay slot finishes the range test: T = 1 when the (negated)
	   divisor fits in 16 bits; T is consumed at sdiv_check_divisor.  */
	bt/s neg_result
 | 
						|
	cmp/eq r5,r0
 | 
						|
	/* Both operands were negative: negate the dividend as well.  */
	neg r4,r4
 | 
						|
/* No sign change needed: return straight to the caller.  */
pos_result:
 | 
						|
	/* r0 = dividend with its 16-bit halves swapped; the delay slot copies
	   the caller's return address from pr into r1.  */
	swap.w r4,r0
 | 
						|
	bra sdiv_check_divisor
 | 
						|
	sts pr,r1
 | 
						|
pos_divisor:
 | 
						|
	/* Divisor >= 0.  T still holds (r4 >= 0) for the bt/s; the delay slot
	   replaces it with the 16-bit range test result for the divisor.  */
	extu.w r5,r0
 | 
						|
	bt/s pos_result
 | 
						|
	cmp/eq r5,r0
 | 
						|
	/* Negative dividend, non-negative divisor: negate the dividend and
	   fall through to the negated-quotient setup.  */
	neg r4,r4
 | 
						|
/* The quotient has to be negated after the unsigned divide.  */
neg_result:
 | 
						|
	/* r0 = address of negate_result.  mova is PC-relative and can only
	   form 4-byte-aligned addresses, which is why negate_result is
	   preceded by .balign 4.  */
	mova negate_result,r0
 | 
						|
	/* NOTE(review): lone `;' (an empty statement); its purpose is not
	   evident from this file.  */
	;
 | 
						|
	/* r1 = address of negate_result, so the shared code's jmp @r1 lands
	   there instead of in the caller.  */
	mov r0,r1
 | 
						|
	swap.w r4,r0
 | 
						|
	/* Park r2 in MACL and put the caller's return address in r2.  */
	lds r2,macl
 | 
						|
	sts pr,r2
 | 
						|
/* Common tail: same preparation as the unsigned entry point.  */
sdiv_check_divisor:
 | 
						|
	/* r4 = upper half of the dividend (T unchanged).  */
	shlr16 r4
 | 
						|
	/* T = 0: divisor wider than 16 bits.  div0u is in the delay slot and
	   runs on both paths.  */
	bf/s sdiv_large_divisor
 | 
						|
	div0u
 | 
						|
	/* Otherwise use the small-divisor code; the delay slot moves the
	   divisor into the upper half of r5.  */
	bra sdiv_small_divisor
 | 
						|
	shll16 r5
 | 
						|
	/* negate_result: return stub for the signed divide when the quotient
	   has to change sign.  Its address is taken with mova under
	   neg_result and placed in r1, so the shared divide code arrives here
	   through jmp @r1.  The .balign keeps the label 4-byte aligned, as
	   mova can only form 4-byte-aligned addresses.  */
	.balign 4
 | 
						|
negate_result:
 | 
						|
	/* Negate the unsigned quotient, return to the address held in r2, and
	   reload r2 from MACL in the jmp delay slot.  */
	neg r0,r0
 | 
						|
	jmp @r2
 | 
						|
	sts macl,r2
 |