420 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			420 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| ;
 | |
| ;  linux/arch/c6x/lib/csum_64plus.s
 | |
| ;
 | |
| ;  Port on Texas Instruments TMS320C6x architecture
 | |
| ;
 | |
| ;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
 | |
| ;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
 | |
| ;
 | |
| ;  This program is free software; you can redistribute it and/or modify
 | |
| ;  it under the terms of the GNU General Public License version 2 as
 | |
| ;  published by the Free Software Foundation.
 | |
| ;
 | |
| #include <linux/linkage.h>
 | |
| 
 | |
| ;
 | |
| ;unsigned int csum_partial_copy(const char *src, char * dst,
 | |
| ;				int len, int sum)
 | |
| ;
 | |
| ; A4:	src
 | |
| ; B4:	dst
 | |
| ; A6:	len
 | |
| ; B6:	sum
 | |
| ; return csum in A4
 | |
| ;
 | |
| 
 | |
| 	.text
 | |
| ENTRY(csum_partial_copy)	; copy len bytes from src to dst while summing the data as 16-bit words
 | |
| 	MVC	.S2	ILC,B30		; save ILC in B30, restored just before return
 | |
| 
 | |
| 	MV	.D1X	B6,A31		; given csum
 | |
| 	ZERO	.D1	A9		; csum (a side)
 | |
| ||	ZERO	.D2	B9		; csum (b side)
 | |
| ||	SHRU	.S2X	A6,2,B5		; len / 4
 | |
| 
 | |
| 	;; Check alignment and size
 | |
| 	AND	.S1	3,A4,A1		; A1 = src & 3
 | |
| ||	AND	.S2	3,B4,B0		; B0 = dst & 3
 | |
| 	OR	.L2X	B0,A1,B0	; non aligned condition
 | |
| ||	MVC	.S2	B5,ILC		; loop count = number of whole words
 | |
| ||	MVK	.D2	1,B2		; B2 = 1, multiplier for the MPYU in the aligned loop
 | |
| ||	MV	.D1X	B5,A1		; words condition
 | |
|   [!A1]	B	.S1	L8		; no whole word: go to the tail handling
 | |
|    [B0] BNOP	.S1	L6,5		; src or dst not word aligned: use the LDNW/STNW loop
 | |
| 
 | |
| 	SPLOOP		1		; software-pipelined loop, iteration interval 1
 | |
| 
 | |
| 	;; Main loop for aligned words
 | |
| 	LDW	.D1T1	*A4++,A7	; A7 = next source word
 | |
| 	NOP	4			; wait for the load result
 | |
| 	MV	.S2X	A7,B7		; B7 = copy of the word for the b side
 | |
| ||	EXTU	.S1	A7,0,16,A16	; A16 = upper halfword of the word
 | |
| 	STW	.D2T2	B7,*B4++	; store the word to dst
 | |
| ||	MPYU	.M2	B7,B2,B8	; B8 = lower halfword of the word (times B2 = 1)
 | |
| ||	ADD	.L1	A16,A9,A9	; a side accumulates the upper halfwords
 | |
| 	NOP
 | |
| 	SPKERNEL	8,0
 | |
| ||	ADD	.L2	B8,B9,B9	; b side accumulates the lower halfwords
 | |
| 
 | |
| 	ZERO	.D1	A1		; clear words condition so the branch at L6 skips the unaligned loop
 | |
| ||	ADD	.L1X	A9,B9,A9	;  add csum from a and b sides
 | |
| 
 | |
| L6:
 | |
|   [!A1]	BNOP	.S1	L8,5		; nothing left for the unaligned loop: go to the tail
 | |
| 
 | |
| 	;; Main loop for non-aligned words
 | |
| 	SPLOOP		2		; software-pipelined loop, iteration interval 2
 | |
|  ||	MVK	.L1	1,A2		; A2 = 1, multiplier for the MPYU below
 | |
| 
 | |
| 	LDNW	.D1T1	*A4++,A7	; A7 = next source word (non-aligned load)
 | |
| 	NOP		3
 | |
| 
 | |
| 	NOP
 | |
| 	MV	.S2X	A7,B7		; B7 = copy of the word for the store
 | |
|  ||	EXTU	.S1	A7,0,16,A16	; A16 = upper halfword of the word
 | |
|  ||	MPYU	.M1	A7,A2,A8	; A8 = lower halfword of the word (times A2 = 1)
 | |
| 
 | |
| 	ADD	.L1	A16,A9,A9	; add the upper halfword
 | |
| 	SPKERNEL	6,0
 | |
|  ||	STNW	.D2T2	B7,*B4++	; non-aligned store of the word to dst
 | |
|  ||	ADD	.L1	A8,A9,A9	; add the lower halfword
 | |
| 
 | |
| L8:	AND	.S2X	2,A6,B5		; B5 = len & 2
 | |
| 	CMPGT	.L2	B5,0,B0		; B0 = 1 when a trailing halfword remains
 | |
|   [!B0]	BNOP	.S1	L82,4		; no trailing halfword: go to the last-byte check
 | |
| 
 | |
| 	;; Manage half-word
 | |
| 	ZERO	.L1	A7		; clear the two byte registers
 | |
| ||	ZERO	.D1	A8
 | |
| 
 | |
| #ifdef CONFIG_CPU_BIG_ENDIAN
 | |
| 
 | |
| 	LDBU	.D1T1	*A4++,A7	; A7 = first byte
 | |
| 	LDBU	.D1T1	*A4++,A8	; A8 = second byte
 | |
| 	NOP		3
 | |
| 	SHL	.S1	A7,8,A0		; first byte is the high byte of the halfword
 | |
| 	ADD	.S1	A8,A9,A9	; add the second byte as the low byte
 | |
| 	STB	.D2T1	A7,*B4++	; copy the first byte to dst
 | |
| ||	ADD	.S1	A0,A9,A9	; add the first byte shifted left by 8
 | |
| 	STB	.D2T1	A8,*B4++	; copy the second byte to dst
 | |
| 
 | |
| #else
 | |
| 
 | |
| 	LDBU	.D1T1	*A4++,A7	; A7 = first byte
 | |
| 	LDBU	.D1T1	*A4++,A8	; A8 = second byte
 | |
| 	NOP		3
 | |
| 	ADD	.S1	A7,A9,A9	; add the first byte as the low byte
 | |
| 	SHL	.S1	A8,8,A0		; second byte is the high byte of the halfword
 | |
| 
 | |
| 	STB	.D2T1	A7,*B4++	; copy the first byte to dst
 | |
| ||	ADD	.S1	A0,A9,A9	; add the second byte shifted left by 8
 | |
| 	STB	.D2T1	A8,*B4++	; copy the second byte to dst
 | |
| 
 | |
| #endif
 | |
| 
 | |
| 	;; Handle the last byte, if any
 | |
| L82:	AND	.S2X	1,A6,B0		; B0 = len & 1
 | |
|   [!B0]	BNOP	.S1	L9,5		; even length: nothing left to copy
 | |
| 
 | |
| ||	ZERO	.L1	A7
 | |
| 
 | |
| L83:	LDBU	.D1T1	*A4++,A7	; A7 = last byte
 | |
| 	NOP		4
 | |
| 
 | |
| 	MV	.L2X	A7,B7		; B7 = copy of the byte for the store
 | |
| 
 | |
| #ifdef CONFIG_CPU_BIG_ENDIAN
 | |
| 
 | |
| 	STB	.D2T2	B7,*B4++	; copy the last byte to dst
 | |
| ||	SHL	.S1	A7,8,A7		; last byte counts as the high byte of a halfword
 | |
| 	ADD	.S1	A7,A9,A9	; add it to the sum
 | |
| 
 | |
| #else
 | |
| 
 | |
| 	STB	.D2T2	B7,*B4++	; copy the last byte to dst
 | |
| ||	ADD	.S1	A7,A9,A9	; add the last byte as the low byte of a halfword
 | |
| 
 | |
| #endif
 | |
| 
 | |
| 	;; Fold the csum
 | |
| L9:	SHRU	.S2X	A9,16,B0	; B0 = upper 16 bits of the sum
 | |
|   [!B0]	BNOP	.S1	L10,5		; sum already fits in 16 bits: skip folding
 | |
| 
 | |
| L91:	SHRU	.S2X	A9,16,B4	; B4 = upper 16 bits
 | |
| ||	EXTU	.S1	A9,16,16,A3	; A3 = lower 16 bits
 | |
| 	ADD	.D1X	A3,B4,A9	; A9 = lower + upper
 | |
| 
 | |
| 	SHRU	.S1	A9,16,A0	; A0 = bits still above the low halfword
 | |
|    [A0]	BNOP	.S1	L91,5		; fold again while upper bits remain
 | |
| 
 | |
| L10:	ADD	.D1	A31,A9,A9	; add the given csum; NOTE(review): no carry handling on this add, unlike csum_partial - confirm
 | |
| 	MV	.D1	A9,A4		; return value in A4
 | |
| 
 | |
| 	BNOP	.S2	B3,4		; return to the caller
 | |
| 	MVC	.S2	B30,ILC		; restore ILC (runs in the branch delay slot)
 | |
| ENDPROC(csum_partial_copy)
 | |
| 
 | |
| ;
 | |
| ;unsigned short
 | |
| ;ip_fast_csum(unsigned char *iph, unsigned int ihl)
 | |
| ;{
 | |
| ;	unsigned int checksum = 0;
 | |
| ;	unsigned short *tosum = (unsigned short *) iph;
 | |
| ;	int len;
 | |
| ;
 | |
| ;	len = ihl*4;
 | |
| ;
 | |
| ;	if (len <= 0)
 | |
| ;		return 0;
 | |
| ;
 | |
| ;	while(len) {
 | |
| ;		len -= 2;
 | |
| ;		checksum += *tosum++;
 | |
| ;	}
 | |
| ;	if (len & 1)
 | |
| ;		checksum += *(unsigned char*) tosum;
 | |
| ;
 | |
| ;	while(checksum >> 16)
 | |
| ;		checksum = (checksum & 0xffff) + (checksum >> 16);
 | |
| ;
 | |
| ;	return ~checksum;
 | |
| ;}
 | |
| ;
 | |
| ; A4:	iph
 | |
| ; B4:	ihl
 | |
| ; return checksum in A4
 | |
| ;
 | |
| 	.text
 | |
| 
 | |
| ENTRY(ip_fast_csum)	; sum ihl*4 bytes at iph as halfwords, fold to 16 bits, return the complement
 | |
| 	ZERO	.D1	A5		; A5 = checksum accumulator
 | |
|  ||	MVC	.S2	ILC,B30		; save ILC in B30, restored just before return
 | |
| 	SHL	.S2	B4,2,B0		; B0 = len = ihl * 4
 | |
| 	CMPGT	.L2	B0,0,B1		; B1 = (len > 0)
 | |
|   [!B1] BNOP	.S1	L15,4		; len <= 0: return 0
 | |
|   [!B1]	ZERO	.D1	A3		; result = 0 for that early return (runs in the delay slot)
 | |
| 
 | |
|   [!B0]	B	.S1	L12		; NOTE(review): B0 > 0 whenever this is reached, so this looks never taken - confirm
 | |
| 	SHRU	.S2	B0,1,B0		; B0 = number of halfwords
 | |
| 	MVC	.S2	B0,ILC		; loop count = number of halfwords
 | |
| 	NOP	3			; presumably lets the ILC write settle before SPLOOP - confirm
 | |
| 
 | |
| 	SPLOOP	1			; software-pipelined loop, iteration interval 1
 | |
| 	LDHU	.D1T1	*A4++,A3	; A3 = next halfword, zero extended
 | |
| 	NOP	3
 | |
| 	NOP
 | |
| 	SPKERNEL	5,0
 | |
|  ||	ADD	.L1	A3,A5,A5	; checksum += halfword
 | |
| 
 | |
| L12:	SHRU	.S1	A5,16,A0	; A0 = upper 16 bits of the sum
 | |
|   [!A0]	BNOP	.S1	L14,5		; sum already fits in 16 bits: skip folding
 | |
| 
 | |
| L13:	SHRU	.S2X	A5,16,B4	; B4 = upper 16 bits
 | |
| 	EXTU	.S1	A5,16,16,A3	; A3 = lower 16 bits
 | |
| 	ADD	.D1X	A3,B4,A5	; A5 = lower + upper
 | |
| 	SHRU	.S1	A5,16,A0	; A0 = bits still above the low halfword
 | |
|   [A0]	BNOP	.S1	L13,5		; fold again while upper bits remain
 | |
| 
 | |
| L14:	NOT	.D1	A5,A3		; A3 = complement of the folded sum
 | |
| 	EXTU	.S1	A3,16,16,A3	; keep only the low 16 bits
 | |
| 
 | |
| L15:	BNOP	.S2	B3,3		; return; the two instructions below fill the delay slots
 | |
| 	MVC	.S2	B30,ILC		; restore ILC
 | |
| 	MV	.D1	A3,A4		; return value in A4
 | |
| ENDPROC(ip_fast_csum)
 | |
| 
 | |
| ;
 | |
| ;unsigned short
 | |
| ;do_csum(unsigned char *buff, unsigned int len)
 | |
| ;{
 | |
| ;	int odd, count;
 | |
| ;	unsigned int result = 0;
 | |
| ;
 | |
| ;	if (len <= 0)
 | |
| ;		goto out;
 | |
| ;	odd = 1 & (unsigned long) buff;
 | |
| ;	if (odd) {
 | |
| ;#ifdef __LITTLE_ENDIAN
 | |
| ;		result += (*buff << 8);
 | |
| ;#else
 | |
| ;		result = *buff;
 | |
| ;#endif
 | |
| ;		len--;
 | |
| ;		buff++;
 | |
| ;	}
 | |
| ;	count = len >> 1;		/* nr of 16-bit words.. */
 | |
| ;	if (count) {
 | |
| ;		if (2 & (unsigned long) buff) {
 | |
| ;			result += *(unsigned short *) buff;
 | |
| ;			count--;
 | |
| ;			len -= 2;
 | |
| ;			buff += 2;
 | |
| ;		}
 | |
| ;		count >>= 1;		/* nr of 32-bit words.. */
 | |
| ;		if (count) {
 | |
| ;			unsigned int carry = 0;
 | |
| ;			do {
 | |
| ;				unsigned int w = *(unsigned int *) buff;
 | |
| ;				count--;
 | |
| ;				buff += 4;
 | |
| ;				result += carry;
 | |
| ;				result += w;
 | |
| ;				carry = (w > result);
 | |
| ;			} while (count);
 | |
| ;			result += carry;
 | |
| ;			result = (result & 0xffff) + (result >> 16);
 | |
| ;		}
 | |
| ;		if (len & 2) {
 | |
| ;			result += *(unsigned short *) buff;
 | |
| ;			buff += 2;
 | |
| ;		}
 | |
| ;	}
 | |
| ;	if (len & 1)
 | |
| ;#ifdef __LITTLE_ENDIAN
 | |
| ;		result += *buff;
 | |
| ;#else
 | |
| ;		result += (*buff << 8);
 | |
| ;#endif
 | |
| ;	result = (result & 0xffff) + (result >> 16);
 | |
| ;	/* add up carry.. */
 | |
| ;	result = (result & 0xffff) + (result >> 16);
 | |
| ;	if (odd)
 | |
| ;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
 | |
| ;out:
 | |
| ;	return result;
 | |
| ;}
 | |
| ;
 | |
| ; A4:	buff
 | |
| ; B4:	len
 | |
| ; return checksum in A4
 | |
| ;
 | |
| 
 | |
| ENTRY(do_csum)	; sum len bytes at buff as 16-bit words and fold the result to 16 bits
 | |
| 	   CMPGT   .L2	   B4,0,B0	; B0 = (len > 0)
 | |
|    [!B0]   BNOP    .S1	   L26,3	; len <= 0: go to out; the next two packets still run in the delay slots
 | |
| 	   EXTU    .S1	   A4,31,31,A0	; A0 = odd = buff & 1
 | |
| 
 | |
| 	   MV	   .L1	   A0,A3	; A3 = odd, kept for the final byte swap
 | |
| ||	   MV	   .S1X    B3,A5	; A5 = return address
 | |
| ||	   MV	   .L2	   B4,B3	; B3 = working copy of len
 | |
| ||	   ZERO    .D1	   A1		; A1 = result = 0
 | |
| 
 | |
| #ifdef CONFIG_CPU_BIG_ENDIAN
 | |
|    [A0]    SUB	   .L2	   B3,1,B3	; odd start: len--
 | |
| || [A0]    LDBU    .D1T1   *A4++,A1	; odd start: result = *buff++
 | |
| #else
 | |
|    [!A0]   BNOP    .S1	   L21,5	; even start: skip the leading byte
 | |
| || [A0]    LDBU    .D1T1   *A4++,A0	; odd start: A0 = *buff++
 | |
| 	   SUB	   .L2	   B3,1,B3	; len--
 | |
| ||	   SHL	   .S1	   A0,8,A1	; result = byte << 8
 | |
| L21:
 | |
| #endif
 | |
| 	   SHR	   .S2	   B3,1,B0	; B0 = count = len >> 1 (16-bit words)
 | |
|    [!B0]   BNOP    .S1	   L24,3	; no 16-bit word: go to the last-byte check
 | |
| 	   MVK	   .L1	   2,A0
 | |
| 	   AND	   .L1	   A4,A0,A0	; A0 = buff & 2
 | |
| 
 | |
|    [!A0]   BNOP    .S1	   L22,5	; already 4-byte aligned: skip the leading halfword
 | |
| || [A0]    LDHU    .D1T1   *A4++,A0	; A0 = leading halfword, buff += 2
 | |
| 	   SUB	   .L2	   B0,1,B0	; count--
 | |
| ||	   SUB	   .S2	   B3,2,B3	; len -= 2
 | |
| ||	   ADD	   .L1	   A0,A1,A1	; result += halfword
 | |
| L22:
 | |
| 	   SHR	   .S2	   B0,1,B0	; count >>= 1 (32-bit words)
 | |
| ||	   ZERO    .L1	   A0		; carry = 0
 | |
| 
 | |
|    [!B0]   BNOP    .S1	   L23,5	; no 32-bit word: skip the loop
 | |
| || [B0]    MVC	   .S2	   B0,ILC	; loop count = number of 32-bit words
 | |
| 
 | |
| 	   SPLOOP  3			; software-pipelined loop, iteration interval 3
 | |
| 	   SPMASK  L1			; NOTE(review): presumably keeps the MV below out of the repeated loop body - confirm
 | |
| ||	   MV	   .L1	   A1,A2	; A2 = result so far
 | |
| ||	   LDW	   .D1T1   *A4++,A1	; A1 = w = next 32-bit word
 | |
| 
 | |
| 	   NOP	   4			; wait for the load result
 | |
| 	   ADD	   .L1	   A0,A1,A0	; A0 = carry + w
 | |
| 	   ADD	   .L1	   A2,A0,A2	; result += carry + w
 | |
| 
 | |
| 	   SPKERNEL 1,2
 | |
| ||	   CMPGTU  .L1	   A1,A2,A0	; carry = (w > result)
 | |
| 
 | |
| 	   ADD	   .L1	   A0,A2,A6	; A6 = result + last carry
 | |
| 	   EXTU    .S1	   A6,16,16,A7	; A7 = A6 & 0xffff
 | |
| 	   SHRU    .S2X    A6,16,B0	; B0 = A6 >> 16
 | |
| 	   NOP		   1
 | |
| 	   ADD	   .L1X    A7,B0,A1	; result = low half + high half
 | |
| L23:
 | |
| 	   MVK	   .L2	   2,B0
 | |
| 	   AND	   .L2	   B3,B0,B0	; B0 = len & 2
 | |
|    [B0]    LDHU    .D1T1   *A4++,A0	; A0 = trailing halfword, buff += 2
 | |
| 	   NOP	   4			; wait for the load result
 | |
|    [B0]    ADD	   .L1	   A0,A1,A1	; result += halfword
 | |
| L24:
 | |
| 	   EXTU    .S2	   B3,31,31,B0	; B0 = len & 1
 | |
| #ifdef CONFIG_CPU_BIG_ENDIAN
 | |
|    [!B0]   BNOP    .S1	   L25,4	; no trailing byte: skip the add
 | |
| || [B0]    LDBU    .D1T1   *A4,A0	; A0 = trailing byte
 | |
| 	   SHL	   .S1	   A0,8,A0	; A0 = byte << 8
 | |
| 	   ADD	   .L1	   A0,A1,A1	; result += byte << 8
 | |
| L25:
 | |
| #else
 | |
|    [B0]    LDBU    .D1T1   *A4,A0	; A0 = trailing byte
 | |
| 	   NOP	   4			; wait for the load result
 | |
|    [B0]    ADD	   .L1	   A0,A1,A1	; result += byte
 | |
| #endif
 | |
| 	   EXTU    .S1	   A1,16,16,A0	; A0 = result & 0xffff
 | |
| 	   SHRU    .S2X    A1,16,B0	; B0 = result >> 16
 | |
| 	   NOP	   1
 | |
| 	   ADD	   .L1X    A0,B0,A0	; first fold: low half + high half
 | |
| 	   SHRU    .S1	   A0,16,A1	; A1 = carry out of the first fold
 | |
| 	   ADD	   .L1	   A0,A1,A0	; add up carry
 | |
| 	   EXTU    .S1	   A0,16,16,A1	; result = low 16 bits
 | |
| 	   EXTU    .S1	   A1,16,24,A2	; A2 = (result >> 8) & 0xff
 | |
| 
 | |
| 	   EXTU    .S1	   A1,24,16,A0	; A0 = (result & 0xff) << 8
 | |
| ||	   MV	   .L2X    A3,B0	; B0 = odd
 | |
| 
 | |
|    [B0]    OR	   .L1	   A0,A2,A1	; odd start: result = byte-swapped result
 | |
| L26:
 | |
| 	   NOP	   1
 | |
| 	   BNOP    .S2X    A5,4		; return through the address saved in A5
 | |
| 	   MV	   .L1	   A1,A4	; return value in A4 (runs in the branch delay slot)
 | |
| ENDPROC(do_csum)
 | |
| 
 | |
| ;__wsum csum_partial(const void *buff, int len, __wsum wsum)
 | |
| ;{
 | |
| ;	unsigned int sum = (__force unsigned int)wsum;
 | |
| ;	unsigned int result = do_csum(buff, len);
 | |
| ;
 | |
| ;	/* add in old sum, and carry.. */
 | |
| ;	result += sum;
 | |
| ;	if (sum > result)
 | |
| ;		result += 1;
 | |
| ;	return (__force __wsum)result;
 | |
| ;}
 | |
| ;
 | |
| ENTRY(csum_partial)	; do_csum(buff, len) plus wsum, with the carry of that add added back in
 | |
| 	   MV	   .L1X    B3,A9	; A9 = return address, B3 is reused for the call
 | |
| ||	   CALLP   .S2	   do_csum,B3	; call do_csum, result comes back in A4
 | |
| ||	   MV	   .S1	   A6,A8	; A8 = wsum (third argument), do_csum overwrites A6
 | |
| 	   BNOP    .S2X    A9,2		; return; the three instructions below fill the delay slots
 | |
| 	   ADD	   .L1	   A8,A4,A1	; result = sum + do_csum result
 | |
| 	   CMPGTU  .L1	   A8,A1,A0	; A0 = (sum > result), the carry of the add
 | |
| 	   ADD	   .L1	   A1,A0,A4	; return result + carry in A4
 | |
| ENDPROC(csum_partial)
 | |
| 
 | |
| ;unsigned short
 | |
| ;ip_compute_csum(unsigned char *buff, unsigned int len)
 | |
| ;
 | |
| ; A4:	buff
 | |
| ; B4:	len
 | |
| ; return checksum in A4
 | |
| 
 | |
| ENTRY(ip_compute_csum)	; return the 16-bit complement of do_csum(buff, len)
 | |
| 	   MV	   .L1X    B3,A9	; A9 = return address, B3 is reused for the call
 | |
| ||	   CALLP   .S2	   do_csum,B3	; call do_csum, result comes back in A4
 | |
| 	   BNOP    .S2X    A9,3		; return; the two instructions below fill the delay slots
 | |
| 	   NOT	   .S1	   A4,A4	; complement the sum
 | |
| 	   CLR     .S1	   A4,16,31,A4	; clear bits 16 to 31, keeping a 16-bit result
 | |
| ENDPROC(ip_compute_csum)
 | 
