142 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			142 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Define the assembler symbols .Lv0.4s ... .Lv12.4s with the values 0 ... 12.
 * The instruction macros in this file prefix an operand such as "v5.4s"
 * with ".L" to look up its register number when building a raw .inst word,
 * so only v0-v12 with the .4s arrangement may be passed to them.
 */
	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
	.set		.Lv\b\().4s, \b
	.endr

/*
 * sm3partw1 rd, rn, rm - emit SM3PARTW1 as a raw instruction word.
 *
 * Encoding: 0xce60c000 | Rd[4:0] | Rn[9:5] | Rm[20:16].
 * Each operand must be written as vN.4s with N in 0-12, because the
 * register number is taken from the matching .Lv<N>.4s symbol.
 * NOTE(review): presumably hand-encoded so the file also builds with
 * assemblers that lack the SM3 mnemonics - confirm.
 */
	.macro		sm3partw1, rd, rn, rm
	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

/*
 * sm3partw2 rd, rn, rm - emit SM3PARTW2 as a raw instruction word.
 *
 * Encoding: 0xce60c400 | Rd[4:0] | Rn[9:5] | Rm[20:16].
 * Each operand must be written as vN.4s with N in 0-12, because the
 * register number is taken from the matching .Lv<N>.4s symbol.
 */
	.macro		sm3partw2, rd, rn, rm
	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

/*
 * sm3ss1 rd, rn, rm, ra - emit SM3SS1 as a raw instruction word.
 *
 * Encoding: 0xce400000 | Rd[4:0] | Rn[9:5] | Ra[14:10] | Rm[20:16].
 * Note the operand order: the third macro argument is Rm and the fourth
 * is Ra, although Ra sits below Rm in the encoded word.
 * Each operand must be written as vN.4s with N in 0-12, because the
 * register number is taken from the matching .Lv<N>.4s symbol.
 */
	.macro		sm3ss1, rd, rn, rm, ra
	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

/*
 * sm3tt1a rd, rn, rm, imm2 - emit SM3TT1A as a raw instruction word.
 *
 * Encoding: 0xce408000 | Rd[4:0] | Rn[9:5] | imm2[13:12] | Rm[20:16].
 * imm2 is not range checked here; it has to be 0-3 or it spills into
 * neighbouring fields of the encoding.
 * Register operands must be written as vN.4s with N in 0-12, because the
 * register number is taken from the matching .Lv<N>.4s symbol.
 */
	.macro		sm3tt1a, rd, rn, rm, imm2
	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

/*
 * sm3tt1b rd, rn, rm, imm2 - emit SM3TT1B as a raw instruction word.
 *
 * Encoding: 0xce408400 | Rd[4:0] | Rn[9:5] | imm2[13:12] | Rm[20:16].
 * imm2 is not range checked here; it has to be 0-3 or it spills into
 * neighbouring fields of the encoding.
 * Register operands must be written as vN.4s with N in 0-12, because the
 * register number is taken from the matching .Lv<N>.4s symbol.
 */
	.macro		sm3tt1b, rd, rn, rm, imm2
	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

/*
 * sm3tt2a rd, rn, rm, imm2 - emit SM3TT2A as a raw instruction word.
 *
 * Encoding: 0xce408800 | Rd[4:0] | Rn[9:5] | imm2[13:12] | Rm[20:16].
 * imm2 is not range checked here; it has to be 0-3 or it spills into
 * neighbouring fields of the encoding.
 * Register operands must be written as vN.4s with N in 0-12, because the
 * register number is taken from the matching .Lv<N>.4s symbol.
 */
	.macro		sm3tt2a, rd, rn, rm, imm2
	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

/*
 * sm3tt2b rd, rn, rm, imm2 - emit SM3TT2B as a raw instruction word.
 *
 * Encoding: 0xce408c00 | Rd[4:0] | Rn[9:5] | imm2[13:12] | Rm[20:16].
 * imm2 is not range checked here; it has to be 0-3 or it spills into
 * neighbouring fields of the encoding.
 * Register operands must be written as vN.4s with N in 0-12, because the
 * register number is taken from the matching .Lv<N>.4s symbol.
 */
	.macro		sm3tt2b, rd, rn, rm, imm2
	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

/*
 * round ab, s0, t0, t1, i - one SM3 round on the state held in v8/v9.
 *
 *   ab  "a" or "b": suffix selecting sm3tt1a/sm3tt2a or sm3tt1b/sm3tt2b
 *   s0  vector register (written without arrangement, e.g. v0) passed as
 *       the third operand of sm3tt2
 *   t0  vector register holding the constant consumed by sm3ss1 this round
 *   t1  vector register that receives t0 with every 32-bit lane rotated
 *       left by one bit, ready to be used as t0 of the following round
 *   i   lane index 0-3, passed as imm2 to sm3tt1 and sm3tt2
 *
 * Reads v10, which qround sets to s0 ^ s1 before the first round.
 * Writes v5 (sm3ss1 result), \t1, v8 and v9.
 */
	.macro		round, ab, s0, t0, t1, i
	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	/* shl #1 followed by sri #31 is a rotate-left-by-1 of each lane */
	shl		\t1\().4s, \t0\().4s, #1
	sri		\t1\().4s, \t0\().4s, #31
	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
	.endm

/*
 * qround ab, s0, s1 [, s2, s3, s4] - four consecutive rounds (lanes 0-3).
 *
 *   ab      "a" or "b", forwarded to the round macro
 *   s0, s1  vector registers holding the message words consumed by these
 *           four rounds; v10 is set to s0 ^ s1 for the sm3tt1 step
 *   s2-s4   optional; when s4 is given, s4 is overwritten with the next
 *           four message words derived from s0-s3 using
 *           sm3partw1/sm3partw2.  When s4 is omitted no message expansion
 *           is emitted.
 *
 * v11 must hold the round constant on entry.  The four rounds alternate
 * v11 and v12 as source and destination of the one-bit rotate, so on exit
 * v11 again holds the constant, rotated left by four bits in total.
 * Clobbers v5, v10, v12 and, when s4 is given, v6 and v7.
 */
	.macro		qround, ab, s0, s1, s2, s3, s4
	.ifnb		\s4
	/* first half of the message expansion, issued ahead of the rounds */
	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
	ext		v6.16b, \s0\().16b, \s1\().16b, #12
	ext		v7.16b, \s2\().16b, \s3\().16b, #8
	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
	.endif

	eor		v10.16b, \s0\().16b, \s1\().16b

	round		\ab, \s0, v11, v12, 0
	round		\ab, \s0, v12, v11, 1
	round		\ab, \s0, v11, v12, 2
	round		\ab, \s0, v12, v11, 3

	.ifnb		\s4
	/* second half of the message expansion */
	sm3partw2	\s4\().4s, v7.4s, v6.4s
	.endif
	.endm

/*
 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
 *                       int blocks)
 *
 * Runs the SM3 compression function over consecutive 64-byte blocks.
 *
 *   x0  sst    - the first 32 bytes are read as the eight 32-bit state
 *                words and written back on exit
 *   x1  src    - input data, advanced by 64 bytes per block
 *   w2  blocks - number of 64-byte blocks; when it is zero or negative
 *                the function returns without touching *sst
 *
 * Register use:
 *   v0-v4    message words (rotating window managed by qround)
 *   v5-v7    scratch for round/qround
 *   v8, v9   working state, word order reversed relative to memory
 *   v10-v12  scratch for round/qround (v11/v12 carry the round constant)
 *   v13,v14  the two constants loaded from .Lt
 *   v15,v16  copy of the state at block entry, XORed back in afterwards
 *   x8       address of .Lt
 *
 * NOTE(review): v8-v15 are overwritten without being saved, although
 * AAPCS64 treats their low halves as callee-saved.  Presumably callers
 * bracket the call with kernel_neon_begin()/kernel_neon_end() - confirm.
 */
	.text
ENTRY(sm3_ce_transform)
	/*
	 * Bail out on an empty request.  The loop below decrements before
	 * testing for zero, so a count of 0 (or a negative one) would wrap
	 * and run far past the end of src.
	 */
	cmp		w2, #0
	b.le		1f

	/* load state; rev64 + ext #8 reverses the four words of each vector */
	ld1		{v8.4s-v9.4s}, [x0]
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8

	/* s13/s14 = the two round constants; ldp clears the upper lanes */
	adr_l		x8, .Lt
	ldp		s13, s14, [x8]

	/* load input */
0:	ld1		{v0.16b-v3.16b}, [x1], #64
	sub		w2, w2, #1

	/* keep the incoming state for the XOR after the last round */
	mov		v15.16b, v8.16b
	mov		v16.16b, v9.16b

	/* byte-swap each 32-bit message word (wrapped in CPU_LE) */
CPU_LE(	rev32		v0.16b, v0.16b		)
CPU_LE(	rev32		v1.16b, v1.16b		)
CPU_LE(	rev32		v2.16b, v2.16b		)
CPU_LE(	rev32		v3.16b, v3.16b		)

	/* move the first constant from lane 0 to lane 3 of v11 */
	ext		v11.16b, v13.16b, v13.16b, #4

	/* 4 x 4 = 16 rounds using the "a" instruction variants */
	qround		a, v0, v1, v2, v3, v4
	qround		a, v1, v2, v3, v4, v0
	qround		a, v2, v3, v4, v0, v1
	qround		a, v3, v4, v0, v1, v2

	/* switch to the second constant, again in lane 3 of v11 */
	ext		v11.16b, v14.16b, v14.16b, #4

	/* 12 x 4 = 48 rounds using the "b" instruction variants */
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4, v0, v1, v2
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	/* the last three groups need no further message expansion */
	qround		b, v3, v4
	qround		b, v4, v0
	qround		b, v0, v1

	/* fold the state saved at block entry back in */
	eor		v8.16b, v8.16b, v15.16b
	eor		v9.16b, v9.16b, v16.16b

	/* handled all input blocks? */
	cbnz		w2, 0b

	/* save state, undoing the word reversal applied on load */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8
	st1		{v8.4s-v9.4s}, [x0]
1:	ret
ENDPROC(sm3_ce_transform)

/*
 * Round constants, fetched by sm3_ce_transform with one ldp into s13/s14:
 * the first word seeds the sixteen "a" rounds, the second the forty-eight
 * "b" rounds.  0x9d8a7a87 is 0x7a879d8a rotated by 16 bits.
 * NOTE(review): presumably the two SM3 T_j constants, the second one
 * pre-rotated for round 16 - confirm against the SM3 specification.
 */
	.section	".rodata", "a"
	.align		3
.Lt:	.word		0x79cc4519, 0x9d8a7a87
