827 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			827 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  * AES-NI + SSE2 implementation of AEGIS-128L
 | |
|  *
 | |
|  * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 | |
|  * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify it
 | |
|  * under the terms of the GNU General Public License version 2 as published
 | |
|  * by the Free Software Foundation.
 | |
|  */
 | |
| 
 | |
| #include <linux/linkage.h>
 | |
| #include <asm/frame.h>
 | |
| 
 | |
/* The eight 128-bit AEGIS-128L state words, one per XMM register. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define STATE5	%xmm5
#define STATE6	%xmm6
#define STATE7	%xmm7

/* The pair of 16-byte message blocks currently being processed. */
#define MSG0	%xmm8
#define MSG1	%xmm9

/* Scratch vector registers. */
#define T0	%xmm10
#define T1	%xmm11
#define T2	%xmm12
#define T3	%xmm13

/* Integer registers shared by the ad/enc/dec routines and their helpers. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
 | |
| 
 | |
/*
 * Two 16-byte constants mixed into the initial state by
 * crypto_aegis128l_aesni_init.
 */
.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
.align 16
.Laegis128l_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128l_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15 and 16..31.  crypto_aegis128l_aesni_dec_tail compares
 * them against the tail length to build a per-byte mask.
 */
.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
.align 16
.Laegis128l_counter0:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.Laegis128l_counter1:
	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
 | |
| 
 | |
| .text
 | |
| 
 | |
/*
 * __load_partial: internal ABI
 *
 * Gathers the (LEN & 0x1f) bytes at SRC into the block pair MSG0:MSG1 using
 * one load per set bit of LEN (1, 2, 4, 8, 16 bytes), so no byte beyond that
 * range is read.  Bytes of MSG0:MSG1 that are not loaded are zero.
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG0 - first message block
 *   MSG1 - second message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	pxor MSG0, MSG0
	pxor MSG1, MSG1
	xor %r9d, %r9d			/* %r9 collects the sub-8-byte pieces */

	/* bit 0: single byte, stored after all the larger pieces */
	test $0x1, LEN
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8			/* offset = LEN without bit 0 */
	movzbl (SRC,%r8), %r9d

.Lld_partial_1:
	/* bit 1: 2-byte piece goes below what %r9 already holds */
	test $0x2, LEN
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	movzwl (SRC,%r8), %r8d
	shl $0x10, %r9
	or %r8, %r9

.Lld_partial_2:
	/* bit 2: 4-byte piece */
	test $0x4, LEN
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	mov (SRC,%r8), %r8d		/* 32-bit load zero-extends into %r8 */
	shl $32, %r9
	xor %r8, %r9

.Lld_partial_4:
	movq %r9, MSG0

	/* bit 3: 8-byte piece becomes the low half, earlier data moves up */
	test $0x8, LEN
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	movq (SRC,%r8), T0
	pslldq $8, MSG0
	pxor T0, MSG0

.Lld_partial_8:
	/* bit 4: a full first block; what was gathered so far is the second */
	test $0x10, LEN
	jz .Lld_partial_16

	movdqa MSG0, MSG1
	movdqu (SRC), MSG0

.Lld_partial_16:
	ret
ENDPROC(__load_partial)
 | |
| 
 | |
/*
 * __store_partial: internal ABI
 *
 * Writes the first LEN bytes of the block pair T0:T1 to DST using 16-, 8-,
 * 4-, 2- and 1-byte stores, so for LEN below 32 nothing beyond DST + LEN is
 * written.
 *
 * input:
 *   LEN - bytes (only the low 32 bits are used)
 *   DST - dst
 *   T0  - first message block
 *   T1  - second message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	/*
	 * The callers receive the length as an 'unsigned int', so only %esi
	 * is defined; the 32-bit move zero-extends it into %r8.
	 */
	mov %esi, %r8d
	mov DST, %r9

	cmp $16, %r8
	jb .Lst_partial_16

	/* a whole first block: store it and continue with the second one */
	movdqu T0, (%r9)
	movdqa T1, T0

	sub $16, %r8
	add $16, %r9

.Lst_partial_16:
	movq T0, %r10

	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0			/* expose the high half of the block */
	movq T0, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
 | |
| 
 | |
/*
 * One state round without message input.
 *
 * Every state register receives an AES round of itself XORed with its
 * successor (aesenc src, dst: dst = AESRound(dst) ^ src); STATE6 is paired
 * with the pre-round STATE7, which is saved in T0 because STATE7 is the first
 * register to be overwritten.  The instruction order is significant: each
 * register is consumed as a source before it is rewritten.
 *
 * changed: STATE0-STATE7, T0
 */
.macro update
	movdqa STATE7, T0
	aesenc STATE0, STATE7
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc STATE4, STATE3
	aesenc STATE5, STATE4
	aesenc STATE6, STATE5
	aesenc T0,     STATE6
.endm
 | |
| 
 | |
/*
 * update0 .. update7: one 'update' round followed by XORing MSG0 and MSG1
 * into two state registers.  The target pair moves down by one register per
 * variant:
 *
 *   variant   MSG0 into   MSG1 into
 *   update0   STATE7      STATE3
 *   update1   STATE6      STATE2
 *   update2   STATE5      STATE1
 *   update3   STATE4      STATE0
 *   update4   STATE3      STATE7
 *   update5   STATE2      STATE6
 *   update6   STATE1      STATE5
 *   update7   STATE0      STATE4
 *
 * The callers in this file step through the variants in the order
 * 0, 1, ..., 7, 0, ...
 */
.macro update0
	update
	pxor MSG0, STATE7
	pxor MSG1, STATE3
.endm

.macro update1
	update
	pxor MSG0, STATE6
	pxor MSG1, STATE2
.endm

.macro update2
	update
	pxor MSG0, STATE5
	pxor MSG1, STATE1
.endm

.macro update3
	update
	pxor MSG0, STATE4
	pxor MSG1, STATE0
.endm

.macro update4
	update
	pxor MSG0, STATE3
	pxor MSG1, STATE7
.endm

.macro update5
	update
	pxor MSG0, STATE2
	pxor MSG1, STATE6
.endm

.macro update6
	update
	pxor MSG0, STATE1
	pxor MSG1, STATE5
.endm

.macro update7
	update
	pxor MSG0, STATE0
	pxor MSG1, STATE4
.endm
 | |
| 
 | |
/*
 * Load the 128-byte state at STATEP into STATE0..STATE7, 16 bytes per
 * register in ascending address order.  Unaligned loads are used.
 */
.macro state_load
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4
	movdqu 0x50(STATEP), STATE5
	movdqu 0x60(STATEP), STATE6
	movdqu 0x70(STATEP), STATE7
.endm
 | |
| 
 | |
/*
 * Write eight registers to the 128-byte state at STATEP.  Note the rotation:
 * \s7 goes to offset 0x00 and \s0..\s6 follow at 0x10..0x70.
 */
.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
	movdqu \s7, 0x00(STATEP)
	movdqu \s0, 0x10(STATEP)
	movdqu \s1, 0x20(STATEP)
	movdqu \s2, 0x30(STATEP)
	movdqu \s3, 0x40(STATEP)
	movdqu \s4, 0x50(STATEP)
	movdqu \s5, 0x60(STATEP)
	movdqu \s6, 0x70(STATEP)
.endm
 | |
| 
 | |
/*
 * state_store0 .. state_store7: state_store with the register list rotated
 * by N positions.  In this file state_storeN is always used after the last
 * round executed was updateN.
 */
.macro state_store0
	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
.endm

.macro state_store1
	state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
.endm

.macro state_store2
	state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro state_store3
	state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro state_store4
	state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
.endm

.macro state_store5
	state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
.endm

.macro state_store6
	state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
.endm

.macro state_store7
	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
.endm
 | |
| 
 | |
/*
 * void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from the 16-byte key and the 16-byte IV, runs ten
 * update rounds that absorb (IV, key) and writes the resulting 128-byte state
 * to 'state'.
 *
 *   %rdi - state (STATEP)
 *   %rsi - key
 *   %rdx - iv
 */
ENTRY(crypto_aegis128l_aesni_init)
	FRAME_BEGIN

	/*
	 * load key (unaligned load: the prototype takes a plain
	 * 'const void *', no alignment guarantee is visible here):
	 */
	movdqu (%rsi), MSG1
	movdqa MSG1, STATE0
	movdqa MSG1, STATE4
	movdqa MSG1, STATE5
	movdqa MSG1, STATE6
	movdqa MSG1, STATE7

	/* load IV: */
	movdqu (%rdx), MSG0
	pxor MSG0, STATE0
	pxor MSG0, STATE4

	/*
	 * load the constants (RIP-relative, so no absolute relocation is
	 * needed).  Afterwards:
	 *   STATE0 = STATE4 = key ^ iv
	 *   STATE1 = STATE3 = const_1
	 *   STATE2          = const_0
	 *   STATE5 = STATE7 = key ^ const_0
	 *   STATE6          = key ^ const_1
	 */
	movdqa .Laegis128l_const_0(%rip), STATE2
	movdqa .Laegis128l_const_1(%rip), STATE1
	movdqa STATE1, STATE3
	pxor STATE2, STATE5
	pxor STATE1, STATE6
	pxor STATE2, STATE7

	/* update 10 times with IV and KEY: */
	update0
	update1
	update2
	update3
	update4
	update5
	update6
	update7
	update0
	update1

	/* the last round was update1, hence the matching store variant */
	state_store1

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_init)
 | |
| 
 | |
/*
 * Absorb one 32-byte block of associated data.
 *
 *   a - 'a' for aligned (movdqa) or 'u' for unaligned (movdqu) loads
 *   i - position of the block inside the 8-block unrolled loop (0..7);
 *       selects the source offset, the update variant and the exit label
 *
 * Subtracts 0x20 from LEN and leaves through .Lad_out_\i when fewer than
 * 0x20 bytes remain.  LEN is a byte count, so the comparison is unsigned,
 * matching the 'jb' check at function entry.
 */
.macro ad_block a i
	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
	update\i
	sub $0x20, LEN
	cmp $0x20, LEN
	jb .Lad_out_\i
.endm
 | |
| 
 | |
/*
 * void crypto_aegis128l_aesni_ad(void *state, unsigned int length,
 *                                const void *data);
 *
 * Absorbs every complete 32-byte block of 'data' into the state.  If length
 * is below 32 the state is left untouched; a trailing partial block is not
 * processed here.
 *
 *   %rdi - state (STATEP)
 *   %esi - length (LEN after zero-extension)
 *   %rdx - data (SRC)
 */
ENTRY(crypto_aegis128l_aesni_ad)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of %rsi are not
	 * defined by the calling convention.  Zero-extend before LEN is used
	 * as a 64-bit quantity.
	 */
	mov %esi, %esi

	cmp $0x20, LEN
	jb .Lad_out

	state_load

	/* pick the aligned loop only if SRC is 16-byte aligned */
	test $0xf, SRC
	jnz .Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a 0
	ad_block a 1
	ad_block a 2
	ad_block a 3
	ad_block a 4
	ad_block a 5
	ad_block a 6
	ad_block a 7

	add $0x100, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u 0
	ad_block u 1
	ad_block u 2
	ad_block u 3
	ad_block u 4
	ad_block u 5
	ad_block u 6
	ad_block u 7

	add $0x100, SRC
	jmp .Lad_u_loop

	/* exit N: the last round executed was updateN */
.Lad_out_0:
	state_store0
	FRAME_END
	ret

.Lad_out_1:
	state_store1
	FRAME_END
	ret

.Lad_out_2:
	state_store2
	FRAME_END
	ret

.Lad_out_3:
	state_store3
	FRAME_END
	ret

.Lad_out_4:
	state_store4
	FRAME_END
	ret

.Lad_out_5:
	state_store5
	FRAME_END
	ret

.Lad_out_6:
	state_store6
	FRAME_END
	ret

.Lad_out_7:
	state_store7
	FRAME_END
	ret

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_ad)
 | |
| 
 | |
/*
 * XOR the keystream derived from the state into a pair of blocks:
 *
 *   \m0 ^= \s1 ^ \s6 ^ (\s2 & \s3)
 *   \m1 ^= \s2 ^ \s5 ^ (\s6 & \s7)
 *
 * \s0 and \s4 are accepted for a uniform argument list but are not used.
 * changed: \m0, \m1, T3
 */
.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
	movdqa \s2, T3
	pand \s3, T3
	pxor \s1, \m0
	pxor \s6, \m0
	pxor T3, \m0

	movdqa \s6, T3
	pand \s7, T3
	pxor \s2, \m1
	pxor \s5, \m1
	pxor T3, \m1
.endm
 | |
| 
 | |
/*
 * crypt0 .. crypt7: 'crypt' with the state register list rotated by N
 * positions.  cryptN is used at position N of the unrolled loops, i.e.
 * after N update rounds have been applied to the loaded state.
 */
.macro crypt0 m0 m1
	crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
.endm

.macro crypt1 m0 m1
	crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
.endm

.macro crypt2 m0 m1
	crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro crypt3 m0 m1
	crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro crypt4 m0 m1
	crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
.endm

.macro crypt5 m0 m1
	crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
.endm

.macro crypt6 m0 m1
	crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
.endm

.macro crypt7 m0 m1
	crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
.endm
 | |
| 
 | |
/*
 * Encrypt one 32-byte block.
 *
 *   a - 'a' for aligned (movdqa) or 'u' for unaligned (movdqu) accesses
 *   i - position inside the 8-block unrolled loop (0..7)
 *
 * The plaintext stays in MSG0/MSG1 so it can be absorbed by update\i; the
 * ciphertext is produced in the copies T0/T1 and written to DST before the
 * update clobbers T0.  Leaves through .Lenc_out_\i when fewer than 0x20
 * bytes remain (unsigned compare: LEN is a byte count).
 */
.macro encrypt_block a i
	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
	movdqa MSG0, T0
	movdqa MSG1, T1
	crypt\i T0, T1
	movdq\a T0, (\i * 0x20 + 0x00)(DST)
	movdq\a T1, (\i * 0x20 + 0x10)(DST)

	update\i

	sub $0x20, LEN
	cmp $0x20, LEN
	jb .Lenc_out_\i
.endm
 | |
| 
 | |
/*
 * Decrypt one 32-byte block.
 *
 *   a - 'a' for aligned (movdqa) or 'u' for unaligned (movdqu) accesses
 *   i - position inside the 8-block unrolled loop (0..7)
 *
 * The ciphertext is decrypted in place in MSG0/MSG1, written to DST, and the
 * resulting plaintext is then absorbed by update\i.  Leaves through
 * .Ldec_out_\i when fewer than 0x20 bytes remain (unsigned compare: LEN is a
 * byte count).
 */
.macro decrypt_block a i
	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
	crypt\i MSG0, MSG1
	movdq\a MSG0, (\i * 0x20 + 0x00)(DST)
	movdq\a MSG1, (\i * 0x20 + 0x10)(DST)

	update\i

	sub $0x20, LEN
	cmp $0x20, LEN
	jb .Ldec_out_\i
.endm
 | |
| 
 | |
/*
 * void crypto_aegis128l_aesni_enc(void *state, unsigned int length,
 *                                 const void *src, void *dst);
 *
 * Encrypts every complete 32-byte block from src to dst and updates the
 * state.  If length is below 32 nothing is done; a trailing partial block is
 * left for crypto_aegis128l_aesni_enc_tail.
 *
 *   %rdi - state (STATEP)
 *   %esi - length (LEN after zero-extension)
 *   %rdx - src (SRC)
 *   %rcx - dst (DST)
 */
ENTRY(crypto_aegis128l_aesni_enc)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of %rsi are not
	 * defined by the calling convention.  Zero-extend before LEN is used
	 * as a 64-bit quantity.
	 */
	mov %esi, %esi

	cmp $0x20, LEN
	jb .Lenc_out

	state_load

	/* the aligned loop needs both SRC and DST to be 16-byte aligned */
	mov  SRC, %r8
	or   DST, %r8
	test $0xf, %r8
	jnz .Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a 0
	encrypt_block a 1
	encrypt_block a 2
	encrypt_block a 3
	encrypt_block a 4
	encrypt_block a 5
	encrypt_block a 6
	encrypt_block a 7

	add $0x100, SRC
	add $0x100, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u 0
	encrypt_block u 1
	encrypt_block u 2
	encrypt_block u 3
	encrypt_block u 4
	encrypt_block u 5
	encrypt_block u 6
	encrypt_block u 7

	add $0x100, SRC
	add $0x100, DST
	jmp .Lenc_u_loop

	/* exit N: the last round executed was updateN */
.Lenc_out_0:
	state_store0
	FRAME_END
	ret

.Lenc_out_1:
	state_store1
	FRAME_END
	ret

.Lenc_out_2:
	state_store2
	FRAME_END
	ret

.Lenc_out_3:
	state_store3
	FRAME_END
	ret

.Lenc_out_4:
	state_store4
	FRAME_END
	ret

.Lenc_out_5:
	state_store5
	FRAME_END
	ret

.Lenc_out_6:
	state_store6
	FRAME_END
	ret

.Lenc_out_7:
	state_store7
	FRAME_END
	ret

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_enc)
 | |
| 
 | |
/*
 * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length,
 *                                      const void *src, void *dst);
 *
 * Encrypts a final partial block pair (the helpers handle lengths below 32):
 * the input is loaded zero-padded, encrypted, the first 'length' bytes of
 * ciphertext are written to dst, and the zero-padded plaintext is absorbed
 * into the state.
 *
 *   %rdi - state (STATEP)
 *   %esi - length (LEN after zero-extension)
 *   %rdx - src (SRC)
 *   %rcx - dst (DST)
 */
ENTRY(crypto_aegis128l_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument; clear the undefined upper half of
	 * %rsi before the helpers use LEN.
	 */
	mov %esi, %esi

	state_load

	/* encrypt message: */
	call __load_partial

	/* keep the plaintext in MSG0/MSG1 for the state update */
	movdqa MSG0, T0
	movdqa MSG1, T1
	crypt0 T0, T1

	call __store_partial

	update0

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_enc_tail)
 | |
| 
 | |
/*
 * void crypto_aegis128l_aesni_dec(void *state, unsigned int length,
 *                                 const void *src, void *dst);
 *
 * Decrypts every complete 32-byte block from src to dst and updates the
 * state.  If length is below 32 nothing is done; a trailing partial block is
 * left for crypto_aegis128l_aesni_dec_tail.
 *
 *   %rdi - state (STATEP)
 *   %esi - length (LEN after zero-extension)
 *   %rdx - src (SRC)
 *   %rcx - dst (DST)
 */
ENTRY(crypto_aegis128l_aesni_dec)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of %rsi are not
	 * defined by the calling convention.  Zero-extend before LEN is used
	 * as a 64-bit quantity.
	 */
	mov %esi, %esi

	cmp $0x20, LEN
	jb .Ldec_out

	state_load

	/* the aligned loop needs both SRC and DST to be 16-byte aligned */
	mov  SRC, %r8
	or   DST, %r8
	test $0xf, %r8
	jnz .Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a 0
	decrypt_block a 1
	decrypt_block a 2
	decrypt_block a 3
	decrypt_block a 4
	decrypt_block a 5
	decrypt_block a 6
	decrypt_block a 7

	add $0x100, SRC
	add $0x100, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u 0
	decrypt_block u 1
	decrypt_block u 2
	decrypt_block u 3
	decrypt_block u 4
	decrypt_block u 5
	decrypt_block u 6
	decrypt_block u 7

	add $0x100, SRC
	add $0x100, DST
	jmp .Ldec_u_loop

	/* exit N: the last round executed was updateN */
.Ldec_out_0:
	state_store0
	FRAME_END
	ret

.Ldec_out_1:
	state_store1
	FRAME_END
	ret

.Ldec_out_2:
	state_store2
	FRAME_END
	ret

.Ldec_out_3:
	state_store3
	FRAME_END
	ret

.Ldec_out_4:
	state_store4
	FRAME_END
	ret

.Ldec_out_5:
	state_store5
	FRAME_END
	ret

.Ldec_out_6:
	state_store6
	FRAME_END
	ret

.Ldec_out_7:
	state_store7
	FRAME_END
	ret

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_dec)
 | |
| 
 | |
/*
 * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length,
 *                                      const void *src, void *dst);
 *
 * Decrypts a final partial block pair (the helpers handle lengths below 32):
 * the input is loaded zero-padded and decrypted, the first 'length' bytes of
 * plaintext are written to dst, and the plaintext, with every byte at index
 * >= length forced to zero, is absorbed into the state.
 *
 *   %rdi - state (STATEP)
 *   %esi - length (LEN after zero-extension)
 *   %rdx - src (SRC)
 *   %rcx - dst (DST)
 */
ENTRY(crypto_aegis128l_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument; clear the undefined upper half of
	 * %rsi before the helpers use LEN.
	 */
	mov %esi, %esi

	state_load

	/* decrypt message: */
	call __load_partial

	crypt0 MSG0, MSG1

	/* __store_partial consumes T0/T1, so hand it copies */
	movdqa MSG0, T0
	movdqa MSG1, T1
	call __store_partial

	/*
	 * mask with byte count: four byte-interleaves replicate the low byte
	 * of LEN into all 16 lanes; comparing against the byte indices 0..15
	 * and 16..31 gives 0xff exactly where index < LEN.
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa T0, T1
	movdqa .Laegis128l_counter0(%rip), T2
	movdqa .Laegis128l_counter1(%rip), T3
	pcmpgtb T2, T0
	pcmpgtb T3, T1
	pand T0, MSG0
	pand T1, MSG1

	update0

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_dec_tail)
 | |
| 
 | |
/*
 * void crypto_aegis128l_aesni_final(void *state, void *tag_xor,
 *                                   u64 assoclen, u64 cryptlen);
 *
 * Absorbs the length block for seven rounds and XORs seven of the resulting
 * state words into the 16 bytes at tag_xor.  The state in memory is only
 * read, never written back.
 *
 *   %rdi - state (STATEP)
 *   %rsi - tag_xor
 *   %rdx - assoclen (bytes)
 *   %rcx - cryptlen (bytes)
 */
ENTRY(crypto_aegis128l_aesni_final)
	FRAME_BEGIN

	state_load

	/* prepare length block: low qword = assoclen, high qword = cryptlen */
	movq %rdx, MSG0
	movq %rcx, T0
	punpcklqdq T0, MSG0
	psllq $3, MSG0 /* multiply by 8 (to get bit count) */

	pxor STATE2, MSG0
	movdqa MSG0, MSG1

	/* update state: */
	update0
	update1
	update2
	update3
	update4
	update5
	update6

	/* xor tag: */
	movdqu (%rsi), T0

	pxor STATE1, T0
	pxor STATE2, T0
	pxor STATE3, T0
	pxor STATE4, T0
	pxor STATE5, T0
	pxor STATE6, T0
	pxor STATE7, T0

	movdqu T0, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_final)
 | 
