631 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			631 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
 | |
|  *
 | |
|  * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify it
 | |
|  * under the terms of the GNU General Public License as published by the Free
 | |
|  * Software Foundation; either version 2 of the License, or (at your option)
 | |
|  * any later version.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #include <asm/ppc_asm.h>
 | |
| #include "aes-spe-regs.h"
 | |
| 
 | |
| #ifdef __BIG_ENDIAN__			/* Macros for big endian builds	*/
 | |
| 
 | |
| #define LOAD_DATA(reg, off) \
 | |
| 	lwz		reg,off(rSP);	/* load word at rSP + off	*/
 | |
| #define SAVE_DATA(reg, off) \
 | |
| 	stw		reg,off(rDP);	/* store word at rDP + off	*/
 | |
| #define NEXT_BLOCK \
 | |
| 	addi		rSP,rSP,16;	/* advance both pointers by one	*/ \
 | |
| 	addi		rDP,rDP,16;	/* 16 byte block		*/
 | |
| #define LOAD_IV(reg, off) \
 | |
| 	lwz		reg,off(rIP);	/* load IV word at rIP + off	*/
 | |
| #define SAVE_IV(reg, off) \
 | |
| 	stw		reg,off(rIP);	/* store IV word at rIP + off	*/
 | |
| #define START_IV			/* rIP never moves, nothing to reset */
 | |
| #define CBC_DEC 16			/* CBC decrypt walks back one block; accesses do not move pointers */
 | |
| #define CTR_DEC 1			/* CTR partial block: one byte back for the lbzu pre-increment */
 | |
| 
 | |
| #else					/* Macros for little endian	*/
 | |
| 
 | |
| #define LOAD_DATA(reg, off) \
 | |
| 	lwbrx		reg,0,rSP;	/* load byte reversed, off is unused */ \
 | |
| 	addi		rSP,rSP,4;	/* and increment pointer	*/
 | |
| #define SAVE_DATA(reg, off) \
 | |
| 	stwbrx		reg,0,rDP;	/* save byte reversed, off is unused */ \
 | |
| 	addi		rDP,rDP,4;	/* and increment pointer	*/
 | |
| #define NEXT_BLOCK			/* nothing to do, LOAD_DATA/SAVE_DATA already advanced the pointers */
 | |
| #define LOAD_IV(reg, off) \
 | |
| 	lwbrx		reg,0,rIP;	/* load byte reversed, off is unused */ \
 | |
| 	addi		rIP,rIP,4;	/* and increment pointer	*/
 | |
| #define SAVE_IV(reg, off) \
 | |
| 	stwbrx		reg,0,rIP;	/* save byte reversed, off is unused */ \
 | |
| 	addi		rIP,rIP,4;	/* and increment pointer	*/
 | |
| #define START_IV \
 | |
| 	subi		rIP,rIP,16;	/* rewind rIP after four LOAD_IV/SAVE_IV */
 | |
| #define CBC_DEC 32			/* 2 blocks: the 16 bytes the accesses advanced plus one block back */
 | |
| #define CTR_DEC 17			/* 16 bytes advanced by four SAVE_IV plus the 1 byte for lbzu */
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #define SAVE_0_REGS			/* variant for nr32bitregs == 0: nothing to save */
 | |
| #define LOAD_0_REGS			/* variant for nr32bitregs == 0: nothing to restore */
 | |
| 
 | |
| #define SAVE_4_REGS \
 | |
| 	stw		rI0,96(r1);	/* save rI0-rI3 as 32 bit words	*/ \
 | |
| 	stw		rI1,100(r1);	/* in frame slots 96..108	*/ \
 | |
| 	stw		rI2,104(r1);					   \
 | |
| 	stw		rI3,108(r1);
 | |
| 
 | |
| #define LOAD_4_REGS \
 | |
| 	lwz		rI0,96(r1);	/* restore rI0-rI3 from the	*/ \
 | |
| 	lwz		rI1,100(r1);	/* slots SAVE_4_REGS filled	*/ \
 | |
| 	lwz		rI2,104(r1);					   \
 | |
| 	lwz		rI3,108(r1);
 | |
| 
 | |
| #define SAVE_8_REGS \
 | |
| 	SAVE_4_REGS			/* rI0-rI3 first		*/ \
 | |
| 	stw		rG0,112(r1);	/* then rG0-rG3 as 32 bit words	*/ \
 | |
| 	stw		rG1,116(r1);	/* in frame slots 112..124	*/ \
 | |
| 	stw		rG2,120(r1);					   \
 | |
| 	stw		rG3,124(r1);
 | |
| 
 | |
| #define LOAD_8_REGS \
 | |
| 	LOAD_4_REGS			/* rI0-rI3 first		*/ \
 | |
| 	lwz		rG0,112(r1);	/* then rG0-rG3 from the slots	*/ \
 | |
| 	lwz		rG1,116(r1);	/* SAVE_8_REGS filled		*/ \
 | |
| 	lwz		rG2,120(r1);					   \
 | |
| 	lwz		rG3,124(r1);
 | |
| 
 | |
| #define INITIALIZE_CRYPT(tab,nr32bitregs) /* common prologue: frame, LR, table pointer, register saves */ \
 | |
| 	mflr		r0;		/* fetch link register		*/ \
 | |
| 	stwu		r1,-160(r1);	/* create 160 byte stack frame	*/ \
 | |
| 	lis		rT0,tab@h;	/* en-/decryption table pointer	*/ \
 | |
| 	stw		r0,8(r1);	/* save link register		*/ \
 | |
| 	ori		rT0,rT0,tab@l;	/* rT0 = full address of tab	*/ \
 | |
| 	evstdw		r14,16(r1);	/* 64 bit store, slot 16(r1)	*/ \
 | |
| 	mr		rKS,rKP;	/* keep a copy of the key pointer; callers reload rKP from rKS per block */ \
 | |
| 	evstdw		r15,24(r1);	/* We must save non volatile	*/ \
 | |
| 	evstdw		r16,32(r1);	/* registers. Take the chance	*/ \
 | |
| 	evstdw		r17,40(r1);	/* and save the SPE part too	*/ \
 | |
| 	evstdw		r18,48(r1);					   \
 | |
| 	evstdw		r19,56(r1);					   \
 | |
| 	evstdw		r20,64(r1);					   \
 | |
| 	evstdw		r21,72(r1);					   \
 | |
| 	evstdw		r22,80(r1);					   \
 | |
| 	evstdw		r23,88(r1);	/* last 8 byte slot, 88(r1)	*/ \
 | |
| 	SAVE_##nr32bitregs##_REGS	/* expands to SAVE_0_REGS, SAVE_4_REGS or SAVE_8_REGS */
 | |
| 
 | |
| #define FINALIZE_CRYPT(nr32bitregs) /* common epilogue: restore registers, wipe save slots, drop frame; caller adds blr */ \
 | |
| 	lwz		r0,8(r1);	/* reload saved link register	*/ \
 | |
| 	evldw		r14,16(r1);	/* restore SPE registers	*/ \
 | |
| 	evldw		r15,24(r1);					   \
 | |
| 	evldw		r16,32(r1);					   \
 | |
| 	evldw		r17,40(r1);					   \
 | |
| 	evldw		r18,48(r1);					   \
 | |
| 	evldw		r19,56(r1);					   \
 | |
| 	evldw		r20,64(r1);					   \
 | |
| 	evldw		r21,72(r1);					   \
 | |
| 	evldw		r22,80(r1);					   \
 | |
| 	evldw		r23,88(r1);					   \
 | |
| 	LOAD_##nr32bitregs##_REGS	/* expands to LOAD_0_REGS, LOAD_4_REGS or LOAD_8_REGS */ \
 | |
| 	mtlr		r0;		/* restore link register	*/ \
 | |
| 	xor		r0,r0,r0;	/* r0 = 0			*/ \
 | |
| 	stw		r0,16(r1);	/* delete sensitive data	*/ \
 | |
| 	stw		r0,24(r1);	/* that we might have pushed	*/ \
 | |
| 	stw		r0,32(r1);	/* from other context that runs	*/ \
 | |
| 	stw		r0,40(r1);	/* the same code		*/ \
 | |
| 	stw		r0,48(r1);	/* only the first word of each	*/ \
 | |
| 	stw		r0,56(r1);	/* 8 byte evstdw slot is zeroed	*/ \
 | |
| 	stw		r0,64(r1);	/* NOTE(review): presumably the	*/ \
 | |
| 	stw		r0,72(r1);	/* SPE upper half - confirm	*/ \
 | |
| 	stw		r0,80(r1);					   \
 | |
| 	stw		r0,88(r1);					   \
 | |
| 	addi		r1,r1,160;	/* cleanup stack frame		*/
 | |
| 
 | |
| #define ENDIAN_SWAP(t0, t1, s0, s1) /* t0 = byte-swapped s0, t1 = byte-swapped s1; s0/s1 are not modified */ \
 | |
| 	rotrwi		t0,s0,8;	/* swap endianness for 2 GPRs: with s = bytes ABCD, t = DABC */ \
 | |
| 	rotrwi		t1,s1,8;					   \
 | |
| 	rlwimi		t0,s0,8,8,15;	/* insert bits 8-15 of s rotated left 8 (BCDA): t = DCBC */ \
 | |
| 	rlwimi		t1,s1,8,8,15;					   \
 | |
| 	rlwimi		t0,s0,8,24,31;	/* insert bits 24-31 of the same rotation: t = DCBA */ \
 | |
| 	rlwimi		t1,s1,8,24,31;
 | |
| 
 | |
| #define GF128_MUL(d0, d1, d2, d3, t0) /* in place: shift the 128 bit value d3:d2:d1:d0 (d3 most significant) left by one and xor 0x87 into d0 if a bit fell out of d3; t0 is scratch */ \
 | |
| 	li		t0,0x87;	/* multiplication in GF128	*/ \
 | |
| 	cmpwi		d3,-1;		/* signed d3 > -1 means the top bit of d3 is clear */ \
 | |
| 	iselgt		t0,0,t0;	/* top bit clear: t0 = 0, otherwise t0 stays 0x87 */ \
 | |
| 	rlwimi		d3,d2,0,0,0;	/* propagate "carry" bits: top bit of d2 replaces top bit of d3 */ \
 | |
| 	rotlwi		d3,d3,1;	/* rotate it into the lowest bit of d3 */ \
 | |
| 	rlwimi		d2,d1,0,0,0;	/* same for d1 -> d2		*/ \
 | |
| 	rotlwi		d2,d2,1;					   \
 | |
| 	rlwimi		d1,d0,0,0,0;	/* same for d0 -> d1		*/ \
 | |
| 	slwi		d0,d0,1;	/* shift left 128 bit		*/ \
 | |
| 	rotlwi		d1,d1,1;					   \
 | |
| 	xor		d0,d0,t0;	/* fold the reduction constant into the low word */
 | |
| 
 | |
| #define START_KEY(d0, d1, d2, d3) /* rD0-rD3 = d0-d3 xor the four key words at rKP; also loads CTR from rRR */ \
 | |
| 	lwz		rW0,0(rKP);	/* first four key words go to rW0-rW3 */ \
 | |
| 	mtctr		rRR;		/* CTR = rRR; NOTE(review): presumably the round loop count used by the block routines - confirm */ \
 | |
| 	lwz		rW1,4(rKP);					   \
 | |
| 	lwz		rW2,8(rKP);					   \
 | |
| 	lwz		rW3,12(rKP);					   \
 | |
| 	xor		rD0,d0,rW0;	/* xor the inputs with the key words; result always lands in rD0-rD3 */ \
 | |
| 	xor		rD1,d1,rW1;					   \
 | |
| 	xor		rD2,d2,rW2;					   \
 | |
| 	xor		rD3,d3,rW3;
 | |
| 
 | |
| /*
 | |
|  * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 | |
|  *		   u32 rounds)
 | |
|  *
 | |
|  * called from glue layer to encrypt a single 16 byte block
 | |
|  * round values are AES128 = 4, AES192 = 5, AES256 = 6
 | |
|  *
 | |
|  * Flow: four words are read through LOAD_DATA, START_KEY xors
 | |
|  * them with the first four key words, ppc_encrypt_block is
 | |
|  * called, and rD0-rD3 are xored with rW0-rW3 before they are
 | |
|  * written through SAVE_DATA.
 | |
|  * NOTE(review): rW0-rW3 after the call are presumably the last
 | |
|  * round key left behind by ppc_encrypt_block - confirm against
 | |
|  * its definition, which is not part of this file.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_encrypt_aes)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_encrypt_block	/* defined outside this file */
 | |
| 	xor		rD0,rD0,rW0		/* final xor with rW0-rW3 */
 | |
| 	SAVE_DATA(rD0, 0)
 | |
| 	xor		rD1,rD1,rW1
 | |
| 	SAVE_DATA(rD1, 4)
 | |
| 	xor		rD2,rD2,rW2
 | |
| 	SAVE_DATA(rD2, 8)
 | |
| 	xor		rD3,rD3,rW3
 | |
| 	SAVE_DATA(rD3, 12)
 | |
| 	FINALIZE_CRYPT(0)
 | |
| 	blr
 | |
| 
 | |
| /*
 | |
|  * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 | |
|  *		   u32 rounds)
 | |
|  *
 | |
|  * called from glue layer to decrypt a single 16 byte block
 | |
|  * round values are AES128 = 4, AES192 = 5, AES256 = 6
 | |
|  *
 | |
|  * Same flow as the single block encryption, but with the
 | |
|  * decryption table in rT0, rT1 set to rT0 + 4096 and a call
 | |
|  * to ppc_decrypt_block.
 | |
|  * NOTE(review): rT1 presumably addresses a second table that
 | |
|  * ppc_decrypt_block needs - confirm against its definition.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_decrypt_aes)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	addi		rT1,rT0,4096		/* rT1 = table pointer + 4096 */
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_decrypt_block	/* defined outside this file */
 | |
| 	xor		rD0,rD0,rW0		/* final xor with rW0-rW3 */
 | |
| 	SAVE_DATA(rD0, 0)
 | |
| 	xor		rD1,rD1,rW1
 | |
| 	SAVE_DATA(rD1, 4)
 | |
| 	xor		rD2,rD2,rW2
 | |
| 	SAVE_DATA(rD2, 8)
 | |
| 	xor		rD3,rD3,rW3
 | |
| 	SAVE_DATA(rD3, 12)
 | |
| 	FINALIZE_CRYPT(0)
 | |
| 	blr
 | |
| 
 | |
| /*
 | |
|  * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 | |
|  *		   u32 rounds, u32 bytes);
 | |
|  *
 | |
|  * called from glue layer to encrypt multiple blocks via ECB
 | |
|  * Bytes must be larger or equal 16 and only whole blocks are
 | |
|  * processed. round values are AES128 = 4, AES192 = 5 and
 | |
|  * AES256 = 6
 | |
|  *
 | |
|  * The loop body always runs once before the length is tested,
 | |
|  * which is why fewer than 16 bytes must not be passed. rLN is
 | |
|  * reduced by 16 per block and the loop repeats while more than
 | |
|  * 15 bytes remain. The compare is issued before the block call
 | |
|  * and its result is only consumed by the branch at the end of
 | |
|  * the loop.
 | |
|  * NOTE(review): this relies on ppc_encrypt_block leaving the
 | |
|  * compare result untouched - confirm against its definition.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_encrypt_ecb)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
 | |
| ppc_encrypt_ecb_loop:
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	mr		rKP,rKS			/* restart at the key copy from the prologue */
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	subi		rLN,rLN,16		/* one block consumed */
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	cmpwi		rLN,15			/* tested by the bt at the loop end */
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_encrypt_block
 | |
| 	xor		rD0,rD0,rW0		/* final xor with rW0-rW3 */
 | |
| 	SAVE_DATA(rD0, 0)
 | |
| 	xor		rD1,rD1,rW1
 | |
| 	SAVE_DATA(rD1, 4)
 | |
| 	xor		rD2,rD2,rW2
 | |
| 	SAVE_DATA(rD2, 8)
 | |
| 	xor		rD3,rD3,rW3
 | |
| 	SAVE_DATA(rD3, 12)
 | |
| 	NEXT_BLOCK
 | |
| 	bt		gt,ppc_encrypt_ecb_loop	/* another whole block left */
 | |
| 	FINALIZE_CRYPT(0)
 | |
| 	blr
 | |
| 
 | |
| /*
 | |
|  * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 | |
|  *		   u32 rounds, u32 bytes);
 | |
|  *
 | |
|  * called from glue layer to decrypt multiple blocks via ECB
 | |
|  * Bytes must be larger or equal 16 and only whole blocks are
 | |
|  * processed. round values are AES128 = 4, AES192 = 5 and
 | |
|  * AES256 = 6
 | |
|  *
 | |
|  * The loop body always runs once before the length is tested.
 | |
|  * rLN is reduced by 16 per block and the loop repeats while
 | |
|  * more than 15 bytes remain. rT1 is set once to rT0 + 4096
 | |
|  * before the loop. The compare is issued before the block call
 | |
|  * and consumed by the branch at the end of the loop.
 | |
|  * NOTE(review): this relies on ppc_decrypt_block leaving the
 | |
|  * compare result untouched - confirm against its definition.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_decrypt_ecb)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
 | |
| 	addi		rT1,rT0,4096		/* rT1 = table pointer + 4096 */
 | |
| ppc_decrypt_ecb_loop:
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	mr		rKP,rKS			/* restart at the key copy from the prologue */
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	subi		rLN,rLN,16		/* one block consumed */
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	cmpwi		rLN,15			/* tested by the bt at the loop end */
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_decrypt_block
 | |
| 	xor		rD0,rD0,rW0		/* final xor with rW0-rW3 */
 | |
| 	SAVE_DATA(rD0, 0)
 | |
| 	xor		rD1,rD1,rW1
 | |
| 	SAVE_DATA(rD1, 4)
 | |
| 	xor		rD2,rD2,rW2
 | |
| 	SAVE_DATA(rD2, 8)
 | |
| 	xor		rD3,rD3,rW3
 | |
| 	SAVE_DATA(rD3, 12)
 | |
| 	NEXT_BLOCK
 | |
| 	bt		gt,ppc_decrypt_ecb_loop	/* another whole block left */
 | |
| 	FINALIZE_CRYPT(0)
 | |
| 	blr
 | |
| 
 | |
| /*
 | |
|  * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 | |
|  *		   u32 rounds, u32 bytes, u8 *iv);
 | |
|  *
 | |
|  * called from glue layer to encrypt multiple blocks via CBC
 | |
|  * Bytes must be larger or equal 16 and only whole blocks are
 | |
|  * processed. round values are AES128 = 4, AES192 = 5 and
 | |
|  * AES256 = 6
 | |
|  *
 | |
|  * The chaining value lives in rI0-rI3: it is loaded from the
 | |
|  * IV buffer, xored into every input block before encryption
 | |
|  * and replaced by the resulting ciphertext block. After the
 | |
|  * last block it is written back to the IV buffer.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_encrypt_cbc)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
 | |
| 	LOAD_IV(rI0, 0)
 | |
| 	LOAD_IV(rI1, 4)
 | |
| 	LOAD_IV(rI2, 8)
 | |
| 	LOAD_IV(rI3, 12)
 | |
| ppc_encrypt_cbc_loop:
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	mr		rKP,rKS			/* restart at the key copy from the prologue */
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	subi		rLN,rLN,16		/* one block consumed */
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	cmpwi		rLN,15			/* tested by the bt at the loop end */
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	xor		rD0,rD0,rI0		/* chain: input xor previous ciphertext (or IV) */
 | |
| 	xor		rD1,rD1,rI1
 | |
| 	xor		rD2,rD2,rI2
 | |
| 	xor		rD3,rD3,rI3
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_encrypt_block
 | |
| 	xor		rI0,rD0,rW0		/* ciphertext becomes the next chaining value */
 | |
| 	SAVE_DATA(rI0, 0)
 | |
| 	xor		rI1,rD1,rW1
 | |
| 	SAVE_DATA(rI1, 4)
 | |
| 	xor		rI2,rD2,rW2
 | |
| 	SAVE_DATA(rI2, 8)
 | |
| 	xor		rI3,rD3,rW3
 | |
| 	SAVE_DATA(rI3, 12)
 | |
| 	NEXT_BLOCK
 | |
| 	bt		gt,ppc_encrypt_cbc_loop	/* another whole block left */
 | |
| 	START_IV
 | |
| 	SAVE_IV(rI0, 0)
 | |
| 	SAVE_IV(rI1, 4)
 | |
| 	SAVE_IV(rI2, 8)
 | |
| 	SAVE_IV(rI3, 12)
 | |
| 	FINALIZE_CRYPT(4)
 | |
| 	blr
 | |
| 
 | |
| /*
 | |
|  * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 | |
|  *		   u32 rounds, u32 bytes, u8 *iv);
 | |
|  *
 | |
|  * called from glue layer to decrypt multiple blocks via CBC
 | |
|  * round values are AES128 = 4, AES192 = 5, AES256 = 6
 | |
|  *
 | |
|  * Blocks are processed from the last one to the first one.
 | |
|  * The length is rounded down to a multiple of 16 and both data
 | |
|  * pointers are moved to the last block. That last input block
 | |
|  * is written to the IV buffer right away, while the IV that
 | |
|  * was passed in stays in rI0-rI3 and is xored into the first
 | |
|  * block at the very end. Inside the loop each decrypted block
 | |
|  * is xored with the input block in front of it, which is then
 | |
|  * the next block to be decrypted. CBC_DEC accounts for the
 | |
|  * pointer movement done by the little endian access macros.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_decrypt_cbc)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
 | |
| 	li		rT1,15			/* mask of the sub-block length bits */
 | |
| 	LOAD_IV(rI0, 0)
 | |
| 	andc		rLN,rLN,rT1		/* round length down to whole blocks */
 | |
| 	LOAD_IV(rI1, 4)
 | |
| 	subi		rLN,rLN,16		/* rLN = offset of the last block */
 | |
| 	LOAD_IV(rI2, 8)
 | |
| 	add		rSP,rSP,rLN	/* reverse processing		*/
 | |
| 	LOAD_IV(rI3, 12)
 | |
| 	add		rDP,rDP,rLN
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	addi		rT1,rT0,4096		/* rT1 = table pointer + 4096 */
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	START_IV
 | |
| 	SAVE_IV(rD0, 0)				/* last input block is the IV handed back */
 | |
| 	SAVE_IV(rD1, 4)
 | |
| 	SAVE_IV(rD2, 8)
 | |
| 	cmpwi		rLN,16			/* less than 16 left: only one block in total */
 | |
| 	SAVE_IV(rD3, 12)
 | |
| 	bt		lt,ppc_decrypt_cbc_end
 | |
| ppc_decrypt_cbc_loop:
 | |
| 	mr		rKP,rKS			/* restart at the key copy from the prologue */
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_decrypt_block
 | |
| 	subi		rLN,rLN,16
 | |
| 	subi		rSP,rSP,CBC_DEC		/* step to the preceding input block */
 | |
| 	xor		rW0,rD0,rW0		/* rW = block cipher output */
 | |
| 	LOAD_DATA(rD0, 0)			/* rD = preceding input block */
 | |
| 	xor		rW1,rD1,rW1
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	xor		rW2,rD2,rW2
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	xor		rW3,rD3,rW3
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	xor		rW0,rW0,rD0		/* undo the chaining xor */
 | |
| 	SAVE_DATA(rW0, 0)
 | |
| 	xor		rW1,rW1,rD1
 | |
| 	SAVE_DATA(rW1, 4)
 | |
| 	xor		rW2,rW2,rD2
 | |
| 	SAVE_DATA(rW2, 8)
 | |
| 	xor		rW3,rW3,rD3
 | |
| 	SAVE_DATA(rW3, 12)
 | |
| 	cmpwi		rLN,15
 | |
| 	subi		rDP,rDP,CBC_DEC		/* step to the preceding output block */
 | |
| 	bt		gt,ppc_decrypt_cbc_loop
 | |
| ppc_decrypt_cbc_end:
 | |
| 	mr		rKP,rKS
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_decrypt_block
 | |
| 	xor		rW0,rW0,rD0		/* rW = block cipher output of the first block */
 | |
| 	xor		rW1,rW1,rD1
 | |
| 	xor		rW2,rW2,rD2
 | |
| 	xor		rW3,rW3,rD3
 | |
| 	xor		rW0,rW0,rI0	/* decrypt with initial IV	*/
 | |
| 	SAVE_DATA(rW0, 0)
 | |
| 	xor		rW1,rW1,rI1
 | |
| 	SAVE_DATA(rW1, 4)
 | |
| 	xor		rW2,rW2,rI2
 | |
| 	SAVE_DATA(rW2, 8)
 | |
| 	xor		rW3,rW3,rI3
 | |
| 	SAVE_DATA(rW3, 12)
 | |
| 	FINALIZE_CRYPT(4)
 | |
| 	blr
 | |
| 
 | |
| /*
 | |
|  * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 | |
|  *		 u32 rounds, u32 bytes, u8 *iv);
 | |
|  *
 | |
|  * called from glue layer to encrypt/decrypt multiple blocks
 | |
|  * via CTR. Number of bytes does not need to be a multiple of
 | |
|  * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
 | |
|  *
 | |
|  * The counter is held in rI0-rI3 and incremented as one 128
 | |
|  * bit value with carry from rI3 up to rI0. Whole blocks are
 | |
|  * handled in the main loop. For a trailing partial block the
 | |
|  * keystream is stored into the IV buffer, xored byte by byte
 | |
|  * with the remaining input, and the counter is incremented
 | |
|  * once more. In all cases the counter is finally written to
 | |
|  * the IV buffer, overwriting any keystream parked there.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_crypt_ctr)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
 | |
| 	LOAD_IV(rI0, 0)
 | |
| 	LOAD_IV(rI1, 4)
 | |
| 	LOAD_IV(rI2, 8)
 | |
| 	cmpwi		rLN,16			/* fewer than 16 bytes: partial block only */
 | |
| 	LOAD_IV(rI3, 12)
 | |
| 	START_IV
 | |
| 	bt		lt,ppc_crypt_ctr_partial
 | |
| ppc_crypt_ctr_loop:
 | |
| 	mr		rKP,rKS			/* restart at the key copy from the prologue */
 | |
| 	START_KEY(rI0, rI1, rI2, rI3)		/* the counter is the cipher input */
 | |
| 	bl		ppc_encrypt_block
 | |
| 	xor		rW0,rD0,rW0		/* rW = keystream block */
 | |
| 	xor		rW1,rD1,rW1
 | |
| 	xor		rW2,rD2,rW2
 | |
| 	xor		rW3,rD3,rW3
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	subi		rLN,rLN,16		/* one block consumed */
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	xor		rD0,rD0,rW0		/* data xor keystream */
 | |
| 	SAVE_DATA(rD0, 0)
 | |
| 	xor		rD1,rD1,rW1
 | |
| 	SAVE_DATA(rD1, 4)
 | |
| 	xor		rD2,rD2,rW2
 | |
| 	SAVE_DATA(rD2, 8)
 | |
| 	xor		rD3,rD3,rW3
 | |
| 	SAVE_DATA(rD3, 12)
 | |
| 	addic		rI3,rI3,1	/* increase counter			*/
 | |
| 	addze		rI2,rI2			/* ripple the carry upwards */
 | |
| 	addze		rI1,rI1
 | |
| 	addze		rI0,rI0
 | |
| 	NEXT_BLOCK
 | |
| 	cmpwi		rLN,15
 | |
| 	bt		gt,ppc_crypt_ctr_loop	/* another whole block left */
 | |
| ppc_crypt_ctr_partial:
 | |
| 	cmpwi		rLN,0
 | |
| 	bt		eq,ppc_crypt_ctr_end	/* no trailing bytes */
 | |
| 	mr		rKP,rKS
 | |
| 	START_KEY(rI0, rI1, rI2, rI3)
 | |
| 	bl		ppc_encrypt_block
 | |
| 	xor		rW0,rD0,rW0		/* keystream block ... */
 | |
| 	SAVE_IV(rW0, 0)				/* ... parked in the IV buffer */
 | |
| 	xor		rW1,rD1,rW1
 | |
| 	SAVE_IV(rW1, 4)
 | |
| 	xor		rW2,rD2,rW2
 | |
| 	SAVE_IV(rW2, 8)
 | |
| 	xor		rW3,rD3,rW3
 | |
| 	SAVE_IV(rW3, 12)
 | |
| 	mtctr		rLN			/* one iteration per remaining byte */
 | |
| 	subi		rIP,rIP,CTR_DEC		/* one byte before the buffer start */
 | |
| 	subi		rSP,rSP,1		/* lbzu/stbu add 1 before each access */
 | |
| 	subi		rDP,rDP,1
 | |
| ppc_crypt_ctr_xorbyte:
 | |
| 	lbzu		rW4,1(rIP)	/* bytewise xor for partial block	*/
 | |
| 	lbzu		rW5,1(rSP)
 | |
| 	xor		rW4,rW4,rW5
 | |
| 	stbu		rW4,1(rDP)
 | |
| 	bdnz		ppc_crypt_ctr_xorbyte
 | |
| 	subf		rIP,rLN,rIP		/* rIP -= rLN */
 | |
| 	addi		rIP,rIP,1		/* back at the buffer start */
 | |
| 	addic		rI3,rI3,1		/* count the partial block too */
 | |
| 	addze		rI2,rI2
 | |
| 	addze		rI1,rI1
 | |
| 	addze		rI0,rI0
 | |
| ppc_crypt_ctr_end:
 | |
| 	SAVE_IV(rI0, 0)				/* hand the updated counter back */
 | |
| 	SAVE_IV(rI1, 4)
 | |
| 	SAVE_IV(rI2, 8)
 | |
| 	SAVE_IV(rI3, 12)
 | |
| 	FINALIZE_CRYPT(4)
 | |
| 	blr
 | |
| 
 | |
| /*
 | |
|  * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 | |
|  *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 | |
|  *
 | |
|  * called from glue layer to encrypt multiple blocks via XTS
 | |
|  * If key_twk is given, the initial IV encryption will be
 | |
|  * processed too. Round values are AES128 = 4, AES192 = 5,
 | |
|  * AES256 = 6
 | |
|  *
 | |
|  * The tweak is kept twice: rI0-rI3 hold the form that is xored
 | |
|  * into the data before and after the block cipher, rG0-rG3
 | |
|  * hold the byte-swapped form that GF128_MUL advances after each
 | |
|  * block. The loop body runs at least once and repeats while
 | |
|  * rLN is above zero after subtracting 16. The current tweak is
 | |
|  * written back to the IV buffer at the end.
 | |
|  * NOTE(review): the same rounds value is used for the tweak
 | |
|  * key as for the data key.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_encrypt_xts)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
 | |
| 	LOAD_IV(rI0, 0)
 | |
| 	LOAD_IV(rI1, 4)
 | |
| 	LOAD_IV(rI2, 8)
 | |
| 	cmpwi		rKT,0			/* tweak key given? */
 | |
| 	LOAD_IV(rI3, 12)
 | |
| 	bt		eq,ppc_encrypt_xts_notweak
 | |
| 	mr		rKP,rKT			/* encrypt the IV with the tweak key */
 | |
| 	START_KEY(rI0, rI1, rI2, rI3)
 | |
| 	bl		ppc_encrypt_block
 | |
| 	xor		rI0,rD0,rW0		/* rI = encrypted IV = first tweak */
 | |
| 	xor		rI1,rD1,rW1
 | |
| 	xor		rI2,rD2,rW2
 | |
| 	xor		rI3,rD3,rW3
 | |
| ppc_encrypt_xts_notweak:
 | |
| 	ENDIAN_SWAP(rG0, rG1, rI0, rI1)		/* rG = byte-swapped tweak for GF128_MUL */
 | |
| 	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
 | |
| ppc_encrypt_xts_loop:
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	mr		rKP,rKS			/* restart at the data key copy */
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	subi		rLN,rLN,16		/* one block consumed */
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	xor		rD0,rD0,rI0		/* tweak xor before the cipher */
 | |
| 	xor		rD1,rD1,rI1
 | |
| 	xor		rD2,rD2,rI2
 | |
| 	xor		rD3,rD3,rI3
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_encrypt_block
 | |
| 	xor		rD0,rD0,rW0		/* final xor with rW0-rW3 */
 | |
| 	xor		rD1,rD1,rW1
 | |
| 	xor		rD2,rD2,rW2
 | |
| 	xor		rD3,rD3,rW3
 | |
| 	xor		rD0,rD0,rI0		/* tweak xor after the cipher */
 | |
| 	SAVE_DATA(rD0, 0)
 | |
| 	xor		rD1,rD1,rI1
 | |
| 	SAVE_DATA(rD1, 4)
 | |
| 	xor		rD2,rD2,rI2
 | |
| 	SAVE_DATA(rD2, 8)
 | |
| 	xor		rD3,rD3,rI3
 | |
| 	SAVE_DATA(rD3, 12)
 | |
| 	GF128_MUL(rG0, rG1, rG2, rG3, rW0)	/* next tweak, rW0 is scratch */
 | |
| 	ENDIAN_SWAP(rI0, rI1, rG0, rG1)		/* refresh the xor form of the tweak */
 | |
| 	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
 | |
| 	cmpwi		rLN,0
 | |
| 	NEXT_BLOCK
 | |
| 	bt		gt,ppc_encrypt_xts_loop	/* bytes left */
 | |
| 	START_IV
 | |
| 	SAVE_IV(rI0, 0)				/* hand the current tweak back */
 | |
| 	SAVE_IV(rI1, 4)
 | |
| 	SAVE_IV(rI2, 8)
 | |
| 	SAVE_IV(rI3, 12)
 | |
| 	FINALIZE_CRYPT(8)
 | |
| 	blr
 | |
| 
 | |
| /*
 | |
|  * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 | |
|  *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 | |
|  *
 | |
|  * called from glue layer to decrypt multiple blocks via XTS
 | |
|  * If key_twk is given, the initial IV encryption will be
 | |
|  * processed too. Round values are AES128 = 4, AES192 = 5,
 | |
|  * AES256 = 6
 | |
|  *
 | |
|  * Data blocks go through ppc_decrypt_block, but the IV is
 | |
|  * still processed with ppc_encrypt_block. For that call rT0
 | |
|  * is lowered by 4096 and raised again afterwards.
 | |
|  * NOTE(review): this presumably relies on the encryption
 | |
|  * table sitting 4096 bytes in front of PPC_AES_4K_DECTAB -
 | |
|  * confirm against the table definitions.
 | |
|  * Tweak handling, loop condition and IV write back match the
 | |
|  * XTS encryption routine: rLN drops by 16 per block and the
 | |
|  * loop repeats while it is above zero.
 | |
|  *
 | |
|  */
 | |
| _GLOBAL(ppc_decrypt_xts)
 | |
| 	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
 | |
| 	LOAD_IV(rI0, 0)
 | |
| 	addi		rT1,rT0,4096		/* rT1 = table pointer + 4096 */
 | |
| 	LOAD_IV(rI1, 4)
 | |
| 	LOAD_IV(rI2, 8)
 | |
| 	cmpwi		rKT,0			/* tweak key given? */
 | |
| 	LOAD_IV(rI3, 12)
 | |
| 	bt		eq,ppc_decrypt_xts_notweak
 | |
| 	subi		rT0,rT0,4096		/* table pointer for the encrypt call */
 | |
| 	mr		rKP,rKT			/* encrypt the IV with the tweak key */
 | |
| 	START_KEY(rI0, rI1, rI2, rI3)
 | |
| 	bl		ppc_encrypt_block
 | |
| 	xor		rI0,rD0,rW0		/* rI = encrypted IV = first tweak */
 | |
| 	xor		rI1,rD1,rW1
 | |
| 	xor		rI2,rD2,rW2
 | |
| 	xor		rI3,rD3,rW3
 | |
| 	addi		rT0,rT0,4096		/* back to the decryption table */
 | |
| ppc_decrypt_xts_notweak:
 | |
| 	ENDIAN_SWAP(rG0, rG1, rI0, rI1)		/* rG = byte-swapped tweak for GF128_MUL */
 | |
| 	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
 | |
| ppc_decrypt_xts_loop:
 | |
| 	LOAD_DATA(rD0, 0)
 | |
| 	mr		rKP,rKS			/* restart at the data key copy */
 | |
| 	LOAD_DATA(rD1, 4)
 | |
| 	subi		rLN,rLN,16		/* one block consumed */
 | |
| 	LOAD_DATA(rD2, 8)
 | |
| 	LOAD_DATA(rD3, 12)
 | |
| 	xor		rD0,rD0,rI0		/* tweak xor before the cipher */
 | |
| 	xor		rD1,rD1,rI1
 | |
| 	xor		rD2,rD2,rI2
 | |
| 	xor		rD3,rD3,rI3
 | |
| 	START_KEY(rD0, rD1, rD2, rD3)
 | |
| 	bl		ppc_decrypt_block
 | |
| 	xor		rD0,rD0,rW0		/* final xor with rW0-rW3 */
 | |
| 	xor		rD1,rD1,rW1
 | |
| 	xor		rD2,rD2,rW2
 | |
| 	xor		rD3,rD3,rW3
 | |
| 	xor		rD0,rD0,rI0		/* tweak xor after the cipher */
 | |
| 	SAVE_DATA(rD0, 0)
 | |
| 	xor		rD1,rD1,rI1
 | |
| 	SAVE_DATA(rD1, 4)
 | |
| 	xor		rD2,rD2,rI2
 | |
| 	SAVE_DATA(rD2, 8)
 | |
| 	xor		rD3,rD3,rI3
 | |
| 	SAVE_DATA(rD3, 12)
 | |
| 	GF128_MUL(rG0, rG1, rG2, rG3, rW0)	/* next tweak, rW0 is scratch */
 | |
| 	ENDIAN_SWAP(rI0, rI1, rG0, rG1)		/* refresh the xor form of the tweak */
 | |
| 	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
 | |
| 	cmpwi		rLN,0
 | |
| 	NEXT_BLOCK
 | |
| 	bt		gt,ppc_decrypt_xts_loop	/* bytes left */
 | |
| 	START_IV
 | |
| 	SAVE_IV(rI0, 0)				/* hand the current tweak back */
 | |
| 	SAVE_IV(rI1, 4)
 | |
| 	SAVE_IV(rI2, 8)
 | |
| 	SAVE_IV(rI3, 12)
 | |
| 	FINALIZE_CRYPT(8)
 | |
| 	blr
 | 
