#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [data is
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual misalignment
# penalties at page boundaries are on average lower than the additional
# overhead of the pure AltiVec approach.
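#
# For reference, the classic AltiVec realignment idiom used by the
# single-block paths below looks like this (an illustrative sketch
# only, not emitted code; it mirrors the single-block cipher path):
#
#	lvsl	v2,0,$inp		# permute vector for realignment
#	lvx	v0,0,$inp		# aligned quadwords straddling
#	lvx	v1,$idx,$inp		#  the unaligned data
#	vperm	v0,v0,v1,v2		# extract the payload
#
# The parallelizable paths instead use lvx_u/stvx_u, which
# ppc-xlate.pl translates to VSX unaligned loads and stores.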
#
# May 2016
#
# Added XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open(STDOUT,"| $^X $xlate $flavour ".shift) || die "can't call $xlate: $!";
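
# The first argument selects the target "flavour" (e.g. linux-ppc64 or
# linux-ppc64le) and an optional second argument names the output file;
# everything written to STDOUT is piped through ppc-xlate.pl, which
# resolves the ?- and le?/be?-prefixed instructions for the chosen
# endianness. A typical invocation would be (illustrative):
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S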

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
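
# The expansion below follows FIPS-197: the first word of each round key
# is w[i] = w[i-4] ^ SubWord(RotWord(w[i-1])) ^ Rcon[i/4]; the remaining
# words are w[i] = w[i-4] ^ w[i-1]. An illustrative mapping onto one
# Loop128 iteration (a sketch; register roles are declared above):
#
#	vperm		$key,$in0,$in0,$mask	# rotate w[i-1], splat to all words
#	vcipherlast	$key,$key,$rcon		# net effect: SubWord(...) ^ Rcon
#	vsldoi		$tmp,$zero,$in0,12	# previous round key >> 32
#	vxor		$in0,$in0,$tmp		# accumulate the w[i-4] ^ ... chain
#	...					# (two more vsldoi/vxor pairs)
#	vxor		$in0,$in0,$key		# fold in the substituted word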

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr
	addi	$ptr,$ptr,-0x48	 # -0x48 is the distance between . and rcon
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	 vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
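
# Processing eight blocks at once keeps eight independent vncipher
# dependency chains in flight, hiding the instruction's latency (cf.
# the note on interleaving in the header). Round keys are first copied
# to an aligned buffer on the stack ($key_) so that the hot loop can
# reload them with plain lvx on every pass.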

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	 le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	 le?lvsl	$inpperm,0,$idx
	 le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	 le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	 le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	 le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	 le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	 le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	 le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	 le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	 le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	 vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	 vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	 lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	 le?vperm	$in0,$in0,$in0,$inpperm
	 lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	 le?vperm	$in1,$in1,$in1,$inpperm
	 lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	 le?vperm	$in2,$in2,$in2,$inpperm
	 lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	 le?vperm	$in3,$in3,$in3,$inpperm
	 lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	 le?vperm	$in4,$in4,$in4,$inpperm
	 lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	 le?vperm	$in5,$in5,$in5,$inpperm
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	 le?vperm	$in6,$in6,$in6,$inpperm
	 vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	 le?vperm	$in7,$in7,$in7,$inpperm
	 vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	 vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	 vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	 vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	 vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vadduqm rather than vadduwm. This occurs in
# both the bulk (8 blocks at a time) path and the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
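# Illustrative example of the difference: with a counter block whose
# low word is 0xffffffff, a single increment gives
#
#	vadduwm (32-bit word add):  ..._00000001_ffffffff -> ..._00000001_00000000
#	vadduqm (128-bit quad add): ..._00000001_ffffffff -> ..._00000002_00000000
#
# i.e. vadduwm discards the carry out of the low 32-bit lane, while
# vadduqm propagates it across the full 128-bit counter, which is what
# the kernel's 128-bit IV handling requires.
#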
#########################################################################
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	 vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	 vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduqm		$ivec,$ivec,$one	# Kernel change for 128-bit
	 vmr		$dat,$inptail
	 lvx		$inptail,0,$inp
	 addi		$inp,$inp,16
	 subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	 vperm		$dat,$dat,$inptail,$inpperm
	 li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	 lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	 lvx		$rndkey1,$idx,$key
	 addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	 mtctr		$rounds
	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	 vxor		$inout,$ivec,$rndkey0
	 lvx		$rndkey0,$idx,$key
	 addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_ctr32_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_ctr32_enc_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	vadduqm		$two,$one,$one
	subi		$inp,$inp,15		# undo "caller"
	$SHL		$len,$len,4

	vadduqm		$out1,$ivec,$one	# counter values ...
	vadduqm		$out2,$ivec,$two	# (do all ctr adds as 128-bit)
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	 le?li		$idx,8
	vadduqm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	 le?lvsl	$inpperm,0,$idx
	vadduqm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	 le?vspltisb	$tmp,0x0f
	vadduqm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vadduqm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	vadduqm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	vxor		$out7,$out7,$rndkey0

	mtctr		$rounds
	b		Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher 	$out0,$out0,v24
	vcipher 	$out1,$out1,v24
	vcipher 	$out2,$out2,v24
	vcipher 	$out3,$out3,v24
	vcipher 	$out4,$out4,v24
	vcipher 	$out5,$out5,v24
	vcipher 	$out6,$out6,v24
	vcipher 	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher 	$out0,$out0,v25
	vcipher 	$out1,$out1,v25
	vcipher 	$out2,$out2,v25
	vcipher 	$out3,$out3,v25
	vcipher 	$out4,$out4,v25
	vcipher 	$out5,$out5,v25
	vcipher 	$out6,$out6,v25
	vcipher 	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_ctr32_enc8x

	subic		r11,$len,256		# $len-256, borrow $key_
	vcipher 	$out0,$out0,v24
	vcipher 	$out1,$out1,v24
	vcipher 	$out2,$out2,v24
	vcipher 	$out3,$out3,v24
	vcipher 	$out4,$out4,v24
	vcipher 	$out5,$out5,v24
	vcipher 	$out6,$out6,v24
	vcipher 	$out7,$out7,v24

	subfe		r0,r0,r0		# borrow?-1:0
	vcipher 	$out0,$out0,v25
	vcipher 	$out1,$out1,v25
	vcipher 	$out2,$out2,v25
	vcipher 	$out3,$out3,v25
	vcipher 	$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25

	and		r0,r0,r11
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26
	vcipher		$out6,$out6,v26
	vcipher		$out7,$out7,v26
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	subic		$len,$len,129		# $len-=129
	vcipher		$out0,$out0,v27
	addi		$len,$len,1		# $len-=128 really
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27
	vcipher		$out6,$out6,v27
	vcipher		$out7,$out7,v27
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vcipher		$out0,$out0,v28
	 lvx_u		$in0,$x00,$inp		# load input
	vcipher		$out1,$out1,v28
	 lvx_u		$in1,$x10,$inp
	vcipher		$out2,$out2,v28
	 lvx_u		$in2,$x20,$inp
	vcipher		$out3,$out3,v28
	 lvx_u		$in3,$x30,$inp
	vcipher		$out4,$out4,v28
	 lvx_u		$in4,$x40,$inp
	vcipher		$out5,$out5,v28
	 lvx_u		$in5,$x50,$inp
	vcipher		$out6,$out6,v28
	 lvx_u		$in6,$x60,$inp
	vcipher		$out7,$out7,v28
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	vcipher		$out0,$out0,v29
	 le?vperm	$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v29
	 le?vperm	$in1,$in1,$in1,$inpperm
	vcipher		$out2,$out2,v29
	 le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out3,$out3,v29
	 le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out4,$out4,v29
	 le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out5,$out5,v29
	 le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out6,$out6,v29
	 le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out7,$out7,v29
	 le?vperm	$in7,$in7,$in7,$inpperm

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	subfe.		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v30
	 vxor		$in0,$in0,v31		# xor with last round key
	vcipher		$out1,$out1,v30
	 vxor		$in1,$in1,v31
	vcipher		$out2,$out2,v30
	 vxor		$in2,$in2,v31
	vcipher		$out3,$out3,v30
	 vxor		$in3,$in3,v31
	vcipher		$out4,$out4,v30
	 vxor		$in4,$in4,v31
	vcipher		$out5,$out5,v30
	 vxor		$in5,$in5,v31
	vcipher		$out6,$out6,v30
	 vxor		$in6,$in6,v31
	vcipher		$out7,$out7,v30
	 vxor		$in7,$in7,v31

	bne		Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	 vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	 vadduqm	$out2,$ivec,$two
	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	 vadduqm	$out3,$out1,$two
	 vxor		$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	 vadduqm	$out4,$out2,$two
	 vxor		$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	 vadduqm	$out5,$out3,$two
	 vxor		$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	 vadduqm	$out6,$out4,$two
	 vxor		$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	 vadduqm	$out7,$out5,$two
	 vxor		$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	 vadduqm	$ivec,$out6,$two	# next counter value
	 vxor		$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	 vxor		$out7,$out7,$rndkey0
	mtctr		$rounds

	 vcipher	$out0,$out0,v24
	stvx_u		$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	 vcipher	$out1,$out1,v24
	stvx_u		$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	 vcipher	$out2,$out2,v24
	stvx_u		$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	 vcipher	$out3,$out3,v24
	stvx_u		$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	 vcipher	$out4,$out4,v24
	stvx_u		$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	 vcipher	$out5,$out5,v24
	stvx_u		$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	 vcipher	$out6,$out6,v24
	stvx_u		$in6,$x60,$out
	 vcipher	$out7,$out7,v24
	stvx_u		$in7,$x70,$out
	addi		$out,$out,0x80

	b		Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi		$len,-0x60
	blt		Lctr32_enc8x_one
	nop
	beq		Lctr32_enc8x_two
	cmpwi		$len,-0x40
	blt		Lctr32_enc8x_three
	nop
	beq		Lctr32_enc8x_four
	cmpwi		$len,-0x20
	blt		Lctr32_enc8x_five
	nop
	beq		Lctr32_enc8x_six
	cmpwi		$len,0x00
	blt		Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	stvx_u		$out6,$x60,$out
	addi		$out,$out,0x70
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	stvx_u		$out5,$x50,$out
	addi		$out,$out,0x60
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u		$out0,0,$out
	addi		$out,$out,0x10

Lctr32_enc8x_done:
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,	#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be encrypted already, and the	#
# last tweak value, suitable for a consecutive call on the same chunk	#
# of data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#
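#
# The tweak is advanced once per block by multiplication by x in
# GF(2^128) modulo the XTS polynomial x^128+x^7+x^2+x+1 (hence the
# 0x87 in the $eighty7 constant built below). A sketch of the update
# that the vsrab/vaddubm/vsldoi/vand/vxor sequences implement:
#
#	carry = tweak >> 127			# top bit of the tweak
#	tweak = (tweak << 1) ^ (carry ? 0x87 : 0)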

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

   ($inp,$idx) = ($idx,$inp);				# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr		$inp,r3				# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff0
	mfspr		r12,256				# save vrsave
	li		r11,0
	mtspr		256,r0

	vspltisb	$seven,0x07			# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp			# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11			# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0				# key2==NULL?
	beq		Lxts_enc_no_key2

	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0				# don't chain the tweak
	b		Lxts_enc

Lxts_enc_no_key2:
	li		$idx,-16
	and		$len,$len,$idx			# in "tweak chaining"
							# mode only complete
							# blocks are processed
Lxts_enc:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge		_aesp8_xts_encrypt6x

	andi.		$taillen,$len,15
	subic		r0,$len,32
	subi		$taillen,$taillen,16
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds
	b		Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_enc_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	subic		r0,$len,32
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$output,$output,$rndkey0	# just in case $len<16
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_enc

	vxor		$output,$output,$tweak
	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	subi		r11,$out,17
	subi		$out,$out,16
	mtctr		$len
	li		$len,16
Loop_xts_enc_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_enc_steal

	mtctr		$rounds
	b		Loop_xts_enc			# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc_ret

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc_ret:
	mtspr		256,r12				# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
	mr		$inp,r3				# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff8
	mfspr		r12,256				# save vrsave
	li		r11,0
	mtspr		256,r0

	andi.		r0,$len,15
	neg		r0,r0
	andi.		r0,r0,16
	sub		$len,$len,r0

	vspltisb	$seven,0x07			# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp			# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11			# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0				# key2==NULL?
	beq		Lxts_dec_no_key2

	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0				# don't chain the tweak
	b		Lxts_dec

Lxts_dec_no_key2:
	neg		$idx,$len
	andi.		$idx,$idx,15
	add		$len,$len,$idx			# in "tweak chaining"
							# mode only complete
							# blocks are processed
Lxts_dec:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge		_aesp8_xts_decrypt6x

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds

	${UCMP}i	$len,16
	blt		Ltail_xts_dec
	be?b		Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_dec_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_dec

Ltail_xts_dec:
	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak1,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak1,$tweak1,$tmp

	subi		$inp,$inp,16
	add		$inp,$inp,$len

	vxor		$inout,$inout,$tweak		# :-(
	vxor		$inout,$inout,$tweak1		# :-)
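# For decryption the tweak order is reversed: the last complete block must
# be deciphered with the *next* tweak ($tweak1) while the stolen tail keeps
# the current one, so the $tweak whitening applied earlier is cancelled and
# replaced with $tweak1 by the two vxors above.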

Loop_xts_dec_short:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec_short

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	#addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	vxor		$rndkey0,$rndkey0,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	subi		r11,$out,1
	mtctr		$len
	li		$len,16
Loop_xts_dec_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_dec_steal

	mtctr		$rounds
	b		Loop_xts_dec			# one more time...

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec_ret

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec_ret:
	mtspr		256,r12				# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for the leading round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;
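# The 6x path keeps six data blocks and their tweaks in flight, interleaving
# the tweak updates with the vcipher rounds to help hide instruction latency;
# v24/v25 rotate through the middle round keys while v26-v31 hold the last
# six, as noted above.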

$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_enc_key
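# Round keys that do not fit in the v24-v31 window are staged in the stack
# frame by the loop above and streamed back through the rotating v24/v25
# pair, two rounds per iteration, by the main loop.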

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	 vperm		$in0,$inout,$inptail,$inpperm
	 subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	 vxor		$out0,$in0,$twk0
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out1,$in1,$twk1
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in2,$x20,$inp
	 andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out2,$in2,$twk2
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in3,$x30,$inp
	 sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out3,$in3,$twk3
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in4,$x40,$inp
	 subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out4,$in4,$twk4
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out5,$in5,$twk5
	vxor		$tweak,$tweak,$tmp

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_enc6x
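# Each pass of Loop_xts_enc6x below retires two AES rounds for all six
# blocks while fetching the next two round keys from the stack copy; after
# the loop the remaining rounds are interleaved with the computation of the
# next six tweaks (the extra-indented instructions).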

.align	5
Loop_xts_enc6x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc6x

	subic		$len,$len,96		# $len-=96
	 vxor		$in0,$twk0,v31		# xor with last round key
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk0,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	 vxor		$in1,$twk1,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk1,$tweak,$rndkey0
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25

	and		r0,r0,$len
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# a way that at exit from
						# the loop inX-in5 are
						# loaded with the last "words"
	 vxor		$in2,$twk2,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk2,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vcipher		$out0,$out0,v27
	vcipher		$out1,$out1,v27
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	 vxor		$in3,$twk3,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk3,$tweak,$rndkey0
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v28
	vcipher		$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vand		$tmp,$tmp,$eighty7

	vcipher		$out0,$out0,v29
	vcipher		$out1,$out1,v29
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	 vxor		$in4,$twk4,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk4,$tweak,$rndkey0
	vcipher		$out4,$out4,v29
	vcipher		$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15

	vcipher		$out0,$out0,v30
	vcipher		$out1,$out1,v30
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v30
	vcipher		$out5,$out5,v30
	 vxor		$in5,$twk5,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipherlast	$out1,$out1,$in1
	 lvx_u		$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	 le?vperm	$in0,$in0,$in0,$leperm
	 lvx_u		$in2,$x20,$inp
	 vand		$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	 le?vperm	$in1,$in1,$in1,$leperm
	 lvx_u		$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	 le?vperm	$in2,$in2,$in2,$leperm
	 lvx_u		$in4,$x40,$inp
	 vxor		$tweak,$tweak,$tmp
	vcipherlast	$tmp,$out5,$in5		# last block might be needed
						# in stealing mode
	 le?vperm	$in3,$in3,$in3,$leperm
	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	 le?vperm	$in4,$in4,$in4,$leperm
	 le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	 vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	 vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	 vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_enc6x		# did $len-=96 borrow?

	addic.		$len,$len,0x60
	beq		Lxts_enc6x_zero
	cmpwi		$len,0x20
	blt		Lxts_enc6x_one
	nop
	beq		Lxts_enc6x_two
	cmpwi		$len,0x40
	blt		Lxts_enc6x_three
	nop
	beq		Lxts_enc6x_four

Lxts_enc6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vxor		$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher		$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc1x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24

	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25

	lvsr		$inpperm,0,$taillen
	vcipher		$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vcipher		$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vcipher		$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vxor		$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_enc6x_done

	add		$inp,$inp,$taillen
	subi		$inp,$inp,16
	lvx_u		$in0,0,$inp
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm		$in0,$in0,$in0,$inpperm
	vxor		$tmp,$tmp,$twk0
Lxts_enc6x_steal:
	vxor		$in0,$in0,$twk0
	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi		r30,$out,17
	subi		$out,$out,16
	mtctr		$taillen
Loop_xts_enc6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_enc6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_enc1x		# one more time...

.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0
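
# _aesp8_xts_enc5x: shared tail helper for the 2..5-block cases above; the
# callers zero any unused out0-out4 slots, and the load of a final partial
# input block is interleaved with the closing rounds.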

.align	5
_aesp8_xts_enc5x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_enc5x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24

	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	 vxor		$twk0,$twk0,v31

	vcipher		$out0,$out0,v26
	lvsr		$inpperm,r0,$taillen	# $in5 is no more
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	 vxor		$in1,$twk1,v31

	vcipher		$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	 vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	vcipher		$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vxor		$in3,$twk3,v31

	vcipher		$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out1,$out1,v29
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	vcipher		$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$in4,$twk4,v31

	vcipher		$out0,$out0,v30
	vperm		$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v30
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	vcipher		$out4,$out4,v30

	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0

.align	5
_aesp8_xts_decrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_dec_key

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	 vperm		$in0,$inout,$inptail,$inpperm
	 subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	 vxor		$out0,$in0,$twk0
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out1,$in1,$twk1
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in2,$x20,$inp
	 andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out2,$in2,$twk2
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in3,$x30,$inp
	 sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out3,$in3,$twk3
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in4,$x40,$inp
	 subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out4,$in4,$twk4
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out5,$in5,$twk5
	vxor		$tweak,$tweak,$tmp

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_dec6x

.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec6x

	subic		$len,$len,96		# $len-=96
	 vxor		$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk0,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	 vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	 vxor		$in1,$twk1,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and		r0,r0,$len
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	 vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# a way that at exit from
						# the loop inX-in5 are
						# loaded with the last "words"
	 vxor		$in2,$twk2,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk2,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	 vand		$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	 vxor		$in3,$twk3,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vand		$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	 vxor		$in4,$twk4,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	 vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	 vxor		$in5,$twk5,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipherlast	$out1,$out1,$in1
	 lvx_u		$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	 le?vperm	$in0,$in0,$in0,$leperm
	 lvx_u		$in2,$x20,$inp
	 vand		$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	 le?vperm	$in1,$in1,$in1,$leperm
	 lvx_u		$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	 le?vperm	$in2,$in2,$in2,$leperm
	 lvx_u		$in4,$x40,$inp
	 vxor		$tweak,$tweak,$tmp
	vncipherlast	$out5,$out5,$in5
	 le?vperm	$in3,$in3,$in3,$leperm
	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	 le?vperm	$in4,$in4,$in4,$leperm
	 le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	 vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	 vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	 vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?

	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four

Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	 vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	 vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	 vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
        s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion: '?rev' entries are byte-reversed,
	    # '?inv' entries have their permute indices complemented (^0xf)
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
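	# (on little-endian the lvsl/lvsr permutations and the vperm/vsldoi
	# operand order are mirror images, hence the swaps below)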
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

        print $_,"\n";
}

close STDOUT;
