MINI Sh3ll

Path : /usr/src/linux-headers-5.15.0-138/arch/arm/crypto/
File Upload :
Current File : //usr/src/linux-headers-5.15.0-138/arch/arm/crypto/poly1305-armv4.pl

#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
#
# ====================================================================
# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
# project.
# ====================================================================
#
#			IALU(*)/gcc-4.4		NEON
#
# ARM11xx(ARMv6)	7.78/+100%		-
# Cortex-A5		6.35/+130%		3.00
# Cortex-A8		6.25/+115%		2.36
# Cortex-A9		5.10/+95%		2.55
# Cortex-A15		3.85/+85%		1.25(**)
# Snapdragon S4		5.70/+100%		1.48(**)
#
# (*)	this is for -march=armv6, i.e. with a bunch of ldrb loading data;
# (**)	these are trade-off results, they can be improved by ~8% but at
#	the cost of 15/12% regression on Cortex-A5/A7, it's even possible
#	to improve Cortex-A9 result, but then A5/A7 lose more than 20%;

# Command-line handling:
#
#	poly1305-armv4.pl [flavour] [...] [output]
#
# The first argument is taken as the output file name if it looks like
# a file name ("name.ext"); otherwise it is the flavour, and the
# remaining arguments are scanned for the first one that looks like a
# file name.  Both are optional.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    # A real flavour was requested: pipe everything through
    # arm-xlate.pl, looked up next to this script or in
    # ../../perlasm/ relative to it.  If $0 carries no directory
    # component, search relative to the current directory instead of
    # depending on whatever $1 happened to hold.
    $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Fail loudly if the translator cannot be started, rather than
    # silently emitting untranslated code on the original STDOUT.
    open STDOUT,"| \"$^X\" $xlate $flavour $output"
	or die "can't call $xlate: $!";
} elsif (defined($output) && length($output)) {
    # Output file given explicitly: it must be writable.
    open STDOUT,">",$output
	or die "can't open $output: $!";
}
# With no output file named, STDOUT is deliberately left alone, so the
# generated code goes wherever the caller redirected it, e.g.
# "perl poly1305-armv4.pl > out.S".

# Symbolic names for core registers r0-r3, interpolated into the
# assembly templates below: $ctx=r0, $inp=r1, $len=r2, $padbit=r3.
# In the templates $ctx is used as the base of the context structure,
# $inp as the pointer bytes are loaded from, $len as the byte count
# (turned into an end pointer in poly1305_blocks) and $padbit as the
# flag tested before adding the 2^128 bit to each block.
# NOTE(review): these appear to be the first four argument registers of
# the generated functions -- confirm against the C prototypes.
($ctx,$inp,$len,$padbit)=map("r$_",(0..3));

$code.=<<___;
#ifndef	__KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
# define poly1305_init   poly1305_init_arm
# define poly1305_blocks poly1305_blocks_arm
# define poly1305_emit   poly1305_emit_arm
.globl	poly1305_blocks_neon
#endif

#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.text

.globl	poly1305_emit
.globl	poly1305_blocks
.globl	poly1305_init
.type	poly1305_init,%function
.align	5
poly1305_init:
.Lpoly1305_init:
	stmdb	sp!,{r4-r11}

	eor	r3,r3,r3
	cmp	$inp,#0
	str	r3,[$ctx,#0]		@ zero hash value
	str	r3,[$ctx,#4]
	str	r3,[$ctx,#8]
	str	r3,[$ctx,#12]
	str	r3,[$ctx,#16]
	str	r3,[$ctx,#36]		@ clear is_base2_26
	add	$ctx,$ctx,#20

#ifdef	__thumb2__
	it	eq
#endif
	moveq	r0,#0
	beq	.Lno_key

#if	__ARM_MAX_ARCH__>=7
	mov	r3,#-1
	str	r3,[$ctx,#28]		@ impossible key power value
# ifndef __KERNEL__
	adr	r11,.Lpoly1305_init
	ldr	r12,.LOPENSSL_armcap
# endif
#endif
	ldrb	r4,[$inp,#0]
	mov	r10,#0x0fffffff
	ldrb	r5,[$inp,#1]
	and	r3,r10,#-4		@ 0x0ffffffc
	ldrb	r6,[$inp,#2]
	ldrb	r7,[$inp,#3]
	orr	r4,r4,r5,lsl#8
	ldrb	r5,[$inp,#4]
	orr	r4,r4,r6,lsl#16
	ldrb	r6,[$inp,#5]
	orr	r4,r4,r7,lsl#24
	ldrb	r7,[$inp,#6]
	and	r4,r4,r10

#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
# if !defined(_WIN32)
	ldr	r12,[r11,r12]		@ OPENSSL_armcap_P
# endif
# if defined(__APPLE__) || defined(_WIN32)
	ldr	r12,[r12]
# endif
#endif
	ldrb	r8,[$inp,#7]
	orr	r5,r5,r6,lsl#8
	ldrb	r6,[$inp,#8]
	orr	r5,r5,r7,lsl#16
	ldrb	r7,[$inp,#9]
	orr	r5,r5,r8,lsl#24
	ldrb	r8,[$inp,#10]
	and	r5,r5,r3

#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	tst	r12,#ARMV7_NEON		@ check for NEON
# ifdef	__thumb2__
	adr	r9,.Lpoly1305_blocks_neon
	adr	r11,.Lpoly1305_blocks
	it	ne
	movne	r11,r9
	adr	r12,.Lpoly1305_emit
	orr	r11,r11,#1		@ thumb-ify addresses
	orr	r12,r12,#1
# else
	add	r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
	ite	eq
	addeq	r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
	addne	r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
# endif
#endif
	ldrb	r9,[$inp,#11]
	orr	r6,r6,r7,lsl#8
	ldrb	r7,[$inp,#12]
	orr	r6,r6,r8,lsl#16
	ldrb	r8,[$inp,#13]
	orr	r6,r6,r9,lsl#24
	ldrb	r9,[$inp,#14]
	and	r6,r6,r3

	ldrb	r10,[$inp,#15]
	orr	r7,r7,r8,lsl#8
	str	r4,[$ctx,#0]
	orr	r7,r7,r9,lsl#16
	str	r5,[$ctx,#4]
	orr	r7,r7,r10,lsl#24
	str	r6,[$ctx,#8]
	and	r7,r7,r3
	str	r7,[$ctx,#12]
#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	stmia	r2,{r11,r12}		@ fill functions table
	mov	r0,#1
#else
	mov	r0,#0
#endif
.Lno_key:
	ldmia	sp!,{r4-r11}
#if	__ARM_ARCH__>=5
	ret				@ bx	lr
#else
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	poly1305_init,.-poly1305_init
___
{
# Register allocation for the integer-only poly1305_blocks template:
#   $h0-$h4 = r4-r8	hash value (accumulator) limbs
#   $r0-$r3 = r9-r12	key words
# Do not confuse the Perl variables $r0-$r3 (r9-r12) with the literal
# r0-r3 that appear in the template text; the latter are used there as
# scratch for the loaded input words and the multiplication results.
my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12));
# $s1-$s3 are the very same registers as $r1-$r3.  The template
# overwrites each with r+(r>>2) and reloads the original $r1-$r3 from
# the stack slots at sp+20/24/28 when they are needed again.
my ($s1,$s2,$s3)=($r1,$r2,$r3);

$code.=<<___;
.type	poly1305_blocks,%function
.align	5
poly1305_blocks:
.Lpoly1305_blocks:
	stmdb	sp!,{r3-r11,lr}

	ands	$len,$len,#-16
	beq	.Lno_data

	add	$len,$len,$inp		@ end pointer
	sub	sp,sp,#32

#if __ARM_ARCH__<7
	ldmia	$ctx,{$h0-$r3}		@ load context
	add	$ctx,$ctx,#20
	str	$len,[sp,#16]		@ offload stuff
	str	$ctx,[sp,#12]
#else
	ldr	lr,[$ctx,#36]		@ is_base2_26
	ldmia	$ctx!,{$h0-$h4}		@ load hash value
	str	$len,[sp,#16]		@ offload stuff
	str	$ctx,[sp,#12]

	adds	$r0,$h0,$h1,lsl#26	@ base 2^26 -> base 2^32
	mov	$r1,$h1,lsr#6
	adcs	$r1,$r1,$h2,lsl#20
	mov	$r2,$h2,lsr#12
	adcs	$r2,$r2,$h3,lsl#14
	mov	$r3,$h3,lsr#18
	adcs	$r3,$r3,$h4,lsl#8
	mov	$len,#0
	teq	lr,#0
	str	$len,[$ctx,#16]		@ clear is_base2_26
	adc	$len,$len,$h4,lsr#24

	itttt	ne
	movne	$h0,$r0			@ choose between radixes
	movne	$h1,$r1
	movne	$h2,$r2
	movne	$h3,$r3
	ldmia	$ctx,{$r0-$r3}		@ load key
	it	ne
	movne	$h4,$len
#endif

	mov	lr,$inp
	cmp	$padbit,#0
	str	$r1,[sp,#20]
	str	$r2,[sp,#24]
	str	$r3,[sp,#28]
	b	.Loop

.align	4
.Loop:
#if __ARM_ARCH__<7
	ldrb	r0,[lr],#16		@ load input
# ifdef	__thumb2__
	it	hi
# endif
	addhi	$h4,$h4,#1		@ 1<<128
	ldrb	r1,[lr,#-15]
	ldrb	r2,[lr,#-14]
	ldrb	r3,[lr,#-13]
	orr	r1,r0,r1,lsl#8
	ldrb	r0,[lr,#-12]
	orr	r2,r1,r2,lsl#16
	ldrb	r1,[lr,#-11]
	orr	r3,r2,r3,lsl#24
	ldrb	r2,[lr,#-10]
	adds	$h0,$h0,r3		@ accumulate input

	ldrb	r3,[lr,#-9]
	orr	r1,r0,r1,lsl#8
	ldrb	r0,[lr,#-8]
	orr	r2,r1,r2,lsl#16
	ldrb	r1,[lr,#-7]
	orr	r3,r2,r3,lsl#24
	ldrb	r2,[lr,#-6]
	adcs	$h1,$h1,r3

	ldrb	r3,[lr,#-5]
	orr	r1,r0,r1,lsl#8
	ldrb	r0,[lr,#-4]
	orr	r2,r1,r2,lsl#16
	ldrb	r1,[lr,#-3]
	orr	r3,r2,r3,lsl#24
	ldrb	r2,[lr,#-2]
	adcs	$h2,$h2,r3

	ldrb	r3,[lr,#-1]
	orr	r1,r0,r1,lsl#8
	str	lr,[sp,#8]		@ offload input pointer
	orr	r2,r1,r2,lsl#16
	add	$s1,$r1,$r1,lsr#2
	orr	r3,r2,r3,lsl#24
#else
	ldr	r0,[lr],#16		@ load input
	it	hi
	addhi	$h4,$h4,#1		@ padbit
	ldr	r1,[lr,#-12]
	ldr	r2,[lr,#-8]
	ldr	r3,[lr,#-4]
# ifdef	__ARMEB__
	rev	r0,r0
	rev	r1,r1
	rev	r2,r2
	rev	r3,r3
# endif
	adds	$h0,$h0,r0		@ accumulate input
	str	lr,[sp,#8]		@ offload input pointer
	adcs	$h1,$h1,r1
	add	$s1,$r1,$r1,lsr#2
	adcs	$h2,$h2,r2
#endif
	add	$s2,$r2,$r2,lsr#2
	adcs	$h3,$h3,r3
	add	$s3,$r3,$r3,lsr#2

	umull	r2,r3,$h1,$r0
	 adc	$h4,$h4,#0
	umull	r0,r1,$h0,$r0
	umlal	r2,r3,$h4,$s1
	umlal	r0,r1,$h3,$s1
	ldr	$r1,[sp,#20]		@ reload $r1
	umlal	r2,r3,$h2,$s3
	umlal	r0,r1,$h1,$s3
	umlal	r2,r3,$h3,$s2
	umlal	r0,r1,$h2,$s2
	umlal	r2,r3,$h0,$r1
	str	r0,[sp,#0]		@ future $h0
	 mul	r0,$s2,$h4
	ldr	$r2,[sp,#24]		@ reload $r2
	adds	r2,r2,r1		@ d1+=d0>>32
	 eor	r1,r1,r1
	adc	lr,r3,#0		@ future $h2
	str	r2,[sp,#4]		@ future $h1

	mul	r2,$s3,$h4
	eor	r3,r3,r3
	umlal	r0,r1,$h3,$s3
	ldr	$r3,[sp,#28]		@ reload $r3
	umlal	r2,r3,$h3,$r0
	umlal	r0,r1,$h2,$r0
	umlal	r2,r3,$h2,$r1
	umlal	r0,r1,$h1,$r1
	umlal	r2,r3,$h1,$r2
	umlal	r0,r1,$h0,$r2
	umlal	r2,r3,$h0,$r3
	ldr	$h0,[sp,#0]
	mul	$h4,$r0,$h4
	ldr	$h1,[sp,#4]

	adds	$h2,lr,r0		@ d2+=d1>>32
	ldr	lr,[sp,#8]		@ reload input pointer
	adc	r1,r1,#0
	adds	$h3,r2,r1		@ d3+=d2>>32
	ldr	r0,[sp,#16]		@ reload end pointer
	adc	r3,r3,#0
	add	$h4,$h4,r3		@ h4+=d3>>32

	and	r1,$h4,#-4
	and	$h4,$h4,#3
	add	r1,r1,r1,lsr#2		@ *=5
	adds	$h0,$h0,r1
	adcs	$h1,$h1,#0
	adcs	$h2,$h2,#0
	adcs	$h3,$h3,#0
	adc	$h4,$h4,#0

	cmp	r0,lr			@ done yet?
	bhi	.Loop

	ldr	$ctx,[sp,#12]
	add	sp,sp,#32
	stmdb	$ctx,{$h0-$h4}		@ store the result

.Lno_data:
#if	__ARM_ARCH__>=5
	ldmia	sp!,{r3-r11,pc}
#else
	ldmia	sp!,{r3-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	poly1305_blocks,.-poly1305_blocks
___
}
{
# Register allocation for the poly1305_emit template.  These lexicals
# shadow the file-scope $ctx until the closing brace of this block,
# which also encloses the NEON code further down.
#   $ctx=r0	context the hash value is loaded from
#   $mac=r1	where the 16-byte result is stored
#   $nonce=r2	four 32-bit words added to the hash before storing
my ($ctx,$mac,$nonce)=map("r$_",(0..2));
#   $h0-$h4 = r3-r7	hash value limbs
#   $g0-$g3 = r8-r11	hash+5 candidate, later reused for nonce words
my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11));
# $g4 shares r0 with $ctx: the template performs all of its loads
# through $ctx before the first write to $g4.
my $g4=$ctx;

$code.=<<___;
.type	poly1305_emit,%function
.align	5
poly1305_emit:
.Lpoly1305_emit:
	stmdb	sp!,{r4-r11}

	ldmia	$ctx,{$h0-$h4}

#if __ARM_ARCH__>=7
	ldr	ip,[$ctx,#36]		@ is_base2_26

	adds	$g0,$h0,$h1,lsl#26	@ base 2^26 -> base 2^32
	mov	$g1,$h1,lsr#6
	adcs	$g1,$g1,$h2,lsl#20
	mov	$g2,$h2,lsr#12
	adcs	$g2,$g2,$h3,lsl#14
	mov	$g3,$h3,lsr#18
	adcs	$g3,$g3,$h4,lsl#8
	mov	$g4,#0
	adc	$g4,$g4,$h4,lsr#24

	tst	ip,ip
	itttt	ne
	movne	$h0,$g0
	movne	$h1,$g1
	movne	$h2,$g2
	movne	$h3,$g3
	it	ne
	movne	$h4,$g4
#endif

	adds	$g0,$h0,#5		@ compare to modulus
	adcs	$g1,$h1,#0
	adcs	$g2,$h2,#0
	adcs	$g3,$h3,#0
	adc	$g4,$h4,#0
	tst	$g4,#4			@ did it carry/borrow?

#ifdef	__thumb2__
	it	ne
#endif
	movne	$h0,$g0
	ldr	$g0,[$nonce,#0]
#ifdef	__thumb2__
	it	ne
#endif
	movne	$h1,$g1
	ldr	$g1,[$nonce,#4]
#ifdef	__thumb2__
	it	ne
#endif
	movne	$h2,$g2
	ldr	$g2,[$nonce,#8]
#ifdef	__thumb2__
	it	ne
#endif
	movne	$h3,$g3
	ldr	$g3,[$nonce,#12]

	adds	$h0,$h0,$g0
	adcs	$h1,$h1,$g1
	adcs	$h2,$h2,$g2
	adc	$h3,$h3,$g3

#if __ARM_ARCH__>=7
# ifdef __ARMEB__
	rev	$h0,$h0
	rev	$h1,$h1
	rev	$h2,$h2
	rev	$h3,$h3
# endif
	str	$h0,[$mac,#0]
	str	$h1,[$mac,#4]
	str	$h2,[$mac,#8]
	str	$h3,[$mac,#12]
#else
	strb	$h0,[$mac,#0]
	mov	$h0,$h0,lsr#8
	strb	$h1,[$mac,#4]
	mov	$h1,$h1,lsr#8
	strb	$h2,[$mac,#8]
	mov	$h2,$h2,lsr#8
	strb	$h3,[$mac,#12]
	mov	$h3,$h3,lsr#8

	strb	$h0,[$mac,#1]
	mov	$h0,$h0,lsr#8
	strb	$h1,[$mac,#5]
	mov	$h1,$h1,lsr#8
	strb	$h2,[$mac,#9]
	mov	$h2,$h2,lsr#8
	strb	$h3,[$mac,#13]
	mov	$h3,$h3,lsr#8

	strb	$h0,[$mac,#2]
	mov	$h0,$h0,lsr#8
	strb	$h1,[$mac,#6]
	mov	$h1,$h1,lsr#8
	strb	$h2,[$mac,#10]
	mov	$h2,$h2,lsr#8
	strb	$h3,[$mac,#14]
	mov	$h3,$h3,lsr#8

	strb	$h0,[$mac,#3]
	strb	$h1,[$mac,#7]
	strb	$h2,[$mac,#11]
	strb	$h3,[$mac,#15]
#endif
	ldmia	sp!,{r4-r11}
#if	__ARM_ARCH__>=5
	ret				@ bx	lr
#else
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	poly1305_emit,.-poly1305_emit
___
{
# NEON register allocation for poly1305_init_neon/poly1305_blocks_neon.
#
# $R0-$R4 hold the key-power limbs and $S1-$S4 the corresponding
# values multiplied by 5; together they occupy d0-d8.  The map yields
# ten names (d0..d9) for nine variables, so d9 is simply not named.
my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9));
# $D0-$D4 = q5-q9 are the 64-bit multiply-accumulate targets,
# $H0-$H4 = q10-q14 hold the input/hash limbs.  The "#lo"/"#hi"
# suffixes used in the template are resolved by the post-processing
# loop at the end of the file (qN#lo -> d(2N), qN#hi -> d(2N+1)).
my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14));
# Temporaries and the 26-bit mask.  Note the overlaps implied by the
# q/d mapping above: $MASK (q0) covers $R0/$R1 (d0/d1) and $T1 (q4)
# covers $S4 (d8).
# NOTE(review): this is presumably why the template reloads ${S4} from
# the table and rebuilds $MASK after loading new key powers -- keep
# that in mind before reordering instructions.
my ($T0,$T1,$MASK) = map("q$_",(15,4,0));

# Core registers r4-r7: second input pointer, pointer to .Lzeros (also
# used as the loop counter in poly1305_init_neon) and the two key-power
# table pointers.  poly1305_blocks_neon saves and restores r4-r7.
my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7));

$code.=<<___;
#if	__ARM_MAX_ARCH__>=7
.fpu	neon

.type	poly1305_init_neon,%function
.align	5
poly1305_init_neon:
.Lpoly1305_init_neon:
	ldr	r3,[$ctx,#48]		@ first table element
	cmp	r3,#-1			@ is value impossible?
	bne	.Lno_init_neon

	ldr	r4,[$ctx,#20]		@ load key base 2^32
	ldr	r5,[$ctx,#24]
	ldr	r6,[$ctx,#28]
	ldr	r7,[$ctx,#32]

	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
	mov	r3,r4,lsr#26
	mov	r4,r5,lsr#20
	orr	r3,r3,r5,lsl#6
	mov	r5,r6,lsr#14
	orr	r4,r4,r6,lsl#12
	mov	r6,r7,lsr#8
	orr	r5,r5,r7,lsl#18
	and	r3,r3,#0x03ffffff
	and	r4,r4,#0x03ffffff
	and	r5,r5,#0x03ffffff

	vdup.32	$R0,r2			@ r^1 in both lanes
	add	r2,r3,r3,lsl#2		@ *5
	vdup.32	$R1,r3
	add	r3,r4,r4,lsl#2
	vdup.32	$S1,r2
	vdup.32	$R2,r4
	add	r4,r5,r5,lsl#2
	vdup.32	$S2,r3
	vdup.32	$R3,r5
	add	r5,r6,r6,lsl#2
	vdup.32	$S3,r4
	vdup.32	$R4,r6
	vdup.32	$S4,r5

	mov	$zeros,#2		@ counter

.Lsquare_neon:
	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4

	vmull.u32	$D0,$R0,${R0}[1]
	vmull.u32	$D1,$R1,${R0}[1]
	vmull.u32	$D2,$R2,${R0}[1]
	vmull.u32	$D3,$R3,${R0}[1]
	vmull.u32	$D4,$R4,${R0}[1]

	vmlal.u32	$D0,$R4,${S1}[1]
	vmlal.u32	$D1,$R0,${R1}[1]
	vmlal.u32	$D2,$R1,${R1}[1]
	vmlal.u32	$D3,$R2,${R1}[1]
	vmlal.u32	$D4,$R3,${R1}[1]

	vmlal.u32	$D0,$R3,${S2}[1]
	vmlal.u32	$D1,$R4,${S2}[1]
	vmlal.u32	$D3,$R1,${R2}[1]
	vmlal.u32	$D2,$R0,${R2}[1]
	vmlal.u32	$D4,$R2,${R2}[1]

	vmlal.u32	$D0,$R2,${S3}[1]
	vmlal.u32	$D3,$R0,${R3}[1]
	vmlal.u32	$D1,$R3,${S3}[1]
	vmlal.u32	$D2,$R4,${S3}[1]
	vmlal.u32	$D4,$R1,${R3}[1]

	vmlal.u32	$D3,$R4,${S4}[1]
	vmlal.u32	$D0,$R1,${S4}[1]
	vmlal.u32	$D1,$R2,${S4}[1]
	vmlal.u32	$D2,$R3,${S4}[1]
	vmlal.u32	$D4,$R0,${R4}[1]

	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
	@ and P. Schwabe
	@
	@ H0>>+H1>>+H2>>+H3>>+H4
	@ H3>>+H4>>*5+H0>>+H1
	@
	@ Trivia.
	@
	@ Result of multiplication of n-bit number by m-bit number is
	@ n+m bits wide. However! Even though 2^n is a n+1-bit number,
	@ m-bit number multiplied by 2^n is still n+m bits wide.
	@
	@ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
	@ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
	@ one is n+1 bits wide.
	@
	@ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
	@ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
	@ can be 27. However! In cases when their width exceeds 26 bits
	@ they are limited by 2^26+2^6. This in turn means that *sum*
	@ of the products with these values can still be viewed as sum
	@ of 52-bit numbers as long as the amount of addends is not a
	@ power of 2. For example,
	@
	@ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
	@
	@ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
	@ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
	@ 8 * (2^52) or 2^55. However, the value is then multiplied by
	@ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
	@ which is less than 32 * (2^52) or 2^57. And when processing
	@ data we are looking at triple as many addends...
	@
	@ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
	@ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
	@ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
	@ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
	@ instruction accepts 2x32-bit input and writes 2x64-bit result.
	@ This means that result of reduction have to be compressed upon
	@ loop wrap-around. This can be done in the process of reduction
	@ to minimize amount of instructions [as well as amount of
	@ 128-bit instructions, which benefits low-end processors], but
	@ one has to watch for H2 (which is narrower than H0) and 5*H4
	@ not being wider than 58 bits, so that result of right shift
	@ by 26 bits fits in 32 bits. This is also useful on x86,
	@ because it allows to use paddd in place for paddq, which
	@ benefits Atom, where paddq is ridiculously slow.

	vshr.u64	$T0,$D3,#26
	vmovn.i64	$D3#lo,$D3
	 vshr.u64	$T1,$D0,#26
	 vmovn.i64	$D0#lo,$D0
	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
	vbic.i32	$D3#lo,#0xfc000000	@ &=0x03ffffff
	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1
	 vbic.i32	$D0#lo,#0xfc000000

	vshrn.u64	$T0#lo,$D4,#26
	vmovn.i64	$D4#lo,$D4
	 vshr.u64	$T1,$D1,#26
	 vmovn.i64	$D1#lo,$D1
	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2
	vbic.i32	$D4#lo,#0xfc000000
	 vbic.i32	$D1#lo,#0xfc000000

	vadd.i32	$D0#lo,$D0#lo,$T0#lo
	vshl.u32	$T0#lo,$T0#lo,#2
	 vshrn.u64	$T1#lo,$D2,#26
	 vmovn.i64	$D2#lo,$D2
	vadd.i32	$D0#lo,$D0#lo,$T0#lo	@ h4 -> h0
	 vadd.i32	$D3#lo,$D3#lo,$T1#lo	@ h2 -> h3
	 vbic.i32	$D2#lo,#0xfc000000

	vshr.u32	$T0#lo,$D0#lo,#26
	vbic.i32	$D0#lo,#0xfc000000
	 vshr.u32	$T1#lo,$D3#lo,#26
	 vbic.i32	$D3#lo,#0xfc000000
	vadd.i32	$D1#lo,$D1#lo,$T0#lo	@ h0 -> h1
	 vadd.i32	$D4#lo,$D4#lo,$T1#lo	@ h3 -> h4

	subs		$zeros,$zeros,#1
	beq		.Lsquare_break_neon

	add		$tbl0,$ctx,#(48+0*9*4)
	add		$tbl1,$ctx,#(48+1*9*4)

	vtrn.32		$R0,$D0#lo		@ r^2:r^1
	vtrn.32		$R2,$D2#lo
	vtrn.32		$R3,$D3#lo
	vtrn.32		$R1,$D1#lo
	vtrn.32		$R4,$D4#lo

	vshl.u32	$S2,$R2,#2		@ *5
	vshl.u32	$S3,$R3,#2
	vshl.u32	$S1,$R1,#2
	vshl.u32	$S4,$R4,#2
	vadd.i32	$S2,$S2,$R2
	vadd.i32	$S1,$S1,$R1
	vadd.i32	$S3,$S3,$R3
	vadd.i32	$S4,$S4,$R4

	vst4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
	vst4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
	vst4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
	vst4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
	vst1.32		{${S4}[0]},[$tbl0,:32]
	vst1.32		{${S4}[1]},[$tbl1,:32]

	b		.Lsquare_neon

.align	4
.Lsquare_break_neon:
	add		$tbl0,$ctx,#(48+2*4*9)
	add		$tbl1,$ctx,#(48+3*4*9)

	vmov		$R0,$D0#lo		@ r^4:r^3
	vshl.u32	$S1,$D1#lo,#2		@ *5
	vmov		$R1,$D1#lo
	vshl.u32	$S2,$D2#lo,#2
	vmov		$R2,$D2#lo
	vshl.u32	$S3,$D3#lo,#2
	vmov		$R3,$D3#lo
	vshl.u32	$S4,$D4#lo,#2
	vmov		$R4,$D4#lo
	vadd.i32	$S1,$S1,$D1#lo
	vadd.i32	$S2,$S2,$D2#lo
	vadd.i32	$S3,$S3,$D3#lo
	vadd.i32	$S4,$S4,$D4#lo

	vst4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
	vst4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
	vst4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
	vst4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
	vst1.32		{${S4}[0]},[$tbl0]
	vst1.32		{${S4}[1]},[$tbl1]

.Lno_init_neon:
	ret				@ bx	lr
.size	poly1305_init_neon,.-poly1305_init_neon

.type	poly1305_blocks_neon,%function
.align	5
poly1305_blocks_neon:
.Lpoly1305_blocks_neon:
	ldr	ip,[$ctx,#36]		@ is_base2_26

	cmp	$len,#64
	blo	.Lpoly1305_blocks

	stmdb	sp!,{r4-r7}
	vstmdb	sp!,{d8-d15}		@ ABI specification says so

	tst	ip,ip			@ is_base2_26?
	bne	.Lbase2_26_neon

	stmdb	sp!,{r1-r3,lr}
	bl	.Lpoly1305_init_neon

	ldr	r4,[$ctx,#0]		@ load hash value base 2^32
	ldr	r5,[$ctx,#4]
	ldr	r6,[$ctx,#8]
	ldr	r7,[$ctx,#12]
	ldr	ip,[$ctx,#16]

	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
	mov	r3,r4,lsr#26
	 veor	$D0#lo,$D0#lo,$D0#lo
	mov	r4,r5,lsr#20
	orr	r3,r3,r5,lsl#6
	 veor	$D1#lo,$D1#lo,$D1#lo
	mov	r5,r6,lsr#14
	orr	r4,r4,r6,lsl#12
	 veor	$D2#lo,$D2#lo,$D2#lo
	mov	r6,r7,lsr#8
	orr	r5,r5,r7,lsl#18
	 veor	$D3#lo,$D3#lo,$D3#lo
	and	r3,r3,#0x03ffffff
	orr	r6,r6,ip,lsl#24
	 veor	$D4#lo,$D4#lo,$D4#lo
	and	r4,r4,#0x03ffffff
	mov	r1,#1
	and	r5,r5,#0x03ffffff
	str	r1,[$ctx,#36]		@ set is_base2_26

	vmov.32	$D0#lo[0],r2
	vmov.32	$D1#lo[0],r3
	vmov.32	$D2#lo[0],r4
	vmov.32	$D3#lo[0],r5
	vmov.32	$D4#lo[0],r6
	adr	$zeros,.Lzeros

	ldmia	sp!,{r1-r3,lr}
	b	.Lhash_loaded

.align	4
.Lbase2_26_neon:
	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ load hash value

	veor		$D0#lo,$D0#lo,$D0#lo
	veor		$D1#lo,$D1#lo,$D1#lo
	veor		$D2#lo,$D2#lo,$D2#lo
	veor		$D3#lo,$D3#lo,$D3#lo
	veor		$D4#lo,$D4#lo,$D4#lo
	vld4.32		{$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
	adr		$zeros,.Lzeros
	vld1.32		{$D4#lo[0]},[$ctx]
	sub		$ctx,$ctx,#16		@ rewind

.Lhash_loaded:
	add		$in2,$inp,#32
	mov		$padbit,$padbit,lsl#24
	tst		$len,#31
	beq		.Leven

	vld4.32		{$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]!
	vmov.32		$H4#lo[0],$padbit
	sub		$len,$len,#16
	add		$in2,$inp,#32

# ifdef	__ARMEB__
	vrev32.8	$H0,$H0
	vrev32.8	$H3,$H3
	vrev32.8	$H1,$H1
	vrev32.8	$H2,$H2
# endif
	vsri.u32	$H4#lo,$H3#lo,#8	@ base 2^32 -> base 2^26
	vshl.u32	$H3#lo,$H3#lo,#18

	vsri.u32	$H3#lo,$H2#lo,#14
	vshl.u32	$H2#lo,$H2#lo,#12
	vadd.i32	$H4#hi,$H4#lo,$D4#lo	@ add hash value and move to #hi

	vbic.i32	$H3#lo,#0xfc000000
	vsri.u32	$H2#lo,$H1#lo,#20
	vshl.u32	$H1#lo,$H1#lo,#6

	vbic.i32	$H2#lo,#0xfc000000
	vsri.u32	$H1#lo,$H0#lo,#26
	vadd.i32	$H3#hi,$H3#lo,$D3#lo

	vbic.i32	$H0#lo,#0xfc000000
	vbic.i32	$H1#lo,#0xfc000000
	vadd.i32	$H2#hi,$H2#lo,$D2#lo

	vadd.i32	$H0#hi,$H0#lo,$D0#lo
	vadd.i32	$H1#hi,$H1#lo,$D1#lo

	mov		$tbl1,$zeros
	add		$tbl0,$ctx,#48

	cmp		$len,$len
	b		.Long_tail

.align	4
.Leven:
	subs		$len,$len,#64
	it		lo
	movlo		$in2,$zeros

	vmov.i32	$H4,#1<<24		@ padbit, yes, always
	vld4.32		{$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp]	@ inp[0:1]
	add		$inp,$inp,#64
	vld4.32		{$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2]	@ inp[2:3] (or 0)
	add		$in2,$in2,#64
	itt		hi
	addhi		$tbl1,$ctx,#(48+1*9*4)
	addhi		$tbl0,$ctx,#(48+3*9*4)

# ifdef	__ARMEB__
	vrev32.8	$H0,$H0
	vrev32.8	$H3,$H3
	vrev32.8	$H1,$H1
	vrev32.8	$H2,$H2
# endif
	vsri.u32	$H4,$H3,#8		@ base 2^32 -> base 2^26
	vshl.u32	$H3,$H3,#18

	vsri.u32	$H3,$H2,#14
	vshl.u32	$H2,$H2,#12

	vbic.i32	$H3,#0xfc000000
	vsri.u32	$H2,$H1,#20
	vshl.u32	$H1,$H1,#6

	vbic.i32	$H2,#0xfc000000
	vsri.u32	$H1,$H0,#26

	vbic.i32	$H0,#0xfc000000
	vbic.i32	$H1,#0xfc000000

	bls		.Lskip_loop

	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^2
	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^4
	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
	b		.Loop_neon

.align	5
.Loop_neon:
	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
	@   \___________________/
	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
	@   \___________________/ \____________________/
	@
	@ Note that we start with inp[2:3]*r^2. This is because it
	@ doesn't depend on reduction in previous iteration.
	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4

	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ inp[2:3]*r^2

	vadd.i32	$H2#lo,$H2#lo,$D2#lo	@ accumulate inp[0:1]
	vmull.u32	$D2,$H2#hi,${R0}[1]
	vadd.i32	$H0#lo,$H0#lo,$D0#lo
	vmull.u32	$D0,$H0#hi,${R0}[1]
	vadd.i32	$H3#lo,$H3#lo,$D3#lo
	vmull.u32	$D3,$H3#hi,${R0}[1]
	vmlal.u32	$D2,$H1#hi,${R1}[1]
	vadd.i32	$H1#lo,$H1#lo,$D1#lo
	vmull.u32	$D1,$H1#hi,${R0}[1]

	vadd.i32	$H4#lo,$H4#lo,$D4#lo
	vmull.u32	$D4,$H4#hi,${R0}[1]
	subs		$len,$len,#64
	vmlal.u32	$D0,$H4#hi,${S1}[1]
	it		lo
	movlo		$in2,$zeros
	vmlal.u32	$D3,$H2#hi,${R1}[1]
	vld1.32		${S4}[1],[$tbl1,:32]
	vmlal.u32	$D1,$H0#hi,${R1}[1]
	vmlal.u32	$D4,$H3#hi,${R1}[1]

	vmlal.u32	$D0,$H3#hi,${S2}[1]
	vmlal.u32	$D3,$H1#hi,${R2}[1]
	vmlal.u32	$D4,$H2#hi,${R2}[1]
	vmlal.u32	$D1,$H4#hi,${S2}[1]
	vmlal.u32	$D2,$H0#hi,${R2}[1]

	vmlal.u32	$D3,$H0#hi,${R3}[1]
	vmlal.u32	$D0,$H2#hi,${S3}[1]
	vmlal.u32	$D4,$H1#hi,${R3}[1]
	vmlal.u32	$D1,$H3#hi,${S3}[1]
	vmlal.u32	$D2,$H4#hi,${S3}[1]

	vmlal.u32	$D3,$H4#hi,${S4}[1]
	vmlal.u32	$D0,$H1#hi,${S4}[1]
	vmlal.u32	$D4,$H0#hi,${R4}[1]
	vmlal.u32	$D1,$H2#hi,${S4}[1]
	vmlal.u32	$D2,$H3#hi,${S4}[1]

	vld4.32		{$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2]	@ inp[2:3] (or 0)
	add		$in2,$in2,#64

	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ (hash+inp[0:1])*r^4 and accumulate

	vmlal.u32	$D3,$H3#lo,${R0}[0]
	vmlal.u32	$D0,$H0#lo,${R0}[0]
	vmlal.u32	$D4,$H4#lo,${R0}[0]
	vmlal.u32	$D1,$H1#lo,${R0}[0]
	vmlal.u32	$D2,$H2#lo,${R0}[0]
	vld1.32		${S4}[0],[$tbl0,:32]

	vmlal.u32	$D3,$H2#lo,${R1}[0]
	vmlal.u32	$D0,$H4#lo,${S1}[0]
	vmlal.u32	$D4,$H3#lo,${R1}[0]
	vmlal.u32	$D1,$H0#lo,${R1}[0]
	vmlal.u32	$D2,$H1#lo,${R1}[0]

	vmlal.u32	$D3,$H1#lo,${R2}[0]
	vmlal.u32	$D0,$H3#lo,${S2}[0]
	vmlal.u32	$D4,$H2#lo,${R2}[0]
	vmlal.u32	$D1,$H4#lo,${S2}[0]
	vmlal.u32	$D2,$H0#lo,${R2}[0]

	vmlal.u32	$D3,$H0#lo,${R3}[0]
	vmlal.u32	$D0,$H2#lo,${S3}[0]
	vmlal.u32	$D4,$H1#lo,${R3}[0]
	vmlal.u32	$D1,$H3#lo,${S3}[0]
	vmlal.u32	$D3,$H4#lo,${S4}[0]

	vmlal.u32	$D2,$H4#lo,${S3}[0]
	vmlal.u32	$D0,$H1#lo,${S4}[0]
	vmlal.u32	$D4,$H0#lo,${R4}[0]
	vmov.i32	$H4,#1<<24		@ padbit, yes, always
	vmlal.u32	$D1,$H2#lo,${S4}[0]
	vmlal.u32	$D2,$H3#lo,${S4}[0]

	vld4.32		{$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp]	@ inp[0:1]
	add		$inp,$inp,#64
# ifdef	__ARMEB__
	vrev32.8	$H0,$H0
	vrev32.8	$H1,$H1
	vrev32.8	$H2,$H2
	vrev32.8	$H3,$H3
# endif

	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ lazy reduction interleaved with base 2^32 -> base 2^26 of
	@ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4.

	vshr.u64	$T0,$D3,#26
	vmovn.i64	$D3#lo,$D3
	 vshr.u64	$T1,$D0,#26
	 vmovn.i64	$D0#lo,$D0
	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
	vbic.i32	$D3#lo,#0xfc000000
	  vsri.u32	$H4,$H3,#8		@ base 2^32 -> base 2^26
	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1
	  vshl.u32	$H3,$H3,#18
	 vbic.i32	$D0#lo,#0xfc000000

	vshrn.u64	$T0#lo,$D4,#26
	vmovn.i64	$D4#lo,$D4
	 vshr.u64	$T1,$D1,#26
	 vmovn.i64	$D1#lo,$D1
	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2
	  vsri.u32	$H3,$H2,#14
	vbic.i32	$D4#lo,#0xfc000000
	  vshl.u32	$H2,$H2,#12
	 vbic.i32	$D1#lo,#0xfc000000

	vadd.i32	$D0#lo,$D0#lo,$T0#lo
	vshl.u32	$T0#lo,$T0#lo,#2
	  vbic.i32	$H3,#0xfc000000
	 vshrn.u64	$T1#lo,$D2,#26
	 vmovn.i64	$D2#lo,$D2
	vaddl.u32	$D0,$D0#lo,$T0#lo	@ h4 -> h0 [widen for a sec]
	  vsri.u32	$H2,$H1,#20
	 vadd.i32	$D3#lo,$D3#lo,$T1#lo	@ h2 -> h3
	  vshl.u32	$H1,$H1,#6
	 vbic.i32	$D2#lo,#0xfc000000
	  vbic.i32	$H2,#0xfc000000

	vshrn.u64	$T0#lo,$D0,#26		@ re-narrow
	vmovn.i64	$D0#lo,$D0
	  vsri.u32	$H1,$H0,#26
	  vbic.i32	$H0,#0xfc000000
	 vshr.u32	$T1#lo,$D3#lo,#26
	 vbic.i32	$D3#lo,#0xfc000000
	vbic.i32	$D0#lo,#0xfc000000
	vadd.i32	$D1#lo,$D1#lo,$T0#lo	@ h0 -> h1
	 vadd.i32	$D4#lo,$D4#lo,$T1#lo	@ h3 -> h4
	  vbic.i32	$H1,#0xfc000000

	bhi		.Loop_neon

.Lskip_loop:
	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1

	add		$tbl1,$ctx,#(48+0*9*4)
	add		$tbl0,$ctx,#(48+1*9*4)
	adds		$len,$len,#32
	it		ne
	movne		$len,#0
	bne		.Long_tail

	vadd.i32	$H2#hi,$H2#lo,$D2#lo	@ add hash value and move to #hi
	vadd.i32	$H0#hi,$H0#lo,$D0#lo
	vadd.i32	$H3#hi,$H3#lo,$D3#lo
	vadd.i32	$H1#hi,$H1#lo,$D1#lo
	vadd.i32	$H4#hi,$H4#lo,$D4#lo

.Long_tail:
	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^1
	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^2

	vadd.i32	$H2#lo,$H2#lo,$D2#lo	@ can be redundant
	vmull.u32	$D2,$H2#hi,$R0
	vadd.i32	$H0#lo,$H0#lo,$D0#lo
	vmull.u32	$D0,$H0#hi,$R0
	vadd.i32	$H3#lo,$H3#lo,$D3#lo
	vmull.u32	$D3,$H3#hi,$R0
	vadd.i32	$H1#lo,$H1#lo,$D1#lo
	vmull.u32	$D1,$H1#hi,$R0
	vadd.i32	$H4#lo,$H4#lo,$D4#lo
	vmull.u32	$D4,$H4#hi,$R0

	vmlal.u32	$D0,$H4#hi,$S1
	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
	vmlal.u32	$D3,$H2#hi,$R1
	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
	vmlal.u32	$D1,$H0#hi,$R1
	vmlal.u32	$D4,$H3#hi,$R1
	vmlal.u32	$D2,$H1#hi,$R1

	vmlal.u32	$D3,$H1#hi,$R2
	vld1.32		${S4}[1],[$tbl1,:32]
	vmlal.u32	$D0,$H3#hi,$S2
	vld1.32		${S4}[0],[$tbl0,:32]
	vmlal.u32	$D4,$H2#hi,$R2
	vmlal.u32	$D1,$H4#hi,$S2
	vmlal.u32	$D2,$H0#hi,$R2

	vmlal.u32	$D3,$H0#hi,$R3
	 it		ne
	 addne		$tbl1,$ctx,#(48+2*9*4)
	vmlal.u32	$D0,$H2#hi,$S3
	 it		ne
	 addne		$tbl0,$ctx,#(48+3*9*4)
	vmlal.u32	$D4,$H1#hi,$R3
	vmlal.u32	$D1,$H3#hi,$S3
	vmlal.u32	$D2,$H4#hi,$S3

	vmlal.u32	$D3,$H4#hi,$S4
	 vorn		$MASK,$MASK,$MASK	@ all-ones, can be redundant
	vmlal.u32	$D0,$H1#hi,$S4
	 vshr.u64	$MASK,$MASK,#38
	vmlal.u32	$D4,$H0#hi,$R4
	vmlal.u32	$D1,$H2#hi,$S4
	vmlal.u32	$D2,$H3#hi,$S4

	beq		.Lshort_tail

	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ (hash+inp[0:1])*r^4:r^3 and accumulate

	vld4.32		{${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!	@ load r^3
	vld4.32		{${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!	@ load r^4

	vmlal.u32	$D2,$H2#lo,$R0
	vmlal.u32	$D0,$H0#lo,$R0
	vmlal.u32	$D3,$H3#lo,$R0
	vmlal.u32	$D1,$H1#lo,$R0
	vmlal.u32	$D4,$H4#lo,$R0

	vmlal.u32	$D0,$H4#lo,$S1
	vld4.32		{${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
	vmlal.u32	$D3,$H2#lo,$R1
	vld4.32		{${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
	vmlal.u32	$D1,$H0#lo,$R1
	vmlal.u32	$D4,$H3#lo,$R1
	vmlal.u32	$D2,$H1#lo,$R1

	vmlal.u32	$D3,$H1#lo,$R2
	vld1.32		${S4}[1],[$tbl1,:32]
	vmlal.u32	$D0,$H3#lo,$S2
	vld1.32		${S4}[0],[$tbl0,:32]
	vmlal.u32	$D4,$H2#lo,$R2
	vmlal.u32	$D1,$H4#lo,$S2
	vmlal.u32	$D2,$H0#lo,$R2

	vmlal.u32	$D3,$H0#lo,$R3
	vmlal.u32	$D0,$H2#lo,$S3
	vmlal.u32	$D4,$H1#lo,$R3
	vmlal.u32	$D1,$H3#lo,$S3
	vmlal.u32	$D2,$H4#lo,$S3

	vmlal.u32	$D3,$H4#lo,$S4
	 vorn		$MASK,$MASK,$MASK	@ all-ones
	vmlal.u32	$D0,$H1#lo,$S4
	 vshr.u64	$MASK,$MASK,#38
	vmlal.u32	$D4,$H0#lo,$R4
	vmlal.u32	$D1,$H2#lo,$S4
	vmlal.u32	$D2,$H3#lo,$S4

.Lshort_tail:
	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ horizontal addition

	vadd.i64	$D3#lo,$D3#lo,$D3#hi
	vadd.i64	$D0#lo,$D0#lo,$D0#hi
	vadd.i64	$D4#lo,$D4#lo,$D4#hi
	vadd.i64	$D1#lo,$D1#lo,$D1#hi
	vadd.i64	$D2#lo,$D2#lo,$D2#hi

	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ lazy reduction, but without narrowing

	vshr.u64	$T0,$D3,#26
	vand.i64	$D3,$D3,$MASK
	 vshr.u64	$T1,$D0,#26
	 vand.i64	$D0,$D0,$MASK
	vadd.i64	$D4,$D4,$T0		@ h3 -> h4
	 vadd.i64	$D1,$D1,$T1		@ h0 -> h1

	vshr.u64	$T0,$D4,#26
	vand.i64	$D4,$D4,$MASK
	 vshr.u64	$T1,$D1,#26
	 vand.i64	$D1,$D1,$MASK
	 vadd.i64	$D2,$D2,$T1		@ h1 -> h2

	vadd.i64	$D0,$D0,$T0
	vshl.u64	$T0,$T0,#2
	 vshr.u64	$T1,$D2,#26
	 vand.i64	$D2,$D2,$MASK
	vadd.i64	$D0,$D0,$T0		@ h4 -> h0
	 vadd.i64	$D3,$D3,$T1		@ h2 -> h3

	vshr.u64	$T0,$D0,#26
	vand.i64	$D0,$D0,$MASK
	 vshr.u64	$T1,$D3,#26
	 vand.i64	$D3,$D3,$MASK
	vadd.i64	$D1,$D1,$T0		@ h0 -> h1
	 vadd.i64	$D4,$D4,$T1		@ h3 -> h4

	cmp		$len,#0
	bne		.Leven

	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	@ store hash value

	vst4.32		{$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
	vst1.32		{$D4#lo[0]},[$ctx]

	vldmia	sp!,{d8-d15}			@ epilogue
	ldmia	sp!,{r4-r7}
	ret					@ bx	lr
.size	poly1305_blocks_neon,.-poly1305_blocks_neon

.align	5
.Lzeros:
.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
#ifndef	__KERNEL__
.LOPENSSL_armcap:
# ifdef	_WIN32
.word	OPENSSL_armcap_P
# else
.word	OPENSSL_armcap_P-.Lpoly1305_init
# endif
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P
#endif
#endif
___
}	}
# Trailer appended to the generated assembly: an identification string
# followed by an alignment directive.
$code.=<<___;
.asciz	"Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm"
.align	2
___

# Post-process the accumulated assembly text one line at a time and write it
# to STDOUT, which the top of the script pointed at either the output file or
# a pipe into arm-xlate.pl.
foreach (split("\n",$code)) {
	# Replace every `...` span with the result of evaluating its contents
	# as Perl.
	s/\`([^\`]*)\`/eval $1/geo;

	# At most one of the rewrites below is applied to a given line: the
	# "or" chain stops at the first substitution that matches.
	#   qN#lo / qN#hi  ->  d(2N) / d(2N+1)
	#   ret            ->  bx lr
	#   bx lr          ->  .word 0xe12fff1e
	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or
	s/\bret\b/bx	lr/go						or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4

	print $_,"\n";
}

# Closing enforces the flush of buffered output.  Check the result: a write
# error (reported in $!) or, for the piped case, a non-zero exit of the
# translator (reported in $? with $! left at 0) would otherwise go unnoticed
# and leave truncated assembly behind a successful exit status.
close STDOUT
	or die $! ? "error closing STDOUT: $!"
		  : "output filter exited with status $?";

OHA YOOOO
����JFIF��� ( %!1!%)+...383-7(-.+  ---+--------------------+-----7------+-7-----+---++����"����M!1AQaq�"2���Rr��#3Bb�s����CSc��$4���D���TdE������'1!AQ"2q�a���� ?�Z�L�[�����=D�6]�T mѰx$�6��@ۣ`�Itl �"��(6�Dst�2:��Fk���x���4��K�h}�l �?r��@��!�Q��Y��?��-� =��O�����(6����<A�x%B��<A�x%B��<(6�@��.���*%���$e�m��T�wi��~H�]F�Ѱx"�`�Ul��ꃁ���RPl�6�UIA�x(���#�B��zy%�<�L���mvN �ԭ6�Y$Qk �S��䮰�K6ף�x�+�T��L4���>�C=j�������p�|J�ǥ���b=���Y�6g9��F1��Y�vݩ�`��塏��>� � �ݨ,�����A�o�=W*���"��>����� \ �"݄(꧈�y���9�m���d�aAD�u&�T��D �@$BITU�"��D�D!BH�� � �UTu� �^c�?�[ND�K�`�\F'�jf��<�G�G��B�q]�����!tl�6�]\4mѰx"��<�6��B�֊�o4.�Ah�8QM,�y�����%cLh��y�c����!�8Tb���h�!p�q�t����EIA�x'Pl �KT N h6�J�P7�6Ԩ�6恰&��� �� � ����R���)m�`8�nC�J���E��%H� D�"T �n��W�s+���x���+g�?t��@�����;�>�o��0�����|Ћ0�����|"�J�%EBBBB!X�����|��X��̟s��Ӭ��H獎ŏ m׷�0���—���2q����s�'q]�����7�%����hp8EAYy�Ӗc��9%�A� _g�ٙ���}ӯ�Ul�Ƽl Ѓ�a�ۮ9�i�*��R"�*������:��j�zE+�H ����kB�2�e��~��Zd# ��0Vr�T�ev������Y�����-8]o��x�~)�9��}W:RF֟��P�A�� ���G+hH�P6�����:���Ԁ��I�O{Y�F��$U"H�#2��*J����L��L�B�*��T`���(�-:�R�H Z��"�B�Ihh��B�B�����urP�%� ��9��7v",�!�A�b�X�V6F��� ���^K�+��f��qm^��'�9�K� �����o��! 
��P�%B����E��}Xo�U��(BXJ󥯢t��u�&�}Xj +%�7+�c� �\��t�9t p*)��L�Z��T��KTC�NGT�PH pQ�� ɚ^qB ��8!�*��� P��"T iHS���n��W�s+���x���,mtG��@��D~���� } tY������Y���4����!!@!@!@!@�a�^h��R���*��|!���Us;����n:��#���4-h�chW꼝���%�+Z�kA��E%��4“M$�����@y�q˓��ʽ�� $U��������eH�-;�a�ކ�&����*IB� �Z�w��;c��|�3JZ@��-��w�k������Q�Ϊ �g�d���I��G����8�N�G�����(R)�2�_�]3;]z7]�2�w�r����I�Iĭ=15���b~ 2�{cuO�'෎V�nyI)��1s�� ����i�lT*�ݠ�������H��p��^j�C�Q�B*(������(m�Wb ���Z�)P*D$EGimZU�ViZ��┵\�P�L���IW���Eh����[榚V�R8+l��zV<�B�M�j�V�pw�%�*�UKYǒ}�J�% ���(�����HM��NQ�S�toԝ�ܪZd�ল���,UP�J�=�Z m�T��-]��y��*k+���:�%J��V���X�i�o6�D38�h�=� �'G�$�@��X��H�P�~��X��e�Ã�����4���WS��x�3���q�˓V�S��k'�K�w�N�w�eb��,��bcw�1�� �ȃ�%����͖��Bd�J��*V�Y��.;Kh�� �*���1 X���-�� �OJ��$ sCU��H�Zj���N��e�m�zT�"T��%���8�(Q�4 雐��d8���j�$NH�'$@�� �a�< ᴖ��K��W ��5}��{��-�����w�}�,Y����䴣�,��S�|�R��BT D!�R ^��I *I4m%Gk�2&�y�m$�k;�7m��sW���:�q��!汖s��]�i�;(��ƣ�7_�Ve�o\㛜K�y���T/.yܝ�2! �AB(BD��ꦽ� �EX�w2��\�����^{Nɥ�=����lB�V���y ��t||�K$�v��Ȃ>* D��Q��z$�y��F�MqD��(���鍵M2 �G� ;[r*4�T�Rd�oV#�t+���P�A-��v�*�>��PhU ���-QJ��Y�mE;k�"�F�?%���R��&������G�ӳhx;�i��h���5��+�Cr��8���B�:�+BI�Ϯ�LOٳ��=�~��,��b�t�C�6p\����x ��«�!{�ҽhh��7<� ĊW���<�CNw�ai�@��ںf����j#^�Ny���\^rRU9�1u`�RC% T)SCM��VtR��U溢�f���i�|��Y/SpWF�V ��*�A�5)%T9����'B��O "�TTTQZTm�Dv] �����U����������R�����5�/B^�.�/���"rE�8B)�"�P�D!Km�<W��y�� |�[�m���,Y\A�]��7��f����ѻ,H�Zj[���eh� (N+P�U"N�*���ء6L� 뛐�������"T D!BBT�"�#��I~%�͑�W ���Q���w��] ���.���.��<���O��Zl�,��S�|���%F��9"H�Cj�66��4Wy��NTR��i Y��� '����|���c<�fژ��E�����\>.�|hH�Yem��&��"h!!*lehjӼ �s�HQ�b���� pi�Η|h�'�Rh��SP3 ۽�$0��P X?�w�-;5���4h�{� ���/U�v ���ְ\2o��e�����@(��+�u���Ē�����B^��&�M]�B� �"��D��@!@!@!,�lΖF��W=���^����da�cehi�����_��#d��[ 9_=�]�Ù^�$!�-p8�u�(B�/� ��A�!�����\�F4q��3����:�[�>�w�ك�[���]��<��[�3�'��M+�yMH�R D�P��(�P��AJ�b Qq�F�bq�ߪ:J�$j��8�-5 ��z@�#��K� ڕ�N ԺKړ�^y߉�Ԭ.�tv��n9��w���n�|s�Z��;q"{���9�! 
�BJ��BMe��=�`���֍�&�Ba�{� ���v ���@�> ���lp�6������uRh�"�i�,ɯ�79o�*�� ��V�&�[\v��:�bq k�|���\͒��1��]q��C �xi")��*0w��{��0�c��������߸ɢj�X�MQ����Y�R%B�D�P��!�$@$J��J��K�����r^�V~�� p�KWg���.��m 뛶���L�Z Bã� U; �H7�V�A��+�B��r R7!�%\���!�!�!�� �a��W�����+��Z�cw���]�C�D��}˽��]��g���>KM���?���� G�4HI&����i=,�Ge��*��o���ׯ r�c�RZ����$� ����fӿ.:��q'\j�M\㼕j�g�߃rh�� � S\ ��g���d%UkY� ��?Nn>�Y$ &�,�[/I�ZC��>�a��S�K��p ��� �Ƭ�QY�X��h!M�m�h�8]p���y%� C/gt����ڭX��� c�iW\�;'�i�� �atd���ā�7(6L`��]�=��hP��.�ss��X�9�X��ji�;��t��\��V���0f�87�U�?k��ww3W�|��=L@� ���OG��bv�Z���uj��AD�BEjubӨ&����F�Wgb�G���:�uT1��ni���y|�X�Etu������n�k�D��q57���g�A�n ��X]&����+����sD%�0�p��<��Vtm�����Z���9�^�Y*�(�{�/��j���sn}uz�����_n����������MErjΎ��[#N-5�5�a\��cu]m��M�WN�b�_ p�t�q�~ '��H�- y�@7%Oj�h�y/B��d�k�-{o�,-5��i�4toWx꘳�_�(E�LJ�í�m;�]t��V�^23I�{�h�g�43-zJ�ֽ��g�Z "'!N��:� N������Qku�8�3�n^���s,�6(� �nv��.�),��eƷK�\K����IPO�A��������e�K���ڌ6����yW�)֋��Z}�m{쾙��x���{hyn+/EY�l�W!-�$�U.��I��� ��� �� �/M�;��Pݎx�"~+��Ή1�&ѯ���=����#�ыv�$�[�"�R��To�v~)�U�˨ x0^q��S���^�d����ʠ�W� ��5�B���dy"��&�õ���c��g�+��9��ugh�ޖG���7������ �곗Hz;f�L��`8���{"��؛f�1��&�)J[�I������!n��v��b�Ik{�������ŋ���qo�s\��}ɛ\*KO<=h�L�2�U��Z� ���v���[O��8�@7$t����4S2r�ʬm���)18op�?��]1%�<��&71�k�.�s$.?-��s�ïZ� C�DjFC�w֠r+�@U4U��xY2����N�w�S�\S+�� P�0D��� ����R�>*D�3�֎m�Fu��v^k��,�9�-�V����M�k���Rw֏�Z�[Y�=���q:Aů%�>�����O�0�pmӅC�^뇍6��+E��N�f>29^L����=e�gi�-0�����e�W5��U�E�x�і��(ZKH�F(�Y�mrA;Gf\�H�z�G��iw� �^�r�y������-.��9��2|�%���ÌR�����J��� x��ab��KD�꾏!�k��D�s4o���F/9�2��fv��?y���z;WLC�T�*���l�"�b[�qi��A� ����Vf�җ�W'fA������� ¢�#h?%.�҆�lߊ�_��w� 6�uA�*V�;M�y�*�OY����1MZ6��g1�PsWiv�e���%B����@�B"�@$�TQF�т@p�q֫9��+��F��bF� VljWx�B�%���L����ۗ$��˒r������ � � d�p:KZ����+��Z�c{��r��D�ކ}˿h}��"be�I !���-[��n�{ft�J`�ь7f#�\c�{rڱ����T0уQ~���Q��h�����9���;��Ֆr�Ts�Y�dt��L�ٳx�R�i�<�B�g�=��^caؤs�EI��SAEɤ6�\��b�pn_�Z�U�u�� c\}Z�a��y��B�PZ���'�Ya���Ʈ��N�� ��I�C��,Vp��+A^Y,��,h*��[P�!�|�����I-,nn��& �x��u )�T�o��%��]8-x�m�dh́�sz]�/�#�}�{� 
�uq*͚��1=��g27�4�l�;���Rn�v�G�zܑ`�Z9���j� �ᲈ�X̜�]�$���OYɊT��2� %BAJ����� �:�uԮU�j_� z'�48d�T'��F�g�rI�Ƹ��+ :��p� '$涨�䳶J5�k�48Tk�[;c��K=��zՕXʛuS��"��Q�8l �m��ˋ��k+���ᴇ��$V _�%$g��k��]��GZ�t�^ʹ�쮣��k��m�2V:9׊8c෎v%�8O�&}�����]�\�G4?��3F ,�6I9�k��^�����m���TЕm � J�P� ��}�� ��Y���2��}�&���{��|(Y�o���J���v�qܶ��EAA����BG�o<�Y1�K+�����۽���^�dŹ���-�v�����P��It���sh�VŦ���ޫ.H�4+�9m�ώ�Q �H���%D�@�!@�%@AFkK��B�&hCm��8'&7.A9y�P�$J�B�P�*BJp< �t���͑�W�����E- ��#�h�8��� {K�s���Zgu���m=IU�v���;!�cϴ� ��J�"r\ 0 p��+�8T��e]ĵy�rߧ\'�4s\�ep����� <�ޑ}ƶ!��9�V�Mk]R;1���4L���'2j���B��F@c���f���e=�k�t�n�?X��2W����j����&��Ꜩ�n�WԮ�5� �����G���|h�m�(ݹ��@Z��VY$?�6�qy$�'���l�z��Õ��I���pK_ny�Tlq��<��.#��)��&���0�)#;s�`k��xn5ʞ�4�-�Y� ���'�kFe���k%������=�{c9���u�L�Ut��,�`g���iS)Z��Z@���l8B��P1�y��Z�ѩ�\�㏲Vk���&K,��I4����iX!��U,���7‰l����`t��E�I��ʺN�P�(����k!�}֒�\q��-m'%؝CBh����aTn[�%Z}�C��0P>bs$�)�4���x��9E�&�^EQf��!�mt�z��8��ձhR�Rw�ͨ��{>g��(��c�C���p�� �޺ S��n� l���$lgn�ۂ����I�{݉$�XnM��-{��Z��E-����n��*Si/`s?%��8��6�<����� \7A��WZR#���Ǔi�˹ V2*��B�� �� T�=3n�&юA�� ���%��_O� ->���G�EgJ�C��a?��tl@����6��m��Q���p�s(^���*�� � �Tfi :�b��Ed���׀�|c���8^a���F�[J�~7*@n�3��2��j��mik^��NyF�J�g�������6���*@��Pg�珢�C�g��*=5���R��g�=U_��;G�~)&��M���\<�F,�]�&��<\o�L�]����g �� r޿��������$kk��i(�!�ݥE�V�E�;ͨ���헮�����5���TiTM�z��+N����Ī��J�e{ݺ6 �n$�q<�Y�!�1�yAs����.�@��U�%+�έ��u\]J�`�Mdփ��C���;3��:&�^��7NK ���øS��]��'�7v�M;�ä\BD�1�P:�J#i��u��e���mZ���<���X?eew'8��� �=��փ��.����Ԯe�c�e�O)�KZ9 rW,�����a#[���U[�03\ޚ�\QU�3���L<⵶Zv�dP2���4d5����\f���f�b���н�~��x��e�I��J��e�M2���d��oݴ���\|2M��6U�.��RYt|���O~�Q��pV���։c��C����E:�9��q@��N�D�;� �y�;M�Z�3���BԷڒ� ���Sm>�t��q�O@��=��s�6����H���EAi����e��;��ղ� p�<1T�&��79�ǽ�1#�5��'xµ��eODB��Z^;@7o5���"����xZ+�!S6�<�&�5��z���xa�.������73k�6��Yc@@ � ���D� ��� �f����438D75�/?��.�%����� � ?�7�L�)�u�����ٙ�S�]��s+���?��r.�G��e���sBBB!Y�4]��) G���g OҴʡi�͐�6QԼ�' �2wT�~��p嬙�z�q��ˆ8a�-��f^ˆҀ���L����E4�GX^�5�(�R�a���>���F�v"! 
�5�~>�]�,�~*2��n�M���>�i�A �B1U����<�R&��ϊz2�*FV�ѵ����Fm������Z5���~�r�T-�=� �1�7���pP��N �otg���dǟGo޶D�l�U����T��hp8��ε���n#ӊ�[y98��(�N!![p5 P�D! �� $8��r�#r�/;� �ȪD rTԨ�J��f�y��i.w�#�n ��k��V���#�b7]���|�^l61�w�ܼO�\�=�:`��s:=3��H��zJ3ʾJ*�Ղ�f����V�8�c0�pV[�u�!5��*ZRJFw�|�(�Ù���{���5$�8p �d$�A�J��rI���4��Gm������V�2����#2��Y N ��C��zy���+e-���q��H�]<�9�W{��%&��g��2���[���sq �U���f�Ӎ�<.���@�e=�$h�y��r��� ��vޢ���1���{.�%E�I�6�����L,��s���F�e`�Z�w5�� ��?� ���uzR��0U��?́o�i#���t��)�u[#�{M�9�q��劘�?�OdqOџv8���q>�}�c��Y�:E��U��tj/�i��#�7�yQl�����4��vUg�~;�h�� �@'�\�09�9���S�Z�Ӕ�K]��KE�8�l{�?V��ܪ��{ւ�ٱ�ԕ��zd�U�tD��~�*.j��[Du0=��w'�kR�q��z5 x: Dߘ���k��WYh�Y�OUg ���t�w*�>u��du�L'P�����O%���iuմ���=�JpvnZcm�^��OٱC%܌�`�����Ee�L�u���غ�^�owȭ�<2�����W���v�����[�KK_P*�_i�ͮ\A�Fl��h ŭ��K��r�E��;FB�qSUs�'�W�'v�ߴp?tës��`�bm;b��H�bN���q�A���u���ki�Ԕ4A�J���"Ƌ��36C[�sLM�k���⺖��/����z�(J+{���+�|��8Q��\����͎���g��ޫ#-s\�F@�d��CR��4;��Y,⍾[4m�1��m�>��4�9B�� .�%�WTm�w�����Z։n��+�vD! :�ZO��7����t�.�\����u0 x��K>Et61�g$�B�!@!@!T ���%�̭/ ��]Ȁy+)IF>�����Ů����;�+Y�p��xzqY�3��!r|�L����͛��O�ތ�G�{��snV/9���v{Cd˲�~JE�*T�Q����[�G���z?{� �T��pg�H�;��ϊzBHP�m�6��`�>�n>��V�G� qEh��S� ��h�rZ߭�ю+$g�� ����xU:3�4��M{�f���^'�ܰ�}�7�y$�u�V��R�s@z�Z05��4d�i�&�Эe-��6�5���]�v�a# %dU']\|��vv���~����TuN`u ��I�gn'������C\#�|j�>�t��"�3�� ��$��̈́��1��K�yƤ��;�'⦉�A ��4�B1*�J���ft{I}fɓ�� �n��n!>ۦ!���\�n�i��& �s���+>]%yŐ]�����G�8f(�m,v6�{s=� `�8:�`�G�;�à����6��`��?S(V�"6觼}��WW�lQ�p��J�l�ST]��0V��LX?��j�,��ԝc}��O�;C�)�v���,toe/�Ԩ�D��hhFí] K&��H륎l�b�+'�.��Hޛl�> j�c�woI�����`+�Jݶ�D���z'S�÷/�r�_�[A�i��7v��w/��@ ~��T�pɼ� ^�)�z34��$D ��R=�l�[�j�a��FGT&�ŧ6��V��H�S�ttY]%o�3��y O�Y�4ⴹ�����D��F���94�zx���]{ܾ+7��*;P�nX�E I7t�XW���4�KD��i���epS��B72U{)B3cZ`nc;����բ��F�<�72Bמ������\k��F��?��$-�F�� ��Z,�fHۯhsM*�4��FY��@цbP�\�ÎW:s@JE�.�������b��N�D�$d��w�]49oi��Z|Ŏu���+}�WC�6b| Ǵ�P[��ps��Q��`�����=�G6�c� �R]N&�&�Fƣ������u�� i�Yњ[�������n���eu� 
~,�=�k�4d��Bז�������;���Q�˴Ѱ1����()�yT}4�5�pd&�i5��n�ZvN������eÔ�br�~^�6�����v�m���:���Q=�8���O���Pw8��汴gL,Ґ�(�]{K��<�G�����+,o��3^�暃]��j �E �i#;�Դ v�iz3ƃ� Ap&X.Iڤч~�vNBAMy�uf [��u@�v��3�5��$�V���5�����9��i?�72:ր7U����S~�"y$��Rv�C�+�5�NЋh������ �mt�+�4��*$Ұ!�1�����z��:*��3�l�sX�1���h�J�g��`{NTj#h#Q���yL�y�ڜ1,}d��h�qh�p�Eq��g,�f�����(���]'�E]�����S?y���O��G˽G� ��?�͓�$�Ew�4{���I�i=��o#�Ը�(��Ru��[�L�ܾ݉���e��������_�g��?�\oY������U�}'����A� ��������DU�e�����M�o��/.S��o�aVsd��H��a�2v8�����V琽셤� .q�"m/P�$����u/i���V�;��:���­�2��W��.�{uÖ����>���ô1���'2w�i L2 �W(�}d�9����� H�A�= �!�) �d�\D�������ԥ�f���j F�AA�p 䴖u�G��XHd����L����xb7�Ly5�X�����׳�G������a�����4���t9=KjIµ+���c�x7�Q���ߊ�!T*��� :ߣ��nb)�h+E�����̓�k�k��+MU�X� ���=�Л�Է&����ݽ����;��]��Xwj:���b��C�Z�i�>9��sRf��h����� ��y�NF��%Mn\���*TԵD*Tڥ@�SP��SR�Ir< ാ��=���|��$��\&����ʟ� ���].=�������ĴsX���U�6�����*�]q�T���s�]¾�%��> Ș������غ�C)[�x$���WL�(,P�6�m�� mķ�5��MJ`QU����3A��k�W�6 ��� �����p8��5�G��ߑ�8�8�㹠rZ�hm nM��\�/��R�D�45�Z(� 70�B��Ktu�/=uƨ�:n;�y[�)�4���Hʂ6�[yƙō?�4��P��;��Ini���l�CJ��_I��j�,m��A�)N ;o�?�o�+�ש���BQ]t�i�h�RQKu!��j.�HIu�D�W�'�EٴT�il�}YV�>�{[��������uV?Gض���.>�Ol�/�+�݌@�;� ��Kr#��yͮ N��R��b�4��f�+�N�������q���๮��'Z�hid/-m�/�����y+)���4��,�X��p0dC^��A��jk�h�|.��6݆�lu2?T��р�55�MJ�3lWL]U���C�g ��o �W���~���˯P�@H��H�۲�(rjD��OE��E�����˻L�9r��J�q�����hΝ?�b�)9�%2��׍k�(��p��D��l��8„�m 3�J�U��b���� 鵝��C+����p��;���Ŧ��L�J�6��VF���_�U�z���ϴ���I�U��"�������.�@f�I��f��8�R.��i��6�X�V���Ү�Yg�u&GK,.oU#X�H܉qp7{�h����@)�$����j���`�I��5�lQ����⊪�y f� �CҤJ��!%�\��o�:#�V�DkpVI=���h9�n�� ˾�gcm20�����u�p�kȯSj���姫�z�N �և�K�lg}�a�ⴴ��8_#�Ɨ8�h�y�EzGz�#d�Z]V{��o1�6�2;nG�4� ��[�Ⴐ����$�W1�� cƾEJ��'��S��[��Z�(i=��j�skq����1�mOF�,� �h�ܕ��0��1�P�v�a��6�N��ӆI���Ln�1�1���滑�x�l�x7�v���y���X���-�)���W�]�uN�P� � ��y�(U� a���m h&k�����(������gk� �1��M(]��7�v� �xi�K,a„,y"������j��<6V���Wm�' �c[]N ����@ܹ'U1���R��@�SR�T$�J�� &G�\5�}��� �Z�w�O�yfy��7�\O�\�n���=��(�.��v�OK��:�����2Q�q�� x�.ӦhC��=IU�ƙ��h��' �)��R��E�g��cuG!�i�}X� 
[K*����+�m+���F-=a�H難�����5� �p�U\���fd��#��Nh!fݪT׶�*" adL����edr8��߼�d��PKt�{\ ^�{.i��P��$l)�[�Ǎi�j�Y���8 5>��e�p'�D,8G����C������Ň�5ql���+u � ����Tv7�`;FK�� c��Tƫ�zQ�>�3�;���1�V��N>R�Ň�G'/�?�ƚZ�ˍI$�IQT�NB�2>m�6�o�K��rU�!��Ev�BiT�"�BH� � \ڧ!w2�VSKVt�D��k��p$�L��F�<QoG�� �����׍�9�Ts^���̞&L��Esx5��4�E�0�����VX�u~6�t<��sṷ~�t����O ������xR����Ů�\��Qk��� �'<��jpx���Go��R\�S.���Ë�ӯ#�: ���~�0'���t��ׁtsK��hd��4����=���^��{Z����i�EAS<|j㗔Q�E� �� b4���<��-KL�_��J6юÛoj�\�b�2m�9��Y��G�_������m�/�B����lo�H�aR93�o �=����Y#|�IBZ�h�c�5�a����sՈek���F �*N��$`sH ��2 �U7���?w+��:�!�t\�p�CP]x��c(�A)��Sj��qm> ���0:�c\�=��|����V��1�Q� ��j:�1 9c*�tx:�i�hAe p"��i��k�T�no�;�)Ri�w��WF�upJ��@�SR�pB@��6�j�R�Bˑ�W�WHv����o�v�8�uO��G`W3W.7����}:q�i=*�GĀ�,�7%(6��I�ݓ����xM�`8)����$��$�WfY,���64��4h�����^;(u$��^rf�ơ�3䎉�u�V_xg��9����N�р��ʐ59Q���4V[�4��M��X�L� 4纥5J� ����k�r4vdh����O*U�"�!����3C$�"��+G㌟6�G  �����Yt�Վ��2sN���ɉq�@#.�暁#?)��j5)[°�S1I��#��p�C�KKi#��'ǵ�^���/��LK[�7� �s�9�;ҭ�J���x�J)^�U�{'��EI����T4T�}�k�+ �~#��{j=�ﭑ��t�����\�A�/g�;���k��O��V��훧-Ή�k0|��� s����y.|G��W8���<έ�-��BKc�e�;��[_�+%g'���o��i�*V�@�#�TM��Ց�9��6��w ���ed��R l�N��M�)J������G�V�L�k?UQPK�#��(�/F᳓!d��dpȜ(���n��E���t� qX�"��Y#��t�}�`��v����FY�=҂ )`4|���]��xS. ȥr��ZFI�t�>�ƍMh��%{x0���͗� P�B�� � JS$U*B ���@!@!@!@QB!6��� ��-ςVM/F�µ��A�� �4�,�Yt�}k���X��6^�P�{��ӆV�&��u�F�v�Qmtg���g0��-.0�ݺ���jk��{�'j���H�W=�{��8ԯ.vd�$�Ҭkݺ%-ls�����|�Яn�A����dx9�g�x�Y��Ln$���)Z�sV4�cO�Y�F<~G��b��~�?� ���m%��w�џvH�)��bgH��,��f�%��B^�}��O�o�)��ү�����e�}۟Խ���.�c�$p������H�׆��u���5�x K#��.��P��i�vG0y4+��~�?�<1߳�O�~��-k�^�!|lq���q�HJZ�AT/S�Np�UgDXoG��E>;Q�y;_=�Xsy��C!��a�v�u��'9���k8v9�#��n���4 qћ���q x�nz��|���b�,�i�� ����x�we��R�EB!@$)Rn������z�x�F�-�pBF�8%E BD� �J��J��9%�<���.�Uͅ��Y)Y�$��j���b�7=�;qB���y�o �u�sV�^���qǷZF�t@�}nA�D�� ��!�k����6�C!Ƅސ���c��$/b� ֆ�ZA�P�t�yS��!e��!�!�!�!U )a�s($��7�sc�+�+��W�6y��Z�s]�k�k��P��Nٺ�[� ������zZ��X�PZ?) 
��;u�\_K#w�~8�N�+A»n��lQ�]��$���]�IIC��#�kN ^�����C�<�i36���~ � ͺ?�Y�չ$o�� �Ů����^�����@ � �ZG�S�a��� m k�w �0�AA)�Ǔ,z�8�&�>��V0�Ƶw�ܵ����(7%U��-%��c�l�u�C{��&Yܻ^L�f���sZ֊�8�� 䬷[m���O�i�?x�\�L�g.{�+��1���\Ʀ�@ i�NB�s ��龝F�Yfi�� )���Cy\���\d��{��s�4��O*�N⤞�u�j.uMv⚄/���|ܮ�P�*���R � �!P$J���PMN@!*D��� �!%�D"��}��:�f�R��E�z�х�}RFS+�w1�� �x/7�O[v��^�������J�).���`����Hm�ROz'1���0�\���S٧��Q��g?W��ub{i��]ą��Ykhu�����c}���]|�*�'��ix�϶h|�*7�����!|��X�<2��H�g��k���=����8��4k1[v C/N���{Z p�ؘ�Mݧk�-��4������9B�v��y9r�^ɡt�v������4�� �V�^S�+��z�:��\�.�w�1�}X�ö���j�3ӿ��t��O!�_��C����&]����o��4 �����+���ľ�&Jw�X> �KO"��`��V �n=�a\{k?�UT�8%_F>aP�"�R ��@���P �P�B (Q�)�Oc��ÁBi ��� � � T!�R �F4�՟ q!����MN �w"|��Jr��j�7Wol�����C���R���1����[�x�B$?_����c�*n���i�W��������/(�ұ���ͻ#���q�H�� �L��V��u��/�^Ž8�Q���s�t���Zߎ�&�������y5ɖ�eA�f�7B� Y'xP�CBq`��C0ƻT�{��G�,�M� �B �#%�=�_Z��l��6<;�W�^$һߣM'G>�Nl~�x��T�5�z*AN ��Q�v~*D( d�ge���:³J��Pv&�\ ��j�F��4ێ��;A�9;楆F����ӟ-�{��-Mk@� ��*]�n��+ pp�MB�L��x�ɻ��Tdؾ���d���Fv�T��*;BBBD T!UҖ��3kG�L<胗^.q�r��^7��Wkh�3SX����U��v���7K�8��}h�̖��=K7G<�%���KJ������}^�T��/����d�c�f�*rBBBBB ��A�zF� �HBT �zi�ʗZ"���ƽkxH�v�? �C��%y�Mz1�8�aoٻ�h~ONƹ;$�^#�u�6!5��^���O�W1�.�]7F�`H��bw��\�GԮ�ݛ�p��]"�x{.�o�� �R5�S�i���څo��+��X;���Z2�M�*����FѬs^�l��9㓼�P����9���*E��cHQ�k�f�ٸs�Ժe���� ���\/N����>n?%��t���3��ަ-������´TV��1ߕ�� �=����؎ �(��} z|�T! �BD��E�4�d�\��T�R%Z@�!�* p�@�P� ���f��HF�/ԦM�&���! 
P"���T�A�У�g?����5^ �m� ��w#�����ndδ>9�m�����Ƹm����{z�o�;E�G��ݐ�g�>��`^Ch��^9�I�q>k��}%2��;Ee��q �i�� Y��8Uk� {g�=�2Q�pAM��Gխl|26X��y�8�TsUKT���z3�Yl��E�4�FV�]��A�~J�{��Od`9����}�q#kA����su��{.���Nװ�5��#"4\,Ӵ�qb9�=hZ� hv]�k�5���0�x�հ�+)2 R!C,�vNН��Ɂ��G��J�#� ���5�-f�=��{l�o`EA�*�2^��S�e� ��8!W6��4a��#��%Mn\���P�*)P�*D U��Ii`�I94�>M�Z�����6�k$y�KCԳ��5gl��dv�s$���W�~3�B?t���xooL�WC�z�tm���>�;G�i�z��}��M,�}�a���S��5�M q�:r�`!@!@!@!@!@!@!@��E�8q'!�=2��՝���Լ�=K���:�e�aX�A�489�c�j����^����EA0B��t}�I/�������a����<�r�9-6j�:��������+9�,"�� �������D������F{�=������t���P>�s=�?�Wn ����1�;T�Ŵ���j3�8|v��E����OS�Z�� �����-i�p��׆֓Q�Nn?/qǃ��껅���m� ��.�9�p �dAȮ?���B��0��ג=�L�6X�G.���Uev���$v����U/N]�:�T�uB�8_O��g�B�*D�6L�`��Dȃ.k?*�*D-!P� � D�F D�(���P��b�:��KI����#��+l��TB �B繬cK��5�hĹ�4s)��~��.jt��p �P�MGjPb��;ݹL���Α�&�|Խj���Y/:�/7�`] .��j+�y�^��"�[,���i�fY�s�ye��66��$V��;�W��蟭۠�f��d����R÷�;�X�.�:�����7���p�+�ccs�m ���(��fM�m���[,P0E ��|&)#!���� W"3"���aӯ��~� ����������hk�7�.f����*RK�����ߢ�IK~Z��]- G�TDӫi9�����o�Me�ء�9�Dz�$�8z��`@��k#�2� �^�C��Ѥ:�F�ܤ�� u�E��C������_m H� ��v?�m��W���.���8C��w�q澴��������$�T��Q��D�kFƁ�:E�g���u�GP�����;���yu�at.+�(�0s�y�?���{*G:��*k�E@�Ȃ����47N�J��UƐ�/�i#ۈ9�{��W��~.��Y�](׀施�[MU��Xu��7� B�T!"BD T!!!�Al�2F:7�9��X�qS��V� �m��$�2{.��89������,�������#d�#��]�����"tR �ÛN�4�!p�j7� l���G,&7bЇ��G G�'X�e��e���i9��x��#��D&�G`q�qR�|x+C��L��C�ѵ�7�#�1��:��F�fR�Ni��,�� TҔ��v)-�@�dto&�:��x���5�J�^^^?{�g.櫝R�M,���'b�r�c_?%�&��O]�H�D3R��5�����OBT����P�(�R �f�P�D*�,d�>7�NrUB�'jICِ��63����&��J�QD�J�E�e릊��t��xҍ���ܫU�I},�G >�����äu�Hb���#m���c7�$M &����Y;��دH�9饭���m��fN�/�5�`Z{ з���%�=� _[�u�{�{Ndv����kk�M6�p�'8H�ėd�%K��{F��>��9ͺ�lkh��n�N׹� ��.��h#G��� ��o��j�<ׇ &���0���R�3+�N���� ��4��kE�5~��:��5���A8�k!�c�c���i4�4T8ePuQ3E}� ~���GՏZ޺id��n!ΒS�2�٧|n��D�E#�}6^iY�O$���z�<�L��@VcQ�} t�i;`|U������H ���KC������Z��[\�7������^�����87ch��4��V��mC��y� ����81�������SPv?C�^8-��G Lmc 4k]V����` kj��cm|�2="���Z&��ZZ�� Mpk�ZV��Լ������7n ]�6���]���ˡԢ����.��M ���4{��V9��Z] d�ɥ#'�P 
Ay��ю�M�2e{#�ѭ܅M7*q���MN�v��I�HsIii����FE\q��諒*�-� f4� �5,�G�ZN׹� v��ql�#���G��=�|�t�q�U�$�¤�P��z7n�Ŗ)+RX���]� �5�;A�\ѕ��RBOݼ=����O����g�y�ˤ�f^�u8X-�fk�Zj�H�j��7��[uSr��]е��6,�M�ё�渐�iZjä�'d�\3�[�{��\���Б ��H���}㸡%�����i��R���U����P R!���?O�H u�[�Ok�C�=nŠ{O`�PO�+WN�YX�a�狍��5�+*|^����?Լ��n���I�v�vC�W�N�wh���u�6�^���H�� �`+UP������B?�+�n!"TFu�:;�*�ԴCxo,�q��&�4���ne>����L릏��#}���g�$�W�d����O�ZkC��;�г��;�dZ�aݶ�J`���=�c0���=��if����oٵ��Ӭs�. �sIk�X�=�0~#X:��q�yG�� Sh�[���u� -5i�GQ�E��вZE�Dq�ym��ਨߗ���+0��d����h:�|��F^�Qg�Ϡc\�a�^k�+�tz����f��U����q+ج�6F۱��h�Ɔ��'�Ayv�o���=��U�Y鹅��U�5��4{\ñ�->}�����##N��8x�sX�x�;��)�鯣�,��+g~�U�-9r!y�����~�>�h$e]��4^�9�qˎ�:l��Lb*�஻s�f,��ZL{Z�-�D%BH�R!*PN�j�)� �,X���D�.m,Œz�褕�[��:6WW$m�P�*U� �Ϊա�DJ,�J�!@�ZU�1VV�J� Hn��=Vq�KUa�*��U��aP�*���iCf��O�M��8��XK٘���S�=��lK��P ����,�ס"U�� �p�r#"3 #�J�>� $��,��ٚ�h!TB :����[���P���! 8"�!�M�T ��@�H�i;A��R���}��|�PaO)|�?Q����;#Оj�A2��kO�T�2�Cv o* �5����=��Axr��N3Qab�v��ඊ�N�����f��q��B��hWV��o��[p�B�U�`�F�YB��#��-4�p޳Ee٥c��ֶ�?j{QKќ����I�t��\e!�`����H�$��J��ؽ���q�.�|~_�˗�c�v�tf�k�b����iFKOgaܷ!���y�`��Nd� ���њe�Q��jy���������' ��|����Ci�T�)�"��PF�U�=��;�pۭ��u A�*2T!!!! 
�+I��v� �=D�`%$TBH� �񦲵t����K^�#�r;]�w��Y�lk&�Ĝ�㋜w�S�T>8�E�� ���FPIJ��M(�J�PR%B)r� p ��7�*D���;;8�_+}��?G�e�G��_�v(Ve~��}<���^�����`� �0�賭?Gv����s�m<׬ ��˔�f��^'i腽�ٞ�?�1�ɦ�*�a�<$�H��9��{�������T.��/�c�G�F4۫ܭ��K߳Ŏ���w�hV�����b�X�J�:5�ɟ,����$]�����߻|3x����y�+wG�p�%�V�����څ�r�~\�Q��%��չYѥ HH�*�1MS���Z�HJ��� P�6(�T))�@%H��!Q,L�h�q�P�U*˕U��<.��>#B�e ����:+�M���F��\��X� xK H¦SmK��֧,��[��,Rk�u��{.��5����@!@ �n?h�)Sm�x�(Zi�ܒ� VZB* Z��?/r?i���X0�"����t��y��F?N'��r\�.��c7UmN��a{ ���<�YF7 aZl'<�v�SO�w����ixރ^�v����޶}۽ҹ�Ci\J�^�:�h�g�5k,�>�X���/�ෂۅBBR�V���ɴV�� KLiN�]e#�@91�۷l�2�4�R�Ay�cuos�4.Y�|�"�\Mf����ku2��N<<�ޓ�?��H�ok#�&���@r ��(8���+N=ƴ4T묝�G%�ڨ}W���ݩ�NsH�Q5m��o|��ä́� ���ݑݚ�,V�L��5�9�c�����tn��\#2{�5�~T\98e�;z8��>�N��r�b� ��as#K6qB J������{��֣$#"�vYu^�YM��B�g��w��[�EB�@BHUEz�I��;@��H1�O�B���+Zi[C�?�w��`�B�{}���A�@7�P�@4���ɒ�r�d�D\Ƽ6[v�c�T V��s�*+te����Xk�r�f����������� hoXt��v{��{�q�Z|���)����-�y��� ����������鈕��)�� q:SQ$��5��8c�x���E�„l$� m/�J�"��Z�֙���1܆�oV�ު��N�J^+��z�:�����gY٭n�Vz���_�ӄ�+�����n��I,�[o����� Y��5��\m�b��1/cĮ��1� sh�I�Գ����.�Թw�n�K���Wڻ���XGS�:�\cv�-��%gYtV�&�v �rc��Gy[���U�5��.�4pc��M� �Fgvc h��G�FG�޹p۴u���۷kZ�V�������Q�u��S��ҧ�i����[��)~�ޏ_��Z�~>2��N���S?[O ��y�jk��8�lƮ|���#���T]lт'{�X��E�@{����c���dk:�M� ��|6Ԧb6QX��! 
�!*D��Ui�*�, R!eVXj�0�R�t�BsR�Q��FS��#=���}A]��Ϣٱ���w��%罽��QB� u��� -�x�(Zm}��ђ*��ڢ�9"Kȼ���q�q�-.<�W17q�����j�2V�������ǀ}��w�9��Nfh��ƃ̯?5�v�&ݭ����/�` g�<�Ȟ�qh�(�s��� �Q�M&~���q���.{ޱ;Gp�\J�N��z��G��}�:��Ē7���Ի+�i�(ro����6��(�RP`�4�����:�h�f� ��Z䫎� �-�g���8��~ �����q�ʮYLq�9,��Ӏ�FPǍݧ?2v-;4 ����$�gz��g m5�����V�c#Ŗ[�T�u=�YV�mq&�qZeb�08 w��RX�t�����mܝ�( iy��#0ѫ��֛M+��7X/�6��V�5�2�=�8�F Iߏ%cGC� ���м�������i�oX�M�aROy����vn.Y�����ol���f������ra��c�Ifsy����p���pW肼�e�w^�p��PT�N[�XY.���i��&�����8+V;[%e���� � Yi��vEH�é�� �S�0���+�Ll������i���M%�Xv:R+��^Y�@ޖ���!���Ni�/���4d��T$�@�M��ȣ�n�"��0斜������h�̔ޕ�i]u��%�7��� ����|�:���+xr���sϋ ���0�#�ˢ���@��,;�1�cZoґ���Z�WmV[�1�rD潍����o4���M֍�Ecv����_�q��g�(h��#� �X����A1a]s�3�U�,f�M�1��ch{��\��s�vF e��u���Q�7o�W.|򚩏���6� �ݩoi�P��p{Mx�,��@�flvGL�5βFe��ը5ִ����(��Ȭ�˖V���?�"�w�c��d��#�|�����GZ��9�vx��������/�X�+ a%��^>AX�[�ȥ�[�ȫy���x0�����D"7�q8��>��7��A sZ^{�n_-�ڇ�� �¸��'ZݾE'ZݾE\���j��a��= �kv�u���Wd� D�����$0ct1{�ɣd id���..�h$�p��YQ�<��hq���f�N�uQ\f�9]E���B\��<������ �V�e�ȠJݾEzdy�=7�n�"���|�!h�����ȣ�n�"�?Ih+5�Rx"�{�/��x�;L}��٤|S]Yb>=��W�u���Q}�|����n��΋�`�pVF�"��0�������+Mtf�k��]������Ϊ̇��$+�4�с�$��������y���HX�C����*�y�E�v+%H��R�$����Z�FBUWaP����~G���=�*�,BT�ZTX�෋5"�GjNs���R��A�'ih?����Ay��DB���X�ʏ?�!����Bm��Šz/�_ ���%���A�[��;����� N���