ssl_des/lib/openssl/crypto/aes/asm/aes-c64xplus.pl
2019-04-06 16:42:39 +03:00

1382 lines
43 KiB
Perl

#! /usr/bin/env perl
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# [Endian-neutral] AES for C64x+.
#
# Even though SPLOOPs are scheduled for 13 cycles, and thus expected
# performance is ~8.5 cycles per byte processed with 128-bit key,
# measured performance turned out to be ~10 cycles per byte. The
# discrepancy must be caused by limitations of L1D memory banking(*),
# see the SPRU871 TI publication for further details. If it's any
# consolation, it's still ~20% faster than TI's linear assembly module
# anyway... Compared to aes_core.c compiled with cl6x 6.0 with -mv6400+
# -o2 options this code is 3.75x faster and almost 3x smaller (tables
# included).
#
# (*) This means that there might be subtle correlation between data
# and timing and one can wonder if it can be ... attacked:-(
# On the other hand this also means that *if* one chooses to
# implement *4* T-tables variant [instead of 1 T-table as in
# this implementation, or in addition to], then one ought to
# *interleave* them. Even though it complicates addressing,
# references to interleaved tables would be guaranteed not to
# clash. I reckon that it should be possible to break 8 cycles
# per byte "barrier," i.e. improve by ~20%, naturally at the
# cost of 8x increased pressure on L1D. 8x because you'd have
# to interleave both Te and Td tables...
# Select the output file: shift arguments off the command line until one
# looks like "name.ext"; anything before it is discarded.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to the chosen file. The 3-argument form keeps the file
# name from being parsed for mode characters, and a failed open is now
# reported instead of being silently ignored. When no output name was
# given STDOUT is left untouched, as before.
if ($output) {
    open STDOUT,'>',$output or die "can't open $output: $!";
}
# Register allocation shared by the assembly templates that follow.
($TEA,$TEB)=("A5","B5");	# lookup table pointers, A and B side
($KPA,$KPB)=("A3","B1");	# key schedule pointers, A and B side
@K=("A6","B6","A7","B7");	# current round key words
@s=("A8","B8","A9","B9");	# state words
# Temporaries for table lookups. Encryption (Te*) and decryption (Td*)
# share the same registers.
@Te0=@Td0=("A16","B16","A17","B17");
@Te1=@Td1=("A18","B18","A19","B19");
@Te2=@Td2=("A20","B20","A21","B21");
@Te3=@Td3=("A22","B22","A23","B23");
$code=<<___;
.text
;; Assemblers older than version 7000000 get __TI_EABI__ forced to 0.
.if .ASSEMBLER_VERSION<7000000
.asg 0,__TI_EABI__
.endif
;; Under EABI the underscore-prefixed names used throughout this file
;; are substituted with the unprefixed public names.
.if __TI_EABI__
.nocmp
.asg AES_encrypt,_AES_encrypt
.asg AES_decrypt,_AES_decrypt
.asg AES_set_encrypt_key,_AES_set_encrypt_key
.asg AES_set_decrypt_key,_AES_set_decrypt_key
.asg AES_ctr32_encrypt,_AES_ctr32_encrypt
.endif
;; Symbolic names for the registers used as return address, arguments,
;; return value and stack pointer.
.asg B3,RA
.asg A4,INP
.asg B4,OUT
.asg A6,KEY
.asg A4,RET
.asg B15,SP
;; EXTn is the left-shift operand of "EXTU src,EXTn,24,dst", i.e. it
;; selects which byte of a word is extracted; TBLn is a ROTL amount.
.eval 24,EXT0
.eval 16,EXT1
.eval 8,EXT2
.eval 0,EXT3
.eval 8,TBL1
.eval 16,TBL2
.eval 24,TBL3
;; On big-endian targets the byte selectors and rotate amounts are
;; mirrored, which is what lets the same tables serve both byte orders.
.if .BIG_ENDIAN
.eval 24-EXT0,EXT0
.eval 24-EXT1,EXT1
.eval 24-EXT2,EXT2
.eval 24-EXT3,EXT3
.eval 32-TBL1,TBL1
.eval 32-TBL2,TBL2
.eval 32-TBL3,TBL3
.endif
;;====================================================================
;; AES_encrypt: encrypt one 16-byte block.
;;   INP (A4) - input block, OUT (B4) - output block, KEY (A6) - key
;;   schedule; the round count is read from word 60 of the schedule.
;; __encrypt is a second entry point that expects B2 to be preset by
;; the caller: with B2=0 the predicated input loads and output stores
;; are skipped, the state is taken from A9:A8 and B9:B8, and the result
;; is left in the same register pairs.
;;====================================================================
.global _AES_encrypt
_AES_encrypt:
.asmfunc
MVK 1,B2
__encrypt:
;; Both variants below form the address of AES_Te position-independently
;; (offset from __encrypt, later added to the ADDKPC result in B0) and
;; set up two key pointers, one at KEY+0 and one at KEY+4.
.if __TI_EABI__
[B2] LDNDW *INP++,A9:A8 ; load input
|| MVKL \$PCR_OFFSET(AES_Te,__encrypt),$TEA
|| ADDKPC __encrypt,B0
[B2] LDNDW *INP++,B9:B8
|| MVKH \$PCR_OFFSET(AES_Te,__encrypt),$TEA
|| ADD 0,KEY,$KPA
|| ADD 4,KEY,$KPB
.else
[B2] LDNDW *INP++,A9:A8 ; load input
|| MVKL (AES_Te-__encrypt),$TEA
|| ADDKPC __encrypt,B0
[B2] LDNDW *INP++,B9:B8
|| MVKH (AES_Te-__encrypt),$TEA
|| ADD 0,KEY,$KPA
|| ADD 4,KEY,$KPB
.endif
;; Fetch the zero round key into the Te0 temporaries and the round
;; count into B0; A0 is then reused to hold sizeof(AES_Te).
LDW *$KPA++[2],$Te0[0] ; zero round key
|| LDW *$KPB++[2],$Te0[1]
|| MVK 60,A0
|| ADD B0,$TEA,$TEA ; AES_Te
LDW *KEY[A0],B0 ; rounds
|| MVK 1024,A0 ; sizeof(AES_Te)
LDW *$KPA++[2],$Te0[2]
|| LDW *$KPB++[2],$Te0[3]
|| MV $TEA,$TEB
NOP
;; Copy the input words into the state registers; the order within each
;; register pair depends on endianness.
.if .BIG_ENDIAN
MV A9,$s[0]
|| MV A8,$s[1]
|| MV B9,$s[2]
|| MV B8,$s[3]
.else
MV A8,$s[0]
|| MV A9,$s[1]
|| MV B8,$s[2]
|| MV B9,$s[3]
.endif
;; XOR state words 0 and 1 with the zero round key here; words 2 and 3
;; are handled by the "modulo-scheduled" XORs inside the loop body.
XOR $Te0[0],$s[0],$s[0]
|| XOR $Te0[1],$s[1],$s[1]
|| LDW *$KPA++[2],$K[0] ; 1st round key
|| LDW *$KPB++[2],$K[1]
;; The loop counter ILC is loaded with rounds-2.
SUB B0,2,B0
SPLOOPD 13
|| MVC B0,ILC
|| LDW *$KPA++[2],$K[2]
|| LDW *$KPB++[2],$K[3]
;;====================================================================
;; Loop body: each state byte is extracted with EXTU and used as a word
;; index into AES_Te. ROTL by TBL1..TBL3 turns the loaded word into the
;; Te1..Te3 value (Te0 is used as loaded) before it is XORed with the
;; round key and the other table values. The next round key is loaded
;; along the way.
EXTU $s[1],EXT1,24,$Te1[1]
|| EXTU $s[0],EXT3,24,$Te3[0]
LDW *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0
|| LDW *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1
|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled
|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled
|| EXTU $s[1],EXT3,24,$Te3[1]
|| EXTU $s[0],EXT1,24,$Te1[0]
LDW *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2
|| LDW *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3
|| EXTU $s[2],EXT2,24,$Te2[2]
|| EXTU $s[3],EXT2,24,$Te2[3]
LDW *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0
|| LDW *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1
|| EXTU $s[3],EXT3,24,$Te3[3]
|| EXTU $s[2],EXT1,24,$Te1[2]
LDW *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0
|| LDW *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1
|| EXTU $s[0],EXT2,24,$Te2[0]
|| EXTU $s[1],EXT2,24,$Te2[1]
LDW *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2
|| LDW *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3
|| EXTU $s[3],EXT1,24,$Te1[3]
|| EXTU $s[2],EXT3,24,$Te3[2]
LDW *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2
|| LDW *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3
|| ROTL $Te1[1],TBL1,$Te3[0] ; t0
|| ROTL $Te3[0],TBL3,$Te1[1] ; t1
|| EXTU $s[0],EXT0,24,$Te0[0]
|| EXTU $s[1],EXT0,24,$Te0[1]
LDW *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0
|| LDW *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1
|| ROTL $Te3[1],TBL3,$Te1[0] ; t2
|| ROTL $Te1[0],TBL1,$Te3[1] ; t3
|| EXTU $s[2],EXT0,24,$Te0[2]
|| EXTU $s[3],EXT0,24,$Te0[3]
LDW *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2
|| LDW *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3
|| ROTL $Te2[2],TBL2,$Te2[2] ; t0
|| ROTL $Te2[3],TBL2,$Te2[3] ; t1
|| XOR $K[0],$Te3[0],$s[0]
|| XOR $K[1],$Te1[1],$s[1]
ROTL $Te3[3],TBL3,$Te1[2] ; t0
|| ROTL $Te1[2],TBL1,$Te3[3] ; t1
|| XOR $K[2],$Te1[0],$s[2]
|| XOR $K[3],$Te3[1],$s[3]
|| LDW *$KPA++[2],$K[0] ; next round key
|| LDW *$KPB++[2],$K[1]
ROTL $Te2[0],TBL2,$Te2[0] ; t2
|| ROTL $Te2[1],TBL2,$Te2[1] ; t3
|| XOR $s[0],$Te2[2],$s[0]
|| XOR $s[1],$Te2[3],$s[1]
|| LDW *$KPA++[2],$K[2]
|| LDW *$KPB++[2],$K[3]
ROTL $Te1[3],TBL1,$Te3[2] ; t2
|| ROTL $Te3[2],TBL3,$Te1[3] ; t3
|| XOR $s[0],$Te1[2],$s[0]
|| XOR $s[1],$Te3[3],$s[1]
XOR $s[2],$Te2[0],$s[2]
|| XOR $s[3],$Te2[1],$s[3]
|| XOR $s[0],$Te0[0],$s[0]
|| XOR $s[1],$Te0[1],$s[1]
SPKERNEL
|| XOR.L $s[2],$Te3[2],$s[2]
|| XOR.L $s[3],$Te1[3],$s[3]
;;====================================================================
;; Last round: both table pointers are advanced by A0 (1024) so that
;; they address the byte table that follows AES_Te, and lookups become
;; byte loads (LDBU). PACK2/PACKL4 then reassemble four bytes into each
;; output word, which is XORed with the last round key. The packets that
;; follow BNOP RA occupy its delay slots; they move the result into
;; A9:A8 and B9:B8 and, when B2 is set, store it through OUT.
ADD.D ${TEA},A0,${TEA} ; point to Te4
|| ADD.D ${TEB},A0,${TEB}
|| EXTU $s[1],EXT1,24,$Te1[1]
|| EXTU $s[0],EXT3,24,$Te3[0]
LDBU *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0
|| LDBU *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1
|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled
|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled
|| EXTU $s[0],EXT0,24,$Te0[0]
|| EXTU $s[1],EXT0,24,$Te0[1]
LDBU *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0
|| LDBU *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1
|| EXTU $s[3],EXT3,24,$Te3[3]
|| EXTU $s[2],EXT1,24,$Te1[2]
LDBU *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0
|| LDBU *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1
|| EXTU $s[2],EXT2,24,$Te2[2]
|| EXTU $s[3],EXT2,24,$Te2[3]
LDBU *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0
|| LDBU *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1
|| EXTU $s[1],EXT3,24,$Te3[1]
|| EXTU $s[0],EXT1,24,$Te1[0]
LDBU *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2
|| LDBU *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3
|| EXTU $s[3],EXT1,24,$Te1[3]
|| EXTU $s[2],EXT3,24,$Te3[2]
LDBU *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2
|| LDBU *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3
|| EXTU $s[2],EXT0,24,$Te0[2]
|| EXTU $s[3],EXT0,24,$Te0[3]
LDBU *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2
|| LDBU *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3
|| EXTU $s[0],EXT2,24,$Te2[0]
|| EXTU $s[1],EXT2,24,$Te2[1]
LDBU *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2
|| LDBU *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3
.if .BIG_ENDIAN
PACK2 $Te0[0],$Te1[1],$Te0[0]
|| PACK2 $Te0[1],$Te1[2],$Te0[1]
PACK2 $Te2[2],$Te3[3],$Te2[2]
|| PACK2 $Te2[3],$Te3[0],$Te2[3]
PACKL4 $Te0[0],$Te2[2],$Te0[0]
|| PACKL4 $Te0[1],$Te2[3],$Te0[1]
XOR $K[0],$Te0[0],$Te0[0] ; s[0]
|| XOR $K[1],$Te0[1],$Te0[1] ; s[1]
PACK2 $Te0[2],$Te1[3],$Te0[2]
|| PACK2 $Te0[3],$Te1[0],$Te0[3]
PACK2 $Te2[0],$Te3[1],$Te2[0]
|| PACK2 $Te2[1],$Te3[2],$Te2[1]
|| BNOP RA
PACKL4 $Te0[2],$Te2[0],$Te0[2]
|| PACKL4 $Te0[3],$Te2[1],$Te0[3]
XOR $K[2],$Te0[2],$Te0[2] ; s[2]
|| XOR $K[3],$Te0[3],$Te0[3] ; s[3]
MV $Te0[0],A9
|| MV $Te0[1],A8
MV $Te0[2],B9
|| MV $Te0[3],B8
|| [B2] STNDW A9:A8,*OUT++
[B2] STNDW B9:B8,*OUT++
.else
PACK2 $Te1[1],$Te0[0],$Te1[1]
|| PACK2 $Te1[2],$Te0[1],$Te1[2]
PACK2 $Te3[3],$Te2[2],$Te3[3]
|| PACK2 $Te3[0],$Te2[3],$Te3[0]
PACKL4 $Te3[3],$Te1[1],$Te1[1]
|| PACKL4 $Te3[0],$Te1[2],$Te1[2]
XOR $K[0],$Te1[1],$Te1[1] ; s[0]
|| XOR $K[1],$Te1[2],$Te1[2] ; s[1]
PACK2 $Te1[3],$Te0[2],$Te1[3]
|| PACK2 $Te1[0],$Te0[3],$Te1[0]
PACK2 $Te3[1],$Te2[0],$Te3[1]
|| PACK2 $Te3[2],$Te2[1],$Te3[2]
|| BNOP RA
PACKL4 $Te3[1],$Te1[3],$Te1[3]
|| PACKL4 $Te3[2],$Te1[0],$Te1[0]
XOR $K[2],$Te1[3],$Te1[3] ; s[2]
|| XOR $K[3],$Te1[0],$Te1[0] ; s[3]
MV $Te1[1],A8
|| MV $Te1[2],A9
MV $Te1[3],B8
|| MV $Te1[0],B9
|| [B2] STNDW A9:A8,*OUT++
[B2] STNDW B9:B8,*OUT++
.endif
.endasmfunc
;;====================================================================
;; AES_decrypt: decrypt one 16-byte block.
;;   INP (A4) - input block, OUT (B4) - output block, KEY (A6) - key
;;   schedule; the round count is read from word 60 of the schedule.
;; __decrypt is a second entry point that expects B2 to be preset by
;; the caller: with B2=0 the predicated input loads and output stores
;; are skipped, the state is taken from A9:A8 and B9:B8, and the result
;; is left in the same register pairs.
;; The structure mirrors AES_encrypt; the table is AES_Td and the
;; pairing of state bytes with table rotations differs.
;;====================================================================
.global _AES_decrypt
_AES_decrypt:
.asmfunc
MVK 1,B2
__decrypt:
;; Both variants below form the address of AES_Td position-independently
;; (offset from __decrypt, later added to the ADDKPC result in B0) and
;; set up two key pointers, one at KEY+0 and one at KEY+4.
.if __TI_EABI__
[B2] LDNDW *INP++,A9:A8 ; load input
|| MVKL \$PCR_OFFSET(AES_Td,__decrypt),$TEA
|| ADDKPC __decrypt,B0
[B2] LDNDW *INP++,B9:B8
|| MVKH \$PCR_OFFSET(AES_Td,__decrypt),$TEA
|| ADD 0,KEY,$KPA
|| ADD 4,KEY,$KPB
.else
[B2] LDNDW *INP++,A9:A8 ; load input
|| MVKL (AES_Td-__decrypt),$TEA
|| ADDKPC __decrypt,B0
[B2] LDNDW *INP++,B9:B8
|| MVKH (AES_Td-__decrypt),$TEA
|| ADD 0,KEY,$KPA
|| ADD 4,KEY,$KPB
.endif
;; Fetch the zero round key into the Td0 temporaries and the round
;; count into B0; A0 is then reused to hold sizeof(AES_Td).
LDW *$KPA++[2],$Td0[0] ; zero round key
|| LDW *$KPB++[2],$Td0[1]
|| MVK 60,A0
|| ADD B0,$TEA,$TEA ; AES_Td
LDW *KEY[A0],B0 ; rounds
|| MVK 1024,A0 ; sizeof(AES_Td)
LDW *$KPA++[2],$Td0[2]
|| LDW *$KPB++[2],$Td0[3]
|| MV $TEA,$TEB
NOP
;; Copy the input words into the state registers; the order within each
;; register pair depends on endianness.
.if .BIG_ENDIAN
MV A9,$s[0]
|| MV A8,$s[1]
|| MV B9,$s[2]
|| MV B8,$s[3]
.else
MV A8,$s[0]
|| MV A9,$s[1]
|| MV B8,$s[2]
|| MV B9,$s[3]
.endif
;; XOR state words 0 and 1 with the zero round key here; words 2 and 3
;; are handled by the "modulo-scheduled" XORs inside the loop body.
XOR $Td0[0],$s[0],$s[0]
|| XOR $Td0[1],$s[1],$s[1]
|| LDW *$KPA++[2],$K[0] ; 1st round key
|| LDW *$KPB++[2],$K[1]
;; The loop counter ILC is loaded with rounds-2.
SUB B0,2,B0
SPLOOPD 13
|| MVC B0,ILC
|| LDW *$KPA++[2],$K[2]
|| LDW *$KPB++[2],$K[3]
;;====================================================================
;; Loop body: each state byte is extracted with EXTU and used as a word
;; index into AES_Td. ROTL by TBL1..TBL3 turns the loaded word into the
;; Td1..Td3 value (Td0 is used as loaded) before it is XORed with the
;; round key and the other table values. The next round key is loaded
;; along the way.
EXTU $s[1],EXT3,24,$Td3[1]
|| EXTU $s[0],EXT1,24,$Td1[0]
LDW *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0
|| LDW *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1
|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled
|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled
|| EXTU $s[1],EXT1,24,$Td1[1]
|| EXTU $s[0],EXT3,24,$Td3[0]
LDW *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2
|| LDW *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3
|| EXTU $s[2],EXT2,24,$Td2[2]
|| EXTU $s[3],EXT2,24,$Td2[3]
LDW *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0
|| LDW *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1
|| EXTU $s[3],EXT1,24,$Td1[3]
|| EXTU $s[2],EXT3,24,$Td3[2]
LDW *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0
|| LDW *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1
|| EXTU $s[0],EXT2,24,$Td2[0]
|| EXTU $s[1],EXT2,24,$Td2[1]
LDW *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2
|| LDW *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3
|| EXTU $s[3],EXT3,24,$Td3[3]
|| EXTU $s[2],EXT1,24,$Td1[2]
LDW *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2
|| LDW *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3
|| ROTL $Td3[1],TBL3,$Td1[0] ; t0
|| ROTL $Td1[0],TBL1,$Td3[1] ; t1
|| EXTU $s[0],EXT0,24,$Td0[0]
|| EXTU $s[1],EXT0,24,$Td0[1]
LDW *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0
|| LDW *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1
|| ROTL $Td1[1],TBL1,$Td3[0] ; t2
|| ROTL $Td3[0],TBL3,$Td1[1] ; t3
|| EXTU $s[2],EXT0,24,$Td0[2]
|| EXTU $s[3],EXT0,24,$Td0[3]
LDW *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2
|| LDW *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3
|| ROTL $Td2[2],TBL2,$Td2[2] ; t0
|| ROTL $Td2[3],TBL2,$Td2[3] ; t1
|| XOR $K[0],$Td1[0],$s[0]
|| XOR $K[1],$Td3[1],$s[1]
ROTL $Td1[3],TBL1,$Td3[2] ; t0
|| ROTL $Td3[2],TBL3,$Td1[3] ; t1
|| XOR $K[2],$Td3[0],$s[2]
|| XOR $K[3],$Td1[1],$s[3]
|| LDW *$KPA++[2],$K[0] ; next round key
|| LDW *$KPB++[2],$K[1]
ROTL $Td2[0],TBL2,$Td2[0] ; t2
|| ROTL $Td2[1],TBL2,$Td2[1] ; t3
|| XOR $s[0],$Td2[2],$s[0]
|| XOR $s[1],$Td2[3],$s[1]
|| LDW *$KPA++[2],$K[2]
|| LDW *$KPB++[2],$K[3]
ROTL $Td3[3],TBL3,$Td1[2] ; t2
|| ROTL $Td1[2],TBL1,$Td3[3] ; t3
|| XOR $s[0],$Td3[2],$s[0]
|| XOR $s[1],$Td1[3],$s[1]
XOR $s[2],$Td2[0],$s[2]
|| XOR $s[3],$Td2[1],$s[3]
|| XOR $s[0],$Td0[0],$s[0]
|| XOR $s[1],$Td0[1],$s[1]
SPKERNEL
|| XOR.L $s[2],$Td1[2],$s[2]
|| XOR.L $s[3],$Td3[3],$s[3]
;;====================================================================
;; Last round: both table pointers are advanced by A0 (1024) to the
;; byte table the original comment calls Td4, and lookups become byte
;; loads (LDBU). PACK2/PACKL4 then reassemble four bytes into each
;; output word, which is XORed with the last round key. The packets that
;; follow BNOP RA occupy its delay slots; they move the result into
;; A9:A8 and B9:B8 and, when B2 is set, store it through OUT.
ADD.D ${TEA},A0,${TEA} ; point to Td4
|| ADD.D ${TEB},A0,${TEB}
|| EXTU $s[1],EXT3,24,$Td3[1]
|| EXTU $s[0],EXT1,24,$Td1[0]
LDBU *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0
|| LDBU *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1
|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled
|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled
|| EXTU $s[0],EXT0,24,$Td0[0]
|| EXTU $s[1],EXT0,24,$Td0[1]
LDBU *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0
|| LDBU *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1
|| EXTU $s[2],EXT2,24,$Td2[2]
|| EXTU $s[3],EXT2,24,$Td2[3]
LDBU *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0
|| LDBU *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1
|| EXTU $s[3],EXT1,24,$Td1[3]
|| EXTU $s[2],EXT3,24,$Td3[2]
LDBU *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0
|| LDBU *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1
|| EXTU $s[1],EXT1,24,$Td1[1]
|| EXTU $s[0],EXT3,24,$Td3[0]
LDBU *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2
|| LDBU *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3
|| EXTU $s[0],EXT2,24,$Td2[0]
|| EXTU $s[1],EXT2,24,$Td2[1]
LDBU *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2
|| LDBU *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3
|| EXTU $s[3],EXT3,24,$Td3[3]
|| EXTU $s[2],EXT1,24,$Td1[2]
LDBU *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2
|| LDBU *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3
|| EXTU $s[2],EXT0,24,$Td0[2]
|| EXTU $s[3],EXT0,24,$Td0[3]
LDBU *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2
|| LDBU *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3
.if .BIG_ENDIAN
PACK2 $Td0[0],$Td1[3],$Td0[0]
|| PACK2 $Td0[1],$Td1[0],$Td0[1]
PACK2 $Td2[2],$Td3[1],$Td2[2]
|| PACK2 $Td2[3],$Td3[2],$Td2[3]
PACKL4 $Td0[0],$Td2[2],$Td0[0]
|| PACKL4 $Td0[1],$Td2[3],$Td0[1]
XOR $K[0],$Td0[0],$Td0[0] ; s[0]
|| XOR $K[1],$Td0[1],$Td0[1] ; s[1]
PACK2 $Td0[2],$Td1[1],$Td0[2]
|| PACK2 $Td0[3],$Td1[2],$Td0[3]
PACK2 $Td2[0],$Td3[3],$Td2[0]
|| PACK2 $Td2[1],$Td3[0],$Td2[1]
|| BNOP RA
PACKL4 $Td0[2],$Td2[0],$Td0[2]
|| PACKL4 $Td0[3],$Td2[1],$Td0[3]
XOR $K[2],$Td0[2],$Td0[2] ; s[2]
|| XOR $K[3],$Td0[3],$Td0[3] ; s[3]
MV $Td0[0],A9
|| MV $Td0[1],A8
MV $Td0[2],B9
|| MV $Td0[3],B8
|| [B2] STNDW A9:A8,*OUT++
[B2] STNDW B9:B8,*OUT++
.else
PACK2 $Td1[3],$Td0[0],$Td1[3]
|| PACK2 $Td1[0],$Td0[1],$Td1[0]
PACK2 $Td3[1],$Td2[2],$Td3[1]
|| PACK2 $Td3[2],$Td2[3],$Td3[2]
PACKL4 $Td3[1],$Td1[3],$Td1[3]
|| PACKL4 $Td3[2],$Td1[0],$Td1[0]
XOR $K[0],$Td1[3],$Td1[3] ; s[0]
|| XOR $K[1],$Td1[0],$Td1[0] ; s[1]
PACK2 $Td1[1],$Td0[2],$Td1[1]
|| PACK2 $Td1[2],$Td0[3],$Td1[2]
PACK2 $Td3[3],$Td2[0],$Td3[3]
|| PACK2 $Td3[0],$Td2[1],$Td3[0]
|| BNOP RA
PACKL4 $Td3[3],$Td1[1],$Td1[1]
|| PACKL4 $Td3[0],$Td1[2],$Td1[2]
XOR $K[2],$Td1[1],$Td1[1] ; s[2]
|| XOR $K[3],$Td1[2],$Td1[2] ; s[3]
MV $Td1[3],A8
|| MV $Td1[0],A9
MV $Td1[1],B8
|| MV $Td1[2],B9
|| [B2] STNDW A9:A8,*OUT++
[B2] STNDW B9:B8,*OUT++
.endif
.endasmfunc
___
{
# Register names local to the key-setup templates in this block.
# The round-key registers are extended with the state registers so that
# up to eight key words can be held at once.
my @K = (@K, @s);                        # extended key
# B16..B19 serve as scratch for the byte-table lookups.
my @Te4 = map { "B$_" } 16 .. 19;
# Products of the round key by 0x09, 0x0B, 0x0D and 0x0E
# (used in AES_set_decrypt_key); they reuse the Te0..Te3 registers.
my @Kx9 = @Te0;
my @KxB = @Te1;
my @KxD = @Te2;
my @KxE = @Te3;
$code.=<<___;
;;====================================================================
;; AES_set_encrypt_key: expand a user key into the key schedule.
;;   INP (A4) - user key, BITS (B4, same register as OUT) - key length
;;   in bits, KEY (A6) - key schedule to fill.
;; Returns in RET (A4): 0 on success, -1 if INP or KEY is zero, -2 if
;; the bit length is not 128, 192 or 256. B0 is left holding the round
;; count (10, 12 or 14) on success and zero on failure; the caller at
;; AES_set_decrypt_key tests B0 after calling __set_encrypt_key.
;; The round count is stored at byte offset 240 of the schedule.
;;====================================================================
.asg OUT,BITS
.global _AES_set_encrypt_key
_AES_set_encrypt_key:
__set_encrypt_key:
.asmfunc
MV INP,A0
|| SHRU BITS,5,BITS ; 128-192-256 -> 4-6-8
|| MV KEY,A1
;; Null-pointer checks. A0 and A1 double as predicates for the rest of
;; the prologue, so everything below is skipped once either is zero.
[!A0] B RA
||[!A0] MVK -1,RET
||[!A0] MVK 1,A1 ; only one B RA
[!A1] B RA
||[!A1] MVK -1,RET
||[!A1] MVK 0,A0
|| MVK 0,B0
|| MVK 0,A1
;; Load up to 32 bytes of user key and branch on BITS/32: 4 selects
;; key128?, 6 selects key192?, 8 selects key256?. A1 gates the loads
;; beyond the first 8 bytes (BITS/32 greater than 3, 5 and 7).
[A0] LDNDW *INP++,A9:A8
|| [A0] CMPEQ 4,BITS,B0
|| [A0] CMPLT 3,BITS,A1
[B0] B key128?
|| [A1] LDNDW *INP++,B9:B8
|| [A0] CMPEQ 6,BITS,B0
|| [A0] CMPLT 5,BITS,A1
[B0] B key192?
|| [A1] LDNDW *INP++,B17:B16
|| [A0] CMPEQ 8,BITS,B0
|| [A0] CMPLT 7,BITS,A1
[B0] B key256?
|| [A1] LDNDW *INP++,B19:B18
;; While the branches above are in flight, form the address of AES_Te4
;; position-independently and set up the two key schedule pointers.
.if __TI_EABI__
[A0] ADD 0,KEY,$KPA
|| [A0] ADD 4,KEY,$KPB
|| [A0] MVKL \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
|| [A0] ADDKPC __set_encrypt_key,B6
[A0] MVKH \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
[A0] ADD B6,$TEA,$TEA ; AES_Te4
.else
[A0] ADD 0,KEY,$KPA
|| [A0] ADD 4,KEY,$KPB
|| [A0] MVKL (AES_Te4-__set_encrypt_key),$TEA
|| [A0] ADDKPC __set_encrypt_key,B6
[A0] MVKH (AES_Te4-__set_encrypt_key),$TEA
[A0] ADD B6,$TEA,$TEA ; AES_Te4
.endif
NOP
NOP
;; Reached only when none of the branches above was taken.
BNOP RA,5
|| MVK -2,RET ; unknown bit length
|| MVK 0,B0 ; redundant
;;====================================================================
;;====================================================================
;; 128-bit key: four key words per step. A30 is set to AES_Te4+256,
;; from where the rcon words are read. Each loop iteration stores the
;; current four words, looks up the four bytes of the last word in
;; AES_Te4, repacks them with PACK2/PACKL4 and XORs the result and
;; rcon[i] into the running words to form the next four.
key128?:
.if .BIG_ENDIAN
MV A9,$K[0]
|| MV A8,$K[1]
|| MV B9,$Te4[2]
|| MV B8,$K[3]
.else
MV A8,$K[0]
|| MV A9,$K[1]
|| MV B8,$Te4[2]
|| MV B9,$K[3]
.endif
MVK 256,A0
|| MVK 9,B0
SPLOOPD 14
|| MVC B0,ILC
|| MV $TEA,$TEB
|| ADD $TEA,A0,A30 ; rcon
;;====================================================================
LDW *A30++[1],A31 ; rcon[i]
|| MV $Te4[2],$K[2]
|| EXTU $K[3],EXT1,24,$Te4[0]
LDBU *${TEB}[$Te4[0]],$Te4[0]
|| MV $K[3],A0
|| EXTU $K[3],EXT2,24,$Te4[1]
LDBU *${TEB}[$Te4[1]],$Te4[1]
|| EXTU A0,EXT3,24,A0
|| EXTU $K[3],EXT0,24,$Te4[3]
.if .BIG_ENDIAN
LDBU *${TEA}[A0],$Te4[3]
|| LDBU *${TEB}[$Te4[3]],A0
.else
LDBU *${TEA}[A0],A0
|| LDBU *${TEB}[$Te4[3]],$Te4[3]
.endif
STW $K[0],*$KPA++[2]
|| STW $K[1],*$KPB++[2]
STW $K[2],*$KPA++[2]
|| STW $K[3],*$KPB++[2]
XOR A31,$K[0],$K[0] ; ^=rcon[i]
.if .BIG_ENDIAN
PACK2 $Te4[0],$Te4[1],$Te4[1]
PACK2 $Te4[3],A0,$Te4[3]
PACKL4 $Te4[1],$Te4[3],$Te4[3]
.else
PACK2 $Te4[1],$Te4[0],$Te4[1]
PACK2 $Te4[3],A0,$Te4[3]
PACKL4 $Te4[3],$Te4[1],$Te4[3]
.endif
XOR $Te4[3],$K[0],$Te4[0] ; K[0]
XOR $Te4[0],$K[1],$K[1] ; K[1]
MV $Te4[0],$K[0]
|| XOR $K[1],$K[2],$Te4[2] ; K[2]
XOR $Te4[2],$K[3],$K[3] ; K[3]
SPKERNEL
;;====================================================================
;; Store the final four words and the round count, then return 0. The
;; packets after BNOP RA occupy its delay slots.
BNOP RA
MV $Te4[2],$K[2]
|| STW $K[0],*$KPA++[2]
|| STW $K[1],*$KPB++[2]
STW $K[2],*$KPA++[2]
|| STW $K[3],*$KPB++[2]
MVK 10,B0 ; rounds
STW B0,*++${KPB}[15]
MVK 0,RET
;;====================================================================
;;====================================================================
;; 192-bit key: six key words per step, looped with BDEC on B0 rather
;; than with SPLOOP. The instructions after BDEC execute in its delay
;; slots, so they belong to the same pass of the loop.
key192?:
.if .BIG_ENDIAN
MV A9,$K[0]
|| MV A8,$K[1]
|| MV B9,$K[2]
|| MV B8,$K[3]
MV B17,$Te4[2]
|| MV B16,$K[5]
.else
MV A8,$K[0]
|| MV A9,$K[1]
|| MV B8,$K[2]
|| MV B9,$K[3]
MV B16,$Te4[2]
|| MV B17,$K[5]
.endif
MVK 256,A0
|| MVK 6,B0
MV $TEA,$TEB
|| ADD $TEA,A0,A30 ; rcon
;;====================================================================
loop192?:
LDW *A30++[1],A31 ; rcon[i]
|| MV $Te4[2],$K[4]
|| EXTU $K[5],EXT1,24,$Te4[0]
LDBU *${TEB}[$Te4[0]],$Te4[0]
|| MV $K[5],A0
|| EXTU $K[5],EXT2,24,$Te4[1]
LDBU *${TEB}[$Te4[1]],$Te4[1]
|| EXTU A0,EXT3,24,A0
|| EXTU $K[5],EXT0,24,$Te4[3]
.if .BIG_ENDIAN
LDBU *${TEA}[A0],$Te4[3]
|| LDBU *${TEB}[$Te4[3]],A0
.else
LDBU *${TEA}[A0],A0
|| LDBU *${TEB}[$Te4[3]],$Te4[3]
.endif
STW $K[0],*$KPA++[2]
|| STW $K[1],*$KPB++[2]
STW $K[2],*$KPA++[2]
|| STW $K[3],*$KPB++[2]
STW $K[4],*$KPA++[2]
|| STW $K[5],*$KPB++[2]
XOR A31,$K[0],$K[0] ; ^=rcon[i]
.if .BIG_ENDIAN
PACK2 $Te4[0],$Te4[1],$Te4[1]
|| PACK2 $Te4[3],A0,$Te4[3]
PACKL4 $Te4[1],$Te4[3],$Te4[3]
.else
PACK2 $Te4[1],$Te4[0],$Te4[1]
|| PACK2 $Te4[3],A0,$Te4[3]
PACKL4 $Te4[3],$Te4[1],$Te4[3]
.endif
BDEC loop192?,B0
|| XOR $Te4[3],$K[0],$Te4[0] ; K[0]
XOR $Te4[0],$K[1],$K[1] ; K[1]
MV $Te4[0],$K[0]
|| XOR $K[1],$K[2],$Te4[2] ; K[2]
XOR $Te4[2],$K[3],$K[3] ; K[3]
MV $Te4[2],$K[2]
|| XOR $K[3],$K[4],$Te4[2] ; K[4]
XOR $Te4[2],$K[5],$K[5] ; K[5]
;;====================================================================
;; Store the final four words and the round count, then return 0.
BNOP RA
STW $K[0],*$KPA++[2]
|| STW $K[1],*$KPB++[2]
STW $K[2],*$KPA++[2]
|| STW $K[3],*$KPB++[2]
MVK 12,B0 ; rounds
STW B0,*++${KPB}[7]
MVK 0,RET
;;====================================================================
;;====================================================================
;; 256-bit key: eight key words per step. After K[0]..K[3] have been
;; updated (with rcon), the bytes of K[3] go through a second AES_Te4
;; lookup, this time without rcon, to update K[4]..K[7]. B0 counts the
;; remaining passes; when it reaches zero the loop exits to done256?
;; right after K[0]..K[3] have been formed.
key256?:
.if .BIG_ENDIAN
MV A9,$K[0]
|| MV A8,$K[1]
|| MV B9,$K[2]
|| MV B8,$K[3]
MV B17,$K[4]
|| MV B16,$K[5]
|| MV B19,$Te4[2]
|| MV B18,$K[7]
.else
MV A8,$K[0]
|| MV A9,$K[1]
|| MV B8,$K[2]
|| MV B9,$K[3]
MV B16,$K[4]
|| MV B17,$K[5]
|| MV B18,$Te4[2]
|| MV B19,$K[7]
.endif
MVK 256,A0
|| MVK 6,B0
MV $TEA,$TEB
|| ADD $TEA,A0,A30 ; rcon
;;====================================================================
loop256?:
LDW *A30++[1],A31 ; rcon[i]
|| MV $Te4[2],$K[6]
|| EXTU $K[7],EXT1,24,$Te4[0]
LDBU *${TEB}[$Te4[0]],$Te4[0]
|| MV $K[7],A0
|| EXTU $K[7],EXT2,24,$Te4[1]
LDBU *${TEB}[$Te4[1]],$Te4[1]
|| EXTU A0,EXT3,24,A0
|| EXTU $K[7],EXT0,24,$Te4[3]
.if .BIG_ENDIAN
LDBU *${TEA}[A0],$Te4[3]
|| LDBU *${TEB}[$Te4[3]],A0
.else
LDBU *${TEA}[A0],A0
|| LDBU *${TEB}[$Te4[3]],$Te4[3]
.endif
STW $K[0],*$KPA++[2]
|| STW $K[1],*$KPB++[2]
STW $K[2],*$KPA++[2]
|| STW $K[3],*$KPB++[2]
STW $K[4],*$KPA++[2]
|| STW $K[5],*$KPB++[2]
STW $K[6],*$KPA++[2]
|| STW $K[7],*$KPB++[2]
|| XOR A31,$K[0],$K[0] ; ^=rcon[i]
.if .BIG_ENDIAN
PACK2 $Te4[0],$Te4[1],$Te4[1]
|| PACK2 $Te4[3],A0,$Te4[3]
PACKL4 $Te4[1],$Te4[3],$Te4[3]
||[!B0] B done256?
.else
PACK2 $Te4[1],$Te4[0],$Te4[1]
|| PACK2 $Te4[3],A0,$Te4[3]
PACKL4 $Te4[3],$Te4[1],$Te4[3]
||[!B0] B done256?
.endif
XOR $Te4[3],$K[0],$Te4[0] ; K[0]
XOR $Te4[0],$K[1],$K[1] ; K[1]
MV $Te4[0],$K[0]
|| XOR $K[1],$K[2],$Te4[2] ; K[2]
XOR $Te4[2],$K[3],$K[3] ; K[3]
MV $Te4[2],$K[2]
|| [B0] EXTU $K[3],EXT0,24,$Te4[0]
|| [B0] SUB B0,1,B0
LDBU *${TEB}[$Te4[0]],$Te4[0]
|| MV $K[3],A0
|| EXTU $K[3],EXT1,24,$Te4[1]
LDBU *${TEB}[$Te4[1]],$Te4[1]
|| EXTU A0,EXT2,24,A0
|| EXTU $K[3],EXT3,24,$Te4[3]
.if .BIG_ENDIAN
LDBU *${TEA}[A0],$Te4[3]
|| LDBU *${TEB}[$Te4[3]],A0
NOP 3
PACK2 $Te4[0],$Te4[1],$Te4[1]
PACK2 $Te4[3],A0,$Te4[3]
|| B loop256?
PACKL4 $Te4[1],$Te4[3],$Te4[3]
.else
LDBU *${TEA}[A0],A0
|| LDBU *${TEB}[$Te4[3]],$Te4[3]
NOP 3
PACK2 $Te4[1],$Te4[0],$Te4[1]
PACK2 $Te4[3],A0,$Te4[3]
|| B loop256?
PACKL4 $Te4[3],$Te4[1],$Te4[3]
.endif
XOR $Te4[3],$K[4],$Te4[0] ; K[4]
XOR $Te4[0],$K[5],$K[5] ; K[5]
MV $Te4[0],$K[4]
|| XOR $K[5],$K[6],$Te4[2] ; K[6]
XOR $Te4[2],$K[7],$K[7] ; K[7]
;;====================================================================
;; Store the final four words and the round count, then return 0.
done256?:
BNOP RA
STW $K[0],*$KPA++[2]
|| STW $K[1],*$KPB++[2]
STW $K[2],*$KPA++[2]
|| STW $K[3],*$KPB++[2]
MVK 14,B0 ; rounds
STW B0,*--${KPB}[1]
MVK 0,RET
.endasmfunc
;;====================================================================
;; AES_set_decrypt_key: build a decryption key schedule. Takes the same
;; arguments as AES_set_encrypt_key, which it calls first.
;; If that call leaves B0 (round count) at zero, this function returns
;; immediately and RET keeps whatever value the call set. Otherwise the
;; round keys are reversed in place, InvMixColumns is applied to all of
;; them except the first and the last, and 0 is returned in RET.
;; GFPGFR is saved before and restored after the GMPY4 computations.
;;====================================================================
.global _AES_set_decrypt_key
_AES_set_decrypt_key:
.asmfunc
;; Call __set_encrypt_key: KEY and the caller's RA are preserved in
;; B30 and B31, and RA is pointed at ret? so the callee returns there.
B __set_encrypt_key ; guarantee local call
MV KEY,B30 ; B30 is not modified
MV RA, B31 ; B31 is not modified
ADDKPC ret?,RA,2
ret?: ; B0 holds rounds or zero
[!B0] BNOP B31 ; return if zero
[B0] SHL B0,4,A0 ; offset to last round key
[B0] SHRU B0,1,B1
[B0] SUB B1,1,B1
[B0] MVK 0x0000001B,B3 ; AES polynomial
[B0] MVKH 0x07000000,B3
;; Swap round keys pairwise from both ends of the schedule: one pointer
;; walks up from the first round key, the other walks down from the
;; last, 16 bytes per iteration. ILC was set up as rounds/2-1.
SPLOOPD 9 ; flip round keys
|| MVC B1,ILC
|| MV B30,$KPA
|| ADD B30,A0,$KPB
|| MVK 16,A0 ; sizeof(round key)
;;====================================================================
LDW *${KPA}[0],A16
|| LDW *${KPB}[0],B16
LDW *${KPA}[1],A17
|| LDW *${KPB}[1],B17
LDW *${KPA}[2],A18
|| LDW *${KPB}[2],B18
LDW *${KPA}[3],A19
|| ADD $KPA,A0,$KPA
|| LDW *${KPB}[3],B19
|| SUB $KPB,A0,$KPB
NOP
STW B16,*${KPA}[-4]
|| STW A16,*${KPB}[4]
STW B17,*${KPA}[-3]
|| STW A17,*${KPB}[5]
STW B18,*${KPA}[-2]
|| STW A18,*${KPB}[6]
STW B19,*${KPA}[-1]
|| STW A19,*${KPB}[7]
SPKERNEL
;;====================================================================
;; InvMixColumns setup. Both key pointers start at the second round key
;; and the loop count is rounds-1, so the first and last round keys are
;; left as they are. GFPGFR is saved in B30 and loaded from B3
;; (0x0700001B) for GMPY4. A24, B24, A25 and B25 hold the multipliers
;; 0x09, 0x0B, 0x0D and 0x0E replicated in all four bytes.
SUB B0,1,B0 ; skip last round
|| ADD B30,A0,$KPA ; skip first round
|| ADD B30,A0,$KPB
|| MVC GFPGFR,B30 ; save GFPGFR
LDW *${KPA}[0],$K[0]
|| LDW *${KPB}[1],$K[1]
|| MVC B3,GFPGFR
LDW *${KPA}[2],$K[2]
|| LDW *${KPB}[3],$K[3]
MVK 0x00000909,A24
|| MVK 0x00000B0B,B24
MVKH 0x09090000,A24
|| MVKH 0x0B0B0000,B24
MVC B0,ILC
|| SUB B0,1,B0
GMPY4 $K[0],A24,$Kx9[0] ; ·0x09
|| GMPY4 $K[1],A24,$Kx9[1]
|| MVK 0x00000D0D,A25
|| MVK 0x00000E0E,B25
GMPY4 $K[2],A24,$Kx9[2]
|| GMPY4 $K[3],A24,$Kx9[3]
|| MVKH 0x0D0D0000,A25
|| MVKH 0x0E0E0000,B25
GMPY4 $K[0],B24,$KxB[0] ; ·0x0B
|| GMPY4 $K[1],B24,$KxB[1]
GMPY4 $K[2],B24,$KxB[2]
|| GMPY4 $K[3],B24,$KxB[3]
SPLOOP 11 ; InvMixColumns
;;====================================================================
;; Loop body, one round key per iteration: the four GMPY4 products of
;; each word are aligned with SWAP2/ROTL and XORed together, and the
;; result is stored back over the round key. While B0 is non-zero the
;; next round key is loaded and its first products are started.
GMPY4 $K[0],A25,$KxD[0] ; ·0x0D
|| GMPY4 $K[1],A25,$KxD[1]
|| SWAP2 $Kx9[0],$Kx9[0] ; rotate by 16
|| SWAP2 $Kx9[1],$Kx9[1]
|| MV $K[0],$s[0] ; this or DINT
|| MV $K[1],$s[1]
|| [B0] LDW *${KPA}[4],$K[0]
|| [B0] LDW *${KPB}[5],$K[1]
GMPY4 $K[2],A25,$KxD[2]
|| GMPY4 $K[3],A25,$KxD[3]
|| SWAP2 $Kx9[2],$Kx9[2]
|| SWAP2 $Kx9[3],$Kx9[3]
|| MV $K[2],$s[2]
|| MV $K[3],$s[3]
|| [B0] LDW *${KPA}[6],$K[2]
|| [B0] LDW *${KPB}[7],$K[3]
GMPY4 $s[0],B25,$KxE[0] ; ·0x0E
|| GMPY4 $s[1],B25,$KxE[1]
|| XOR $Kx9[0],$KxB[0],$KxB[0]
|| XOR $Kx9[1],$KxB[1],$KxB[1]
GMPY4 $s[2],B25,$KxE[2]
|| GMPY4 $s[3],B25,$KxE[3]
|| XOR $Kx9[2],$KxB[2],$KxB[2]
|| XOR $Kx9[3],$KxB[3],$KxB[3]
ROTL $KxB[0],TBL3,$KxB[0]
|| ROTL $KxB[1],TBL3,$KxB[1]
|| SWAP2 $KxD[0],$KxD[0] ; rotate by 16
|| SWAP2 $KxD[1],$KxD[1]
ROTL $KxB[2],TBL3,$KxB[2]
|| ROTL $KxB[3],TBL3,$KxB[3]
|| SWAP2 $KxD[2],$KxD[2]
|| SWAP2 $KxD[3],$KxD[3]
XOR $KxE[0],$KxD[0],$KxE[0]
|| XOR $KxE[1],$KxD[1],$KxE[1]
|| [B0] GMPY4 $K[0],A24,$Kx9[0] ; ·0x09
|| [B0] GMPY4 $K[1],A24,$Kx9[1]
|| ADDAW $KPA,4,$KPA
XOR $KxE[2],$KxD[2],$KxE[2]
|| XOR $KxE[3],$KxD[3],$KxE[3]
|| [B0] GMPY4 $K[2],A24,$Kx9[2]
|| [B0] GMPY4 $K[3],A24,$Kx9[3]
|| ADDAW $KPB,4,$KPB
XOR $KxB[0],$KxE[0],$KxE[0]
|| XOR $KxB[1],$KxE[1],$KxE[1]
|| [B0] GMPY4 $K[0],B24,$KxB[0] ; ·0x0B
|| [B0] GMPY4 $K[1],B24,$KxB[1]
XOR $KxB[2],$KxE[2],$KxE[2]
|| XOR $KxB[3],$KxE[3],$KxE[3]
|| [B0] GMPY4 $K[2],B24,$KxB[2]
|| [B0] GMPY4 $K[3],B24,$KxB[3]
|| STW $KxE[0],*${KPA}[-4]
|| STW $KxE[1],*${KPB}[-3]
STW $KxE[2],*${KPA}[-2]
|| STW $KxE[3],*${KPB}[-1]
|| [B0] SUB B0,1,B0
SPKERNEL
;;====================================================================
;; Return through the saved RA in B31; GFPGFR is restored and RET is
;; cleared in the branch delay slots.
BNOP B31,3
MVC B30,GFPGFR ; restore GFPGFR(*)
MVK 0,RET
.endasmfunc
___
# (*) Even though ABI doesn't specify GFPGFR as non-volatile, there
# are code samples out there that *assume* its default value.
}
{
# Registers holding the arguments of AES_ctr32_encrypt. Only $blocks, $key
# and $ivp are interpolated into the template below; the input and output
# pointers are referred to there through the INP (A4) and OUT (B4) aliases,
# which name the same registers as $inp and $out.
my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");
$code.=<<___;
;;====================================================================
;; AES_ctr32_encrypt: CTR-mode encryption built on __encrypt.
;;   A4 (INP) - input, B4 (OUT) - output, A6 - number of blocks,
;;   B6 - key schedule, A8 - pointer to the 16-byte counter block.
;; Only one 32-bit word of the counter block is incremented between
;; blocks. The code below never stores the updated counter back through
;; the counter pointer.
;; Register use: A2 - blocks left, A1 - zero until the first block has
;; been encrypted, B27:B26 - saved RA and key schedule pointer,
;; A31:A30 and B31:B30 - counter block, A29:A28 and B29:B28 - current
;; input block.
;;====================================================================
.global _AES_ctr32_encrypt
_AES_ctr32_encrypt:
.asmfunc
LDNDW *${ivp}[0],A31:A30 ; load counter value
|| MV $blocks,A2 ; reassign $blocks
|| DMV RA,$key,B27:B26 ; reassign RA and $key
LDNDW *${ivp}[1],B31:B30
|| MVK 0,B2 ; don't let __encrypt load input
|| MVK 0,A1 ; and postpone writing output
.if .BIG_ENDIAN
NOP
.else
NOP 4
SWAP2 B31,B31 ; keep least significant 32 bits
SWAP4 B31,B31 ; in host byte order
.endif
;; Each pass XORs and stores the block encrypted by the previous call
;; (skipped while A1 is zero), then either passes the next counter value
;; to __encrypt in A9:A8 and B9:B8, with RA pointing back at
;; ctr32_loop?, or returns through B27 once A2 has reached zero.
ctr32_loop?:
[A2] BNOP __encrypt
|| [A1] XOR A29,A9,A9 ; input^Ek(counter)
|| [A1] XOR A28,A8,A8
|| [A2] LDNDW *INP++,A29:A28 ; load input
[!A2] BNOP B27 ; return
|| [A1] XOR B29,B9,B9
|| [A1] XOR B28,B8,B8
|| [A2] LDNDW *INP++,B29:B28
.if .BIG_ENDIAN
[A1] STNDW A9:A8,*OUT++ ; save output
|| [A2] DMV A31,A30,A9:A8 ; pass counter value to __encrypt
[A1] STNDW B9:B8,*OUT++
|| [A2] DMV B31,B30,B9:B8
|| [A2] ADD B30,1,B30 ; counter++
.else
[A1] STNDW A9:A8,*OUT++ ; save output
|| [A2] DMV A31,A30,A9:A8
|| [A2] SWAP2 B31,B0
|| [A2] ADD B31,1,B31 ; counter++
[A1] STNDW B9:B8,*OUT++
|| [A2] MV B30,B8
|| [A2] SWAP4 B0,B9
.endif
[A2] ADDKPC ctr32_loop?,RA ; return to ctr32_loop?
|| [A2] MV B26,KEY ; pass $key
|| [A2] SUB A2,1,A2 ; $blocks--
||[!A1] MVK 1,A1
NOP
NOP
.endasmfunc
___
}
# Tables are kept in endian-neutral manner
$code.=<<___;
.if __TI_EABI__
.sect ".text:aes_asm.const"
.else
.sect ".const:aes_asm"
.endif
.align 128
AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84
.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
AES_Te4:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
rcon:
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
.align 128
AES_Td:
.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53
.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
AES_Td4:
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
# Write the accumulated assembly text to the currently selected output handle
# (STDOUT unless it was changed elsewhere in this script).
print $code;
# Output is buffered, so a failed write (disk full, broken pipe, ...) may only
# be reported when the handle is flushed on close.  Check the result so that a
# truncated output file aborts with a non-zero exit status instead of being
# silently accepted.
close STDOUT or die "error closing STDOUT: $!";