timewave0 (OP)
Newbie
Offline
Activity: 37
Merit: 0
|
|
January 03, 2014, 10:05:08 AM |
|
Anyone interested in retromining? Several years ago I implemented SHA-256 on my TI-83. I also implemented AES-128-CFB. I gave up on RSA and DSA because of memory constraints, so I never did create a complete strong-crypto suite for TI-83/z80. With all the interest in bitcoin these days, maybe someone will find my SHA-256 code interesting (or I'll revisit it---although I'd personally find asymmetric crypto or a stronger symmetric block mode more interesting). If the ~800 bytes/sec from my comments is reliable, the old z80 should do ~7 h/sec, right?
Calculation code:
; sha256up.z80 by timewave0
; 1twzU46whuMER6hhBPCGmdaLw8atyv9c4
;
; this file contains pure, portable z80 code,
; no TI-8x ROM calls
;-----------------------------------------------------------------------
; sha256update
;
; Folds one complete 64-byte message block into the running SHA-256
; hash (see FIPS PUB 180-2).
;
; In:    WBUF+0..63   the block, as sixteen 32-bit words W(0)..W(15)
;        H0..H7       current hash value
; Out:   H0..H7       updated hash value
; Uses:  WBUF+64..255 (W(16)..W(63)), TEMP1, ABUF..HBUF, TEMP2,
;        sigma_temp, step3_t
; Clobb: af, bc, de, hl, ix
; Calls: rotrb, sharb, slcb, add32
;
; Every 32-bit word is stored most significant byte first, so "+0" is
; the MSB and "+3" the LSB.  Register quads are written hlde (h = MSB).
;
; TEMP1, ABUF..HBUF and TEMP2 must stay adjacent and in that order:
; the code reaches TEMP1 as EBUF-20, TEMP2 as ABUF+32, and shifts the
; working variables with one block move that also pulls TEMP1 into A.
;
; sigma_temp is a 4x3 byte scratch matrix: the three values to be
; xored are stored column-wise so that the three bytes contributing to
; one result byte are consecutive in memory.
;-----------------------------------------------------------------------
sha256update:

; ---- step 1: extend the message schedule -----------------------------
; W(t) = s1(W(t-2)) + W(t-7) + s0(W(t-15)) + W(t-16)   for t = 16..63
        ld      ix,WBUF+64-4            ; ix -> W(15), advanced below
        ld      c,16-1                  ; c = t, advanced below
step1:
        inc     ix
        inc     ix
        inc     ix
        inc     ix                      ; ix -> W(t)
        inc     c                       ; t++

; lowercase sigma_1 of W(t-2):  ror 17  xor  ror 19  xor  shr 10
        ld      h,(ix-8+2)              ; load rotated by two bytes:
        ld      l,(ix-8+3)              ; bytes 0123 -> dehl
        ld      d,(ix-8+0)              ; hlde = W(t-2) ror 16
        ld      e,(ix-8+1)
        ld      b,1
        call    rotrb                   ; ror 17
        ld      a,h
        ld      (sigma_temp+0+0),a
        ld      a,l
        ld      (sigma_temp+3+0),a
        ld      a,d
        ld      (sigma_temp+6+0),a
        ld      a,e
        ld      (sigma_temp+9+0),a

        ld      b,2
        call    rotrb                   ; ror 19
        ld      a,h
        ld      (sigma_temp+0+1),a
        ld      a,l
        ld      (sigma_temp+3+1),a
        ld      a,d
        ld      (sigma_temp+6+1),a
        ld      a,e
        ld      (sigma_temp+9+1),a

        ld      h,0                     ; load shifted by one byte:
        ld      l,(ix-8+0)              ; hlde = W(t-2) shr 8
        ld      d,(ix-8+1)
        ld      e,(ix-8+2)
        ld      b,2
        call    sharb                   ; shr 10
        ld      a,h
        ld      (sigma_temp+0+2),a
        ld      a,l
        ld      (sigma_temp+3+2),a
        ld      a,d
        ld      (sigma_temp+6+2),a
        ld      a,e
        ld      (sigma_temp+9+2),a

        ld      hl,sigma_temp           ; W(t) = xor of the three columns
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (ix+0),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (ix+1),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (ix+2),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (ix+3),a                ; W(t) = s1(W(t-2))

; lowercase sigma_0 of W(t-15):  ror 7  xor  ror 18  xor  shr 3
        ld      h,(ix-60+3)             ; load rotated by one byte:
        ld      l,(ix-60+0)             ; hlde = W(t-15) ror 8
        ld      d,(ix-60+1)
        ld      e,(ix-60+2)
        ld      b,1
        call    slcb                    ; one bit back left: ror 7
        ld      a,h
        ld      (sigma_temp+0+0),a
        ld      a,l
        ld      (sigma_temp+3+0),a
        ld      a,d
        ld      (sigma_temp+6+0),a
        ld      a,e
        ld      (sigma_temp+9+0),a

        ld      a,e                     ; hlde -> ehld: another 8 bits
        ld      e,d                     ; right, giving ror 15
        ld      d,l
        ld      l,h
        ld      h,a
        ld      b,3
        call    rotrb                   ; ror 18
        ld      a,h
        ld      (sigma_temp+0+1),a
        ld      a,l
        ld      (sigma_temp+3+1),a
        ld      a,d
        ld      (sigma_temp+6+1),a
        ld      a,e
        ld      (sigma_temp+9+1),a

        ld      h,(ix-60+0)             ; hlde = W(t-15)
        ld      l,(ix-60+1)
        ld      d,(ix-60+2)
        ld      e,(ix-60+3)
        ld      b,3
        call    sharb                   ; shr 3
        ld      a,h
        ld      (sigma_temp+0+2),a
        ld      a,l
        ld      (sigma_temp+3+2),a
        ld      a,d
        ld      (sigma_temp+6+2),a
        ld      a,e
        ld      (sigma_temp+9+2),a

        ld      hl,sigma_temp           ; s0 -> d,e,b,a (MSB..LSB)
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      d,a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      e,a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      b,a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)

        add     a,(ix+3)                ; W(t) += s0, LSB first; the
        ld      (ix+3),a                ; loads and stores in between
        ld      a,(ix+2)                ; leave the carry flag intact
        adc     a,b
        ld      (ix+2),a
        ld      a,(ix+1)
        adc     a,e
        ld      (ix+1),a
        ld      a,(ix+0)
        adc     a,d
        ld      (ix+0),a

        push    ix                      ; keep ix -> W(t) for next pass

        push    ix
        pop     hl
        inc     hl
        inc     hl
        inc     hl                      ; hl -> LSB of W(t)
        ld      de,3-28
        add     ix,de                   ; ix -> LSB of W(t-7)
        call    add32                   ; W(t) += W(t-7)

        inc     hl                      ; add32 left hl on the MSB
        inc     hl
        inc     hl                      ; hl -> LSB of W(t) again
        ld      de,28-64
        add     ix,de                   ; ix -> LSB of W(t-16)
        call    add32                   ; W(t) += W(t-16)

        pop     ix

        ; W(63) is the last word needed.  The original test
        ; (ld a,63 / cp c / jp nc) also repeated when t = 63 and so
        ; computed a W(64) that landed on sigma_temp.
        ld      a,c
        cp      63
        jp      c,step1                 ; repeat while t < 63

; ---- step 2: a..h = H0..H7 -------------------------------------------
; a plain block copy, because both groups are contiguous in memory
        ld      hl,H0
        ld      de,ABUF
        ld      bc,32
        ldir

; ---- step 3: 64 compression rounds -----------------------------------
        xor     a                       ; t = 0
        ld      (step3_t),a
step_3:

; uppercase sigma_1 of e:  ror 6  xor  ror 11  xor  ror 25
        ld      ix,EBUF

        ld      h,(ix+3)                ; hlde = e ror 8
        ld      l,(ix+0)
        ld      d,(ix+1)
        ld      e,(ix+2)
        ld      b,2
        call    slcb                    ; two bits back left: ror 6
        ld      a,h
        ld      (sigma_temp+0+0),a
        ld      a,l
        ld      (sigma_temp+3+0),a
        ld      a,d
        ld      (sigma_temp+6+0),a
        ld      a,e
        ld      (sigma_temp+9+0),a

        ld      a,e                     ; byte rotate: ror 6+8 = 14
        ld      e,d
        ld      d,l
        ld      l,h
        ld      h,a
        ld      b,3
        call    slcb                    ; ror 14-3 = 11
        ld      a,h
        ld      (sigma_temp+0+1),a
        ld      a,l
        ld      (sigma_temp+3+1),a
        ld      a,d
        ld      (sigma_temp+6+1),a
        ld      a,e
        ld      (sigma_temp+9+1),a

        ex      de,hl                   ; swap halves: ror 11+16 = 27
        ld      b,2
        call    slcb                    ; ror 27-2 = 25
        ld      a,h
        ld      (sigma_temp+0+2),a
        ld      a,l
        ld      (sigma_temp+3+2),a
        ld      a,d
        ld      (sigma_temp+6+2),a
        ld      a,e
        ld      (sigma_temp+9+2),a

        ld      hl,sigma_temp           ; TEMP1 = xor of the columns
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (TEMP1+0),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (TEMP1+1),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (TEMP1+2),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (TEMP1+3),a             ; TEMP1 = Sigma1(e)

        ld      ix,HBUF+3
        ld      hl,TEMP1+3
        call    add32                   ; T1 += h

        ld      ix,WBUF+3
        ld      a,(step3_t)
        ld      b,0
        sla     a
        rl      b                       ; *= 2
        sla     a
        rl      b                       ; *= 2
        ld      c,a                     ; bc = 4*t
        add     ix,bc
        ld      hl,TEMP1+3
        call    add32                   ; T1 += W(t)

        ld      ix,Karray+3
        add     ix,bc                   ; add32 leaves bc untouched
        ld      hl,TEMP1+3
        call    add32                   ; T1 += K(t)

; Ch(e,f,g) computed as g xor (e and (f xor g)), into d,e,b,a
        ld      ix,EBUF

        ld      a,(ix+8+0)              ; GBUF
        xor     (ix+4+0)                ; xor FBUF
        and     (ix+0+0)                ; and EBUF
        xor     (ix+8+0)                ; xor GBUF
        ld      d,a
        ld      a,(ix+8+1)
        xor     (ix+4+1)
        and     (ix+0+1)
        xor     (ix+8+1)
        ld      e,a
        ld      a,(ix+8+2)
        xor     (ix+4+2)
        and     (ix+0+2)
        xor     (ix+8+2)
        ld      b,a
        ld      a,(ix+8+3)
        xor     (ix+4+3)
        and     (ix+0+3)
        xor     (ix+8+3)

        add     a,(ix-20+3)             ; EBUF-20 is TEMP1
        ld      (ix-20+3),a
        ld      a,(ix-20+2)
        adc     a,b
        ld      (ix-20+2),a
        ld      a,(ix-20+1)
        adc     a,e
        ld      (ix-20+1),a
        ld      a,(ix-20+0)
        adc     a,d
        ld      (ix-20+0),a             ; T1 += Ch

; uppercase sigma_0 of a:  ror 2  xor  ror 13  xor  ror 22
        ld      ix,ABUF

        ld      h,(ix+0)                ; bytes 0123 -> hlde
        ld      l,(ix+1)
        ld      d,(ix+2)
        ld      e,(ix+3)
        ld      b,2
        call    rotrb                   ; ror 2
        ld      a,h
        ld      (sigma_temp+0+0),a
        ld      a,l
        ld      (sigma_temp+3+0),a
        ld      a,d
        ld      (sigma_temp+6+0),a
        ld      a,e
        ld      (sigma_temp+9+0),a

        ld      a,e                     ; byte rotate: ror 2+8 = 10
        ld      e,d
        ld      d,l
        ld      l,h
        ld      h,a
        ld      b,3
        call    rotrb                   ; ror 13
        ld      a,h
        ld      (sigma_temp+0+1),a
        ld      a,l
        ld      (sigma_temp+3+1),a
        ld      a,d
        ld      (sigma_temp+6+1),a
        ld      a,e
        ld      (sigma_temp+9+1),a

        ld      a,e                     ; byte rotate: ror 13+8 = 21
        ld      e,d
        ld      d,l
        ld      l,h
        ld      h,a
        ld      b,1
        call    rotrb                   ; ror 22
        ld      a,h
        ld      (sigma_temp+0+2),a
        ld      a,l
        ld      (sigma_temp+3+2),a
        ld      a,d
        ld      (sigma_temp+6+2),a
        ld      a,e
        ld      (sigma_temp+9+2),a

        ld      hl,sigma_temp           ; TEMP2 = xor of the columns
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (TEMP2+0),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (TEMP2+1),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (TEMP2+2),a

        inc     hl
        ld      a,(hl)
        inc     hl
        xor     (hl)
        inc     hl
        xor     (hl)
        ld      (TEMP2+3),a             ; TEMP2 = Sigma0(a)

; Maj(a,b,c) computed as (a and b) or (c and (a or b)), into d,e,b,a
; ix still points to ABUF
        ld      a,(ix+0+0)              ; ABUF
        or      (ix+4+0)                ; or BBUF
        and     (ix+8+0)                ; and CBUF
        ld      d,a
        ld      a,(ix+0+0)              ; ABUF
        and     (ix+4+0)                ; and BBUF
        or      d
        ld      d,a
        ld      a,(ix+0+1)
        or      (ix+4+1)
        and     (ix+8+1)
        ld      e,a
        ld      a,(ix+0+1)
        and     (ix+4+1)
        or      e
        ld      e,a
        ld      a,(ix+0+2)
        or      (ix+4+2)
        and     (ix+8+2)
        ld      b,a
        ld      a,(ix+0+2)
        and     (ix+4+2)
        or      b
        ld      b,a
        ld      a,(ix+0+3)
        or      (ix+4+3)
        and     (ix+8+3)
        ld      c,a
        ld      a,(ix+0+3)
        and     (ix+4+3)
        or      c

        add     a,(ix+32+3)             ; ABUF+32 is TEMP2
        ld      (ix+32+3),a
        ld      a,(ix+32+2)
        adc     a,b
        ld      (ix+32+2),a
        ld      a,(ix+32+1)
        adc     a,e
        ld      (ix+32+1),a
        ld      a,(ix+32+0)
        adc     a,d
        ld      (ix+32+0),a             ; T2 += Maj

        ; slide TEMP1..GBUF up by one word in a single backwards copy:
        ; h=g, g=f, f=e, e=d, d=c, c=b, b=a, a=T1
        ld      de,HBUF+3
        ld      hl,GBUF+3
        ld      bc,32
        lddr

        ld      hl,EBUF+3
        ld      ix,TEMP1+3
        call    add32                   ; e = d + T1

        ld      hl,ABUF+3
        ld      ix,TEMP2+3
        call    add32                   ; a = T1 + T2

        ld      a,(step3_t)
        inc     a
        ld      (step3_t),a
        cp      64                      ; 64 rounds
        jp      c,step_3

; ---- step 4: H(i) += working variables -------------------------------
; add32 returns with hl on the MSB of its destination, so one dec hl
; lands on the LSB of the preceding hash word.
        ld      hl,H7+3
        ld      ix,HBUF+3
        call    add32

        ld      ix,GBUF+3
        dec     hl
        call    add32

        ld      ix,FBUF+3
        dec     hl
        call    add32

        ld      ix,EBUF+3
        dec     hl
        call    add32

        ld      ix,DBUF+3
        dec     hl
        call    add32

        ld      ix,CBUF+3
        dec     hl
        call    add32

        ld      ix,BBUF+3
        dec     hl
        call    add32

        ld      ix,ABUF+3
        dec     hl
        call    add32

        ret                             ; end of sha256update
;-----------------------------------------------------------------------
; rotrb - rotate the 32-bit value hlde right by b bits (circular)
;
; In:    hlde = value (h = most significant byte), b = bit count
; Out:   hlde = rotated value, b = 0
; Note:  b = 0 on entry makes djnz run 256 times.
;-----------------------------------------------------------------------
rotrb:
        srl     h                       ; 0 -> bit 31 for now
        rr      l
        rr      d
        rr      e                       ; carry = bit that fell off
        jr      c,rotrb_wrap
        djnz    rotrb
        ret
rotrb_wrap:
        set     7,h                     ; feed it back in at the top
        djnz    rotrb
        ret
;-----------------------------------------------------------------------
; sharb - shift the 32-bit value hlde right by b bits, zero filling
;
; In:    hlde = value (h = most significant byte), b = bit count
; Out:   hlde = shifted value, b = 0
; Note:  b = 0 on entry makes djnz run 256 times.
;-----------------------------------------------------------------------
sharb:
        srl     h                       ; 0 -> bit 31
        rr      l                       ; ripple the carry down
        rr      d
        rr      e                       ; bit 0 is discarded
        djnz    sharb
        ret
;-----------------------------------------------------------------------
; slcb - rotate the 32-bit value hlde left by b bits (circular)
;
; In:    hlde = value (h = most significant byte), b = bit count
; Out:   hlde = rotated value, b = 0
; Note:  b = 0 on entry makes djnz run 256 times.
;-----------------------------------------------------------------------
slcb:
        sla     e                       ; 0 -> bit 0 for now
        rl      d
        rl      l
        rl      h                       ; carry = bit that fell off
        jr      c,slcb_wrap
        djnz    slcb
        ret
slcb_wrap:
        inc     e                       ; bit 0 is clear, so this sets it
        djnz    slcb
        ret
;-----------------------------------------------------------------------
; add32 - 32-bit add of one in-memory word to another
;
; Both words are stored most significant byte first, and both
; pointers must address the LAST (least significant) byte on entry.
;
; In:    hl -> LSB of destination, ix -> LSB of source
; Out:   destination += source (carry out of the MSB is dropped)
;        hl -> MSB of destination (entry value minus 3)
; Clobb: a, flags.  ix, bc and de are left untouched.
;-----------------------------------------------------------------------
add32:
        ld      a,(hl)
        add     a,(ix+0)                ; byte 3 (LSB) starts the chain
        ld      (hl),a
        dec     hl                      ; dec hl keeps the carry flag

        ld      a,(hl)
        adc     a,(ix-1)                ; byte 2
        ld      (hl),a
        dec     hl

        ld      a,(hl)
        adc     a,(ix-2)                ; byte 1
        ld      (hl),a
        dec     hl

        ld      a,(hl)
        adc     a,(ix-3)                ; byte 0 (MSB)
        ld      (hl),a
        ret
; Three bytes that the interface code copies into OP1 before looking
; up, deleting and recreating the variable it calls Str1.  The first
; byte equals strngobj; the other two are presumably the OS tokens
; naming Str1 - confirm against the TI-83 token table.
Str1:
        .db     $04,$AA,$00

; initial hash value H0..H7, most significant byte first
H0init: .db     $6a,$09,$e6,$67
H1init: .db     $bb,$67,$ae,$85
H2init: .db     $3c,$6e,$f3,$72
H3init: .db     $a5,$4f,$f5,$3a
H4init: .db     $51,$0e,$52,$7f
H5init: .db     $9b,$05,$68,$8c
H6init: .db     $1f,$83,$d9,$ab
H7init: .db     $5b,$e0,$cd,$19
; SHA-256 round constants K(0)..K(63), most significant byte first.
; sha256update reads K(t) at Karray+4*t, so the table must be exactly
; 256 contiguous bytes.  Two constants per line; each .db list ends on
; its last byte (the original ended every line with a dangling comma).
Karray:
        .db     $42,$8a,$2f,$98,$71,$37,$44,$91         ; K(0),  K(1)
        .db     $b5,$c0,$fb,$cf,$e9,$b5,$db,$a5         ; K(2),  K(3)
        .db     $39,$56,$c2,$5b,$59,$f1,$11,$f1         ; K(4),  K(5)
        .db     $92,$3f,$82,$a4,$ab,$1c,$5e,$d5         ; K(6),  K(7)
        .db     $d8,$07,$aa,$98,$12,$83,$5b,$01         ; K(8),  K(9)
        .db     $24,$31,$85,$be,$55,$0c,$7d,$c3         ; K(10), K(11)
        .db     $72,$be,$5d,$74,$80,$de,$b1,$fe         ; K(12), K(13)
        .db     $9b,$dc,$06,$a7,$c1,$9b,$f1,$74         ; K(14), K(15)
        .db     $e4,$9b,$69,$c1,$ef,$be,$47,$86         ; K(16), K(17)
        .db     $0f,$c1,$9d,$c6,$24,$0c,$a1,$cc         ; K(18), K(19)
        .db     $2d,$e9,$2c,$6f,$4a,$74,$84,$aa         ; K(20), K(21)
        .db     $5c,$b0,$a9,$dc,$76,$f9,$88,$da         ; K(22), K(23)
        .db     $98,$3e,$51,$52,$a8,$31,$c6,$6d         ; K(24), K(25)
        .db     $b0,$03,$27,$c8,$bf,$59,$7f,$c7         ; K(26), K(27)
        .db     $c6,$e0,$0b,$f3,$d5,$a7,$91,$47         ; K(28), K(29)
        .db     $06,$ca,$63,$51,$14,$29,$29,$67         ; K(30), K(31)
        .db     $27,$b7,$0a,$85,$2e,$1b,$21,$38         ; K(32), K(33)
        .db     $4d,$2c,$6d,$fc,$53,$38,$0d,$13         ; K(34), K(35)
        .db     $65,$0a,$73,$54,$76,$6a,$0a,$bb         ; K(36), K(37)
        .db     $81,$c2,$c9,$2e,$92,$72,$2c,$85         ; K(38), K(39)
        .db     $a2,$bf,$e8,$a1,$a8,$1a,$66,$4b         ; K(40), K(41)
        .db     $c2,$4b,$8b,$70,$c7,$6c,$51,$a3         ; K(42), K(43)
        .db     $d1,$92,$e8,$19,$d6,$99,$06,$24         ; K(44), K(45)
        .db     $f4,$0e,$35,$85,$10,$6a,$a0,$70         ; K(46), K(47)
        .db     $19,$a4,$c1,$16,$1e,$37,$6c,$08         ; K(48), K(49)
        .db     $27,$48,$77,$4c,$34,$b0,$bc,$b5         ; K(50), K(51)
        .db     $39,$1c,$0c,$b3,$4e,$d8,$aa,$4a         ; K(52), K(53)
        .db     $5b,$9c,$ca,$4f,$68,$2e,$6f,$f3         ; K(54), K(55)
        .db     $74,$8f,$82,$ee,$78,$a5,$63,$6f         ; K(56), K(57)
        .db     $84,$c8,$78,$14,$8c,$c7,$02,$08         ; K(58), K(59)
        .db     $90,$be,$ff,$fa,$a4,$50,$6c,$eb         ; K(60), K(61)
        .db     $be,$f9,$a3,$f7,$c6,$71,$78,$f2         ; K(62), K(63)
Interface code: ; sha256.z80 by timewave0 ; 1twzU46whuMER6hhBPCGmdaLw8atyv9c4 ; ; see FIPS PUB 180-2 ; ; ~800 bytes/sec for very large ; programs, as measured on Vti ; ; verified for random byte patterns of lengths: ; 0-10, 50-70, 255-257, 16383-16385, 19000
; ---- assembler listing on --------------------------------------------
        .LIST

; ---- ROM entry points called by this program -------------------------
_zeroop1        .equ    $428E
_chkfindsym     .equ    $442A
_findsym        .equ    $442E
_createstrng    .equ    $4472
_delvar         .equ    $44AA
_errsyntax      .equ    $466C
_errundefined   .equ    $467B

; ---- OS RAM and object type codes ------------------------------------
OP1             .equ    $8039

progobj         .equ    $05
strngobj        .equ    $04

; ---- program variables -----------------------------------------------
; Laid out back to back from a fixed RAM address (the author's "magic
; number").  clearvars zeroes the range from block up to size.
block           .equ    $8265           ; magic number; 2 bytes: whole
                                        ; 64-byte blocks still to hash
extra_bytes     .equ    block+2         ; 1 byte: size mod 64
H0              .equ    extra_bytes+1   ; hash value, 8 words of 4 bytes
H1              .equ    H0+4
H2              .equ    H1+4
H3              .equ    H2+4
H4              .equ    H3+4
H5              .equ    H4+4
H6              .equ    H5+4
H7              .equ    H6+4
; start of variables that can't be moved in memory: sha256update
; depends on TEMP1, ABUF..HBUF, TEMP2 being adjacent in this order
TEMP1           .equ    H7+4
ABUF            .equ    TEMP1+4
BBUF            .equ    ABUF+4
CBUF            .equ    BBUF+4
DBUF            .equ    CBUF+4
EBUF            .equ    DBUF+4
FBUF            .equ    EBUF+4
GBUF            .equ    FBUF+4
HBUF            .equ    GBUF+4
TEMP2           .equ    HBUF+4
; end of variables that can't be moved in memory
WBUF            .equ    TEMP2+4         ; message schedule, 64 words
sigma_temp      .equ    WBUF+(4*64)     ; 3*4 bytes of xor scratch
dataptr         .equ    sigma_temp+(3*4) ; 2 bytes: next input byte
step3_t         .equ    dataptr+2       ; 1 byte: round counter
size            .equ    step3_t+1       ; 2 bytes: input length
;-----------------------------------------------------------------------
; Program entry.
;
; Reads a program name from the string variable Str1, locates that
; program, records its size and data pointer, splits the size into a
; count of whole 64-byte blocks plus a remainder, and loads the
; initial hash value.  Execution then falls through into do_hash.
;
; Errors: jumps to _errundefined when either lookup reports carry, and
;         to _errsyntax when Str1 is not a string or does not hold a
;         usable name (empty, or longer than 8 characters).
;-----------------------------------------------------------------------
        .org    $9327                   ; magic number

        call    clearvars               ; clear variables

; based on code from squish by Pat Milheron
        call    _zeroop1
        ld      hl,Str1
        ld      de,OP1
        ld      bc,3
        ldir                            ; OP1 = name of Str1
        call    _findsym                ; lookup Str1
        jp      c,_errundefined

        and     $1F                     ; keep the object type bits
        cp      strngobj                ; is it a string?
        jp      nz,_errsyntax

        ld      hl,OP1                  ; was spelled op1, which does
        ld      (hl),progobj            ; not match the OP1 equate
        inc     hl                      ; hl -> name field of OP1

        ; de -> 2-byte length followed by the characters.  The length
        ; becomes an ldir count below, so reject anything unusable:
        ; a count of 0 would make ldir copy 64 KiB, and a long string
        ; would run past the end of OP1.
        ld      a,(de)                  ; length, low byte
        ld      c,a
        inc     de
        ld      a,(de)                  ; length, high byte
        or      a
        jp      nz,_errsyntax           ; 256 characters or more
        inc     de                      ; (de) is name ptr
        ld      a,c
        or      a
        jp      z,_errsyntax            ; empty string
        cp      8+1                     ; assumes names are <= 8 chars
        jp      nc,_errsyntax
        ld      b,0
        ex      de,hl
        ldir                            ; name to OP1

        call    _chkfindsym             ; size ptr -> de
        jp      c,_errundefined
; end of squish-based code

        ex      de,hl
        ld      e,(hl)                  ; LSB -> e
        inc     hl
        ld      d,(hl)                  ; MSB -> d
        inc     hl
        ld      (size),de
        ld      (dataptr),hl            ; first byte after the size

        ; de = size / 64.  The six bits shifted out collect in the
        ; top of a; two more shifts right-align them as size mod 64.
        ld      b,6
        xor     a
div64:
        srl     d
        rr      e
        rr      a                       ; save remainder
        djnz    div64
        srl     a
        srl     a

        ld      (block),de              ; # of _whole_ blocks
        ld      (extra_bytes),a

        ; H0..H7 = initial hash value.  Eight 4-byte words; the
        ; original count of 64 also copied what follows H7init into
        ; TEMP1 and ABUF..GBUF.
        ld      hl,H0init
        ld      de,H0
        ld      bc,8*4
        ldir
;-----------------------------------------------------------------------
; do_hash - hash every whole 64-byte block of the input
;
; While (block) is non-zero: decrement it, copy the next 64 input
; bytes from (dataptr) into WBUF, advance (dataptr), and run
; sha256update.  When no whole block is left, continue at do_padding.
;-----------------------------------------------------------------------
do_hash:                                ; a label or advice?
        ld      hl,(block)
        ld      a,h
        or      l                       ; any whole blocks left?
        jr      z,do_padding

        dec     hl
        ld      (block),hl              ; block--

        ld      hl,(dataptr)
        ld      de,WBUF
        ld      bc,64
        ldir                            ; copy block
        ld      (dataptr),hl            ; dataptr += 64

        call    sha256update
        jr      do_hash
;-----------------------------------------------------------------------
; do_padding - build and hash the final padded block(s)
;
; Copies the (extra_bytes) leftover input bytes to the start of WBUF,
; appends $80, zero fills the rest of the block and, when at least 8
; bytes remain after the $80, stores the message length in bits in the
; last three bytes before the final sha256update.  Otherwise the
; current block is hashed as is and a second block of zeros plus the
; length is produced.  Ends by jumping to done.
;-----------------------------------------------------------------------
do_padding:
        ld      a,(extra_bytes)
        ld      c,a
        ld      b,0                     ; bc = leftover byte count
        ld      hl,(dataptr)
        ld      de,WBUF
        or      a
        jr      z,no_cpy                ; ldir with bc = 0 would copy 64K
        ldir
no_cpy:
        ex      de,hl                   ; hl -> first free byte of WBUF
        ld      (hl),$80                ; the mandatory 1 bit
        inc     hl
        cp      63                      ; $80 filled the last byte?
        jr      z,need_another_block

; entry: a = bytes already used before the $80 slot, hl -> fill start
load_0s:
        cpl
        add     a,64                    ; a = 63 - a = bytes to zero
        ld      b,a
zero_fill:
        ld      (hl),0
        inc     hl
        djnz    zero_fill

        cp      8                       ; room for length?
        jr      c,need_another_block

        ; since the message length can't possibly be more than
        ; 16 bits, the 32-bit circular shift from the hash code can
        ; be reused to multiply it by 8
        ld      de,(size)
        ld      hl,$0000                ; ensure 0s shift in
        ld      b,3
        call    slcb                    ; hlde = size * 8
        ld      a,e
        ld      (WBUF+63),a
        ld      a,d
        ld      (WBUF+62),a
        ld      a,l
        ld      (WBUF+61),a

        call    sha256update
        jr      done

need_another_block:
        call    sha256update            ; hash the block without length
        xor     a                       ; next block starts out empty
        ld      hl,WBUF
        jr      load_0s                 ; do another block
;-----------------------------------------------------------------------
; done - write the finished hash back to Str1 as 64 hex characters
;
; Looks Str1 up again, deletes it, recreates it with a length of 64
; and stores H0..H7 into it as hex digits (two per byte, high nibble
; first), then clears the variable area and returns to the caller of
; the program.
;-----------------------------------------------------------------------
done:
        call    _zeroop1
        ld      hl,Str1
        ld      de,OP1
        ld      bc,3
        ldir                            ; OP1 = name of Str1
        call    _chkfindsym
        call    _delvar
        ld      hl,2*256/8              ; 64 characters
        call    _createstrng            ; recreate Str1
        inc     de                      ; store_hex pre-increments de,
                                        ; so its first write goes to
                                        ; the third byte of the string
        ld      b,256/8                 ; 32 hash bytes
        ld      hl,H0
store:
        ld      a,(hl)
        inc     hl
        ld      c,a                     ; keep the byte for the low half
        rrca
        rrca
        rrca
        rrca
        and     $0F
        call    store_hex               ; high nibble

        ld      a,c
        and     $0F
        call    store_hex               ; low nibble

        djnz    store

        call    clearvars

        ret                             ; end of done
.include "sha256up.z80" ; the fun stuff
;-----------------------------------------------------------------------
; clearvars - zero every program variable, block through size
;
; Zeroes the first byte and then lets an overlapping ldir smear that
; zero forward over the rest of the range.
;
; Out:   a = 0
; Clobb: bc, de, hl, flags
;-----------------------------------------------------------------------
clearvars:
        ld      hl,block
        xor     a
        ld      (hl),a                  ; seed byte
        ld      de,block+1
        ; size is a 2-byte variable, so the last byte to clear is
        ; size+1; the original count of size-block stopped one short
        ld      bc,size+1-block
        ldir
        ret                             ; end of clearvars subroutine
;-----------------------------------------------------------------------
; store_hex - append one hex digit to the output
;
; In:    a  = value 0..15
;        de -> byte BEFORE the slot to write
; Out:   de advanced by one, (de) = '0'..'9' or 'A'..'F'
;        a  = the character stored
;-----------------------------------------------------------------------
store_hex:
        inc     de
        cp      $A
        jr      c,store_digit
        add     a,'A'-$A-'0'            ; bridge the gap from '9' to 'A'
store_digit:
        add     a,'0'
        ld      (de),a
        ret                             ; end of hex character store
.end
|