[Hardware] RC5 algorithm

Martin Klingensmith martin at nnytech.net
Mon Dec 26 19:42:00 EST 2005


John L. Bass wrote:

>Actually Martin, the pipelining is a piece of cake with the right tools,
>and it does work out to one key per clock. It takes a pretty fair sized
>FPGA, and there are fit problems because of that. "All you need to do,
>is just unroll the loop" (hehehe ... haven't we heard that before ... lol).
>
>Actually, I did just that about a year ago, and it took about a day,
>using a hacked version of TMCC.
>
>You will find Fpga C on sourceforge.net:
>
>	http://sourceforge.net/projects/fpgac
>  
>

Hi John
Thanks for the very detailed reply.
I unrolled the loops and enumerated the variables and came up with this 
code. The only problem I'm having is right here:
    L0 = 0x53030cc9;
    L0 = ROTL(L0 + 0xbf0a8b1d, 0xbf0a8b1d);

If I replace the L0 in the second line with the literal 0x53030cc9, I get the 
wrong answer (this is still ordinary C compiled with gcc).
Anyway, here is the code:
///////////////////////////////////////////////////////
// Martin's unrolled RC5-32/12/9 (72 bit) code

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* RC5 magic constants for 32-bit words.  The unrolled code does not expand
 * them directly: its literals are the precomputed values Pw + i*Qw
 * (mod 2^32), and 0xbf0a8b1d is ROTL3(Pw). */
#define Pw  0xb7e15163
#define Qw  0x9e3779b9

/* ROTL3(x): rotate the low 32 bits of x left by 3.
 * The operand is truncated to 32 bits before shifting and the result is
 * truncated again, so the macro gives the same answer whether the caller's
 * word type is 32 or 64 bits wide. */
#define ROTL3(x) \
    (((((x) & 0xffffffffUL) << 3) | (((x) & 0xffffffffUL) >> 29)) & 0xffffffffUL)

/* ROTL(x,n): rotate the low 32 bits of x left by the low 5 bits of n.
 * RC5 uses a whole data word as the rotation amount, so n is routinely far
 * larger than 31.  Shifting by a count >= the operand width is undefined
 * behaviour in C (and constant-folds differently from what the CPU does at
 * run time), hence the explicit "& 31".  The right-shift count is reduced
 * mod 32 as well so that a rotation by 0 shifts by 0 rather than by 32.
 * Both arguments are evaluated more than once: pass side-effect-free
 * expressions only. */
#define ROTL(x,n) \
    (((((x) & 0xffffffffUL) << ((n) & 31)) | \
      (((x) & 0xffffffffUL) >> ((32 - ((n) & 31)) & 31))) & 0xffffffffUL)
   
/*
 * Fully unrolled RC5-32/12/9 test vector: expands one fixed 9-byte key into
 * the 26-word table S0..S25, then encrypts one fixed 64-bit block with
 * 12 rounds and prints the intermediate and final words.
 *
 * Every word is a uint32_t so that the additions wrap modulo 2^32 on every
 * platform (unsigned long is 64 bits wide on LP64 systems and would carry
 * into the upper half).
 *
 * Returns 0.
 */
int main(void){
    uint32_t S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
        S10, S11, S12, S13, S14, S15, S16, S17, S18, S19,
        S20, S21, S22, S23, S24, S25, L0, L1, L2,
        A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A,
        B0,B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B;

    // key = c9 0c 03 53 c0 d4 e1 fe 85, loaded little-endian into L0..L2
    L0 = 0x53030cc9;
    // Debug aid: the 32-bit sum that feeds the first key-word rotation.
    printf("\nL0+0xbf.. = 0x%x",(0x53030cc9+0xbf0a8b1d)&0xffffffff);
    L1 = 0xfee1d4c0;
    L2 = 0x85;

    // Key mixing, pass 1 of 3.  S0's first step is constant-folded to
    // 0xbf0a8b1d (= ROTL3(Pw)); the literal inside each ROTL3 below is the
    // initial table entry Pw + i*Qw for that S word.
    L0 = ROTL(L0 + 0xbf0a8b1d, 0xbf0a8b1d);
    S1 = ROTL3(0x5618cb1c + 0xbf0a8b1d + L0);
    L1 = ROTL(L1 + S1 + L0, S1 + L0);
    S2 = ROTL3(0xf45044d5 + L1 + S1);
    L2 = ROTL(L2 + S2 + L1, S2 + L1);
    S3 = ROTL3(0x9287be8e + S2 + L2);
    L0 = ROTL(L0 + S3 + L2, S3 + L2);
    S4 = ROTL3(0x30bf3847 + S3 + L0);
    L1 = ROTL(L1 + S4 + L0, S4 + L0);
    S5 = ROTL3(0xcef6b200 + S4 + L1);
    L2 = ROTL(L2 + S5 + L1, S5 + L1);
    S6 = ROTL3(0x6d2e2bb9 + S5 + L2);
    L0 = ROTL(L0 + L2 + S6, L2 + S6);
    S7 = ROTL3(0x0b65a572 + L0 + S6);
    L1 = ROTL(L1 + S7 + L0, S7 + L0);
    S8 = ROTL3(0xa99d1f2b + S7 + L1);
    L2 = ROTL(L2 + S8 + L1, S8 + L1);
    S9 = ROTL3(0x47d498e4 + S8 + L2);
    L0 = ROTL(L0 + S9 + L2, S9 + L2);
    S10 = ROTL3(0xe60c129d + L0 + S9);
    L1  = ROTL(L1 + S10 + L0, S10 + L0);
    S11 = ROTL3(0x84438c56 + L1+S10);
    L2  = ROTL(L2 + S11+L1, S11+L1);
    S12 = ROTL3(0x227b060f + L2+S11);
    L0  = ROTL(L0 + S12+L2, S12+L2);
    S13 = ROTL3(0xc0b27fc8 + L0+S12);
    L1  = ROTL(L1 + S13+L0, S13+L0);
    S14 = ROTL3(0x5ee9f981 + L1+S13);
    L2  = ROTL(L2 + S14+L1, S14+L1);
    S15 = ROTL3(0xfd21733a + L2+S14);
    L0  = ROTL(L0 + S15+L2, S15+L2);
    S16 = ROTL3(0x9b58ecf3 + S15+L0);
    L1  = ROTL(L1 + S16+L0, S16+L0);
    S17 = ROTL3(0x399066ac + L1+S16);
    L2  = ROTL(L2 + S17+L1, S17+L1);
    S18 = ROTL3(0xd7c7e065 + L2+S17);
    L0  = ROTL(L0 + S18+L2, S18+L2);
    S19 = ROTL3(0x75ff5a1e + S18+L0);
    L1  = ROTL(L1 + S19+L0, S19+L0);
    S20 = ROTL3(0x1436d3d7 + S19+L1);
    L2  = ROTL(L2 + S20+L1, S20+L1);
    S21 = ROTL3(0xb26e4d90 + L2+S20);
    L0  = ROTL(L0 + S21+L2, S21+L2);
    S22 = ROTL3(0x50a5c749 + L0+S21);
    L1  = ROTL(L1 + S22+L0, S22+L0);
    S23 = ROTL3(0xeedd4102 + L1+S22);
    L2  = ROTL(L2 + L1+S23, S23+L1);
    S24 = ROTL3(0x8d14babb + L2+S23);
    L0  = ROTL(L0 + S24+L2, S24+L2);
    S25 = ROTL3(0x2b4c3474 + L0+S24);
    L1  = ROTL(L1 + S25+L0, S25+L0);

    // Key mixing, pass 2 of 3.  S0 re-enters with its pass-1 value
    // 0xbf0a8b1d; every other S word now feeds back its own previous value.
    S0  = ROTL3(0xbf0a8b1d + L1+S25);
   
    L2  = ROTL(L2 + S0+L1, S0+L1);
    S1  = ROTL3(S1 + L2+S0);
    L0  = ROTL(L0 + S1+L2, S1+L2);
    S2  = ROTL3(S2 + L0+S1);
    L1  = ROTL(L1 + S2+L0, S2+L0);
    S3  = ROTL3(S3 + L1+S2);
    L2  = ROTL(L2 + S3+L1, S3+L1);
    S4  = ROTL3(S4+ L2+S3);
    L0  = ROTL(L0 + S4+L2, L2+S4);
    S5  = ROTL3(S5 + L0+S4);
    L1  = ROTL(L1 + S5+L0, S5+L0);
    S6  = ROTL3(S6 + L1+S5);
    L2  = ROTL(L2 + S6+L1, S6+L1);
    S7  = ROTL3(S7 + L2+S6);
    L0  = ROTL(L0 + L2+S7, L2+S7);
    S8  = ROTL3(S8 + L0+S7);
    L1  = ROTL(L1 + S8+L0, S8+L0);
    S9  = ROTL3(S9 + L1+S8);
    L2  = ROTL(L2 + S9+L1, S9+L1);
    S10 = ROTL3(S10 + L2+S9);
    L0  = ROTL(L0 + S10+L2, S10+L2);
    S11 = ROTL3(S11 + L0+S10);
    L1  = ROTL(L1 + S11+L0, S11+L0);
    S12 = ROTL3(S12 + L1+S11);
    L2  = ROTL(L2 + S12+L1, S12+L1);
    S13 = ROTL3(S13 + L2+S12);
    L0  = ROTL(L0 + S13+L2, S13+L2);
    S14 = ROTL3(S14 + L0+S13);
    L1  = ROTL(L1 + S14+L0, S14+L0);
    S15 = ROTL3(S15 + L1+S14);
    L2  = ROTL(L2 + S15+L1, S15+L1);
    S16 = ROTL3(S16 + L2+S15);
    L0  = ROTL(L0 + S16+L2, S16+L2);
    S17 = ROTL3(S17 + L0+S16);
    L1  = ROTL(L1 + S17+L0, S17+L0);
    S18 = ROTL3(S18 + L1+S17);
    L2  = ROTL(L2 + S18+L1, S18+L1);
    S19 = ROTL3(S19 + L2+S18);
    L0  = ROTL(L0 + S19+L2, S19+L2);
    S20 = ROTL3(S20 + L0+S19);
    L1  = ROTL(L1 + S20+L0, S20+L0);
    S21 = ROTL3(S21 + L1+S20);
    L2  = ROTL(L2 + S21+L1, S21+L1);
    S22 = ROTL3(S22 + L2+S21);
    L0  = ROTL(L0 + S22+L2, S22+L2);
    S23 = ROTL3(S23 + L0+S22);
    L1  = ROTL(L1 + S23+L0, S23+L0);
    S24 = ROTL3(S24 + L1+S23);
    L2  = ROTL(L2 + S24+L1, S24+L1);
    S25 = ROTL3(S25 + L2+S24);
    L0  = ROTL(L0 + S25+L2, L2+S25);

    // Key mixing, pass 3 of 3.
    S0  = ROTL3(S0 + L0+S25);
    L1  = ROTL(L1 + S0+L0, S0+L0);
    S1  = ROTL3(S1 + L1+S0);
    L2  = ROTL(L2 + L1+S1, L1+S1);
    S2  = ROTL3(S2 + L2+S1);
    L0  = ROTL(L0 + S2+L2, S2+L2);
    S3  = ROTL3(S3 + L0+S2);
    L1  = ROTL(L1 + L0+S3, L0+S3);
    S4  = ROTL3(S4 + L1+S3);
    L2  = ROTL(L2 + S4+L1, S4+L1);
    S5  = ROTL3(S5 + L2+S4);
    L0  = ROTL(L0 + S5+L2, S5+L2);
    S6  = ROTL3(S6 + L0+S5);
    L1  = ROTL(L1 + S6+L0, S6+L0);
    S7  = ROTL3(S7 + L1+S6);
    L2  = ROTL(L2 + S7+L1, S7+L1);
    S8  = ROTL3(S8 + S7+L2);
    L0  = ROTL(L0 + S8+L2, S8+L2);
    S9  = ROTL3(S9 + L0+S8);
    L1  = ROTL(L1 + S9+L0, S9+L0);
    S10 = ROTL3(S10 + L1+S9);
    L2  = ROTL(L2 + S10+L1, S10+L1);
    S11 = ROTL3(S11 + L2+S10);
    L0  = ROTL(L0 + S11+L2, S11+L2);
    S12 = ROTL3(S12 + L0+S11);
    L1  = ROTL(L1 + S12+L0, S12+L0);
    S13 = ROTL3(S13 + L1+S12);
    L2  = ROTL(L2 + L1+S13, L1+S13);
    S14 = ROTL3(S14 + L2+S13);
    L0  = ROTL(L0 + S14+L2, S14+L2);
    S15 = ROTL3(S15 + S14+L0);
    L1  = ROTL(L1 + S15+L0, S15+L0);
    S16 = ROTL3(S16 + L1 + S15);
    L2  = ROTL(L2 + L1+S16, L1+S16);
    S17 = ROTL3(S17 + S16+L2);
    L0  = ROTL(L0 + S17+L2, S17+L2);
    S18 = ROTL3(S18 + L0+S17);
    L1  = ROTL(L1 + S18+L0, S18+L0);
    S19 = ROTL3(S19 + S18 + L1);
    L2  = ROTL(L2 + S19+L1, S19+L1);
    S20 = ROTL3(S20 + S19+L2);
    L0  = ROTL(L0 + L2+S20, L2+S20);
    S21 = ROTL3(S21 + S20+L0);
    L1  = ROTL(L1 + L0+S21, L0+S21);
    S22 = ROTL3(S22 + S21+L1);
    L2  = ROTL(L2 + L1+S22, L1+S22);
    S23 = ROTL3(S23 + S22+L2);
    L0  = ROTL(L0 + L2+S23, L2+S23);
    S24 = ROTL3(S24 + S23+L0);
    L1  = ROTL(L1 + L0+S24, L0+S24);
    // A and B temporarily hold copies of the last S and L words so the
    // end state of the key schedule can be printed below.
    S25 = A = ROTL3(S25 + S24+L1);
    L2  = B = ROTL(L2 + L1+S25, L1+S25);

    // Plaintext p = 54 68 65 20 75 6e 6b 6e ("The unkn"):
    //   A = 0x20656854, B = 0x6e6b6e75
    // IV = 07 ce 59 1f 86 14 9a 41:
    //   ivA = 0x1f59ce07, ivB = 0x419a1486
    // RC5-CBC XORs the IV into the input block before encryption, so the
    // literals used in the first round below are
    //   0x3f3ca653 = 0x20656854 ^ 0x1f59ce07
    //   0x2ff17af3 = 0x6e6b6e75 ^ 0x419a1486
   
    // Debug aid: final S25 and L2 of the key schedule.
    printf("\nA: %08x\nB: %08x\n",(unsigned int)A,(unsigned int)B);

    // Encryption: the initial whitening (input + S0, input + S1) is folded
    // into round 1; the 12 rounds then consume S2..S25 two words at a time.
    A0 = ROTL((0x3f3ca653+S0) ^ (0x2ff17af3+S1), (0x2ff17af3+S1)) + S2;
    B0 = ROTL((0x2ff17af3+S1) ^ A0, A0) + S3;
    A1 = ROTL(A0 ^ B0, B0) + S4;
    B1 = ROTL(B0 ^ A1, A1) + S5;
    A2 = ROTL(A1 ^ B1, B1) + S6;
    B2 = ROTL(B1 ^ A2, A2) + S7;
    A3 = ROTL(A2 ^ B2, B2) + S8;
    B3 = ROTL(B2 ^ A3, A3) + S9;
    A4 = ROTL(A3 ^ B3, B3) + S10;
    B4 = ROTL(B3 ^ A4, A4) + S11;
    A5 = ROTL(A4 ^ B4, B4) + S12;
    B5 = ROTL(B4 ^ A5, A5) + S13;
    A6 = ROTL(A5 ^ B5, B5) + S14;
    B6 = ROTL(B5 ^ A6, A6) + S15;
    A7 = ROTL(A6 ^ B6, B6) + S16;
    B7 = ROTL(B6 ^ A7, A7) + S17;
    A8 = ROTL(A7 ^ B7, B7) + S18;
    B8 = ROTL(B7 ^ A8, A8) + S19;
    A9 = ROTL(A8 ^ B8, B8) + S20;
    B9 = ROTL(B8 ^ A9, A9) + S21;
    A10 = ROTL(A9 ^ B9, B9) + S22;
    B10 = ROTL(B9 ^ A10, A10) + S23;
    A = ROTL(A10 ^ B10, B10) + S24;
    B = ROTL(B10 ^ A, A) + S25;

    // Ciphertext words.
    printf("\nA: %x\nB: %x\n",(unsigned int)A,(unsigned int)B);

    // Keeps a Windows console window open; on other systems the shell
    // just reports that PAUSE is not a command.
    system("PAUSE");
    return (0);
}



-- 
---
Martin Klingensmith
nnytech.net
infoarchive.net



More information about the Hardware mailing list