Bitcoin Forum
May 08, 2024, 08:10:03 AM *
News: Latest Bitcoin Core release: 27.0 [Torrent]
 
   Home   Help Search Login Register More  
Pages: [1]
  Print  
Author Topic: How fast/simple/short a CPU bitcoin mining core script can be?  (Read 2574 times)
jenga (OP)
Newbie
*
Offline Offline

Activity: 26
Merit: 0


View Profile
September 02, 2013, 05:35:01 PM
 #1

Randomly building a CPU miner in pure C.
I just wanted to make it as compact as humanly possible, and maybe fast... maybe...

It uses a bunch of optimizations like midstate precalc and early 2nd round loop termination.

Basically it is a heavily simplified version of this reference algorithm:
http://bradconte.com/sha256_c

Compiling it with
Code:
-g -O3 -march=native
in Windows and a Core 2 Duo 2.33 GHz brings 850 Kh/s (single thread).
I wanted to make use of some sort of SSE/SIMD though, but myself I don't think I'd do it right (any help is appreciated).

What do you peolpe think?  Tongue

Windows executable:
https://mediafire.com/?xb9en1ab3t8e3c2

Source code:

sha256_cpu.c
Code:
#include "sha256_cpu.h"

int main() {

    // Big-endian 255123 block string
    uchar text[]="02000000"
    "61b9273640571357bdc428788b36ae9827349e9d40627d2d2d00000000000000"
    "b1eb3bce1dde137625382e9445e707e6ec3f9b46948d2a7d8d88da42a877104d"
    "5f1c2152"
    "57524119"
    "79f90238"
    "800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000280"
    ;

    // Little-endian version:

    // 000000023627b961571357407828c4bd98ae368b9d9e34272d7d62400000002d00000000ce3bebb1
    // 7613de1d942e3825e607e745469b3fec7d2a8d9442da888d4d1077a852211c5f194152573802f979
    // 00000080000000000000000000000000000000000000000000000000000000000000000000000000
    // 0000000080020000

    /** STRING PRE-PROCESS **/

    uint i;
    uchar *pos = text;
    static uint text1[32];

    // convert chars to hex values. Evil dump into text1
    for(i = 0; i < 128; i++) {
        *((uchar*)text1+i) = htochar(pos);
        pos+=2;
    }

    // Switch endianness
    // You can remove the following loop if you are already
    // working with a little-endian string
    for(i = 0; i < 32; i++) {
        text1[i] = byte_swap4(text1[i]);
    }

    // Pre-process finished.
    // String is loaded into text1 as 32 binary u-integers.

    /** MIDSTATE CALCULATION **/

    uint midstate[8];
    sha256_MS(text1, midstate);

    static uint res;
    uint *ptroff = &text1[16];

    text1[19] = 0x79f90238U;
    printf("Starting nonce: %08x\n", text1[19]);

    SYSTEMTIME st; // Windows API to build a timer...
    GetSystemTime(&st);
    uint start = st.wSecond*1000 + st.wMilliseconds;

    /** MAIN LOOP **/

    const uint endnonce = 0x79f90238U + 1000000U; // 850 Kh/s: hash rate on my Core 2 Duo 2.333 GHz :)

    for (text1[19] = 0x79f90238U; text1[19] < endnonce; text1[19]++) { // Increment nonce

        res = sha256d(midstate, ptroff); // The kraken. Release it.

        if (res == 0) printf("Share found at nonce: %08x SUCCESS\n", text1[19]);
    }

    GetSystemTime(&st);
    printf("Ending nonce: %08x\n\n", --text1[19]);
    printf("Total time taken: %f secs\n", ((st.wSecond*1000.0 + st.wMilliseconds)-start)/1000.0);
    printf("Estimated hashrate (can be very inaccurate): %f Mh/s\n", 1000.0/((st.wSecond*1000.0 + st.wMilliseconds)-start));

    // Btw real hash is:
    // e17e38f81b4af47ab2ff29fe554c8c767c03444aee9119381f00000000000000
    // 000000000000001f381991ee4a44037c768c4c55fe29ffb27af44a1bf8387ee1

    printf("You can now safely terminate the program.\n");
    getchar();
    return 0;
}


sha256_cpu.h
Code:
#include <stdio.h>
#include <windows.h>

#define uchar unsigned char
#define uint unsigned int

uchar htochar(uchar *ptr) {
    uchar value = 0;
    char ch = *ptr;

    if (ch >= '0' && ch <= '9')
        value = (value << 4) + (ch - '0');
    else
        value = (value << 4) + (ch - 'a' + 10);

    ch = *(++ptr);

    if (ch >= '0' && ch <= '9')
        value = (value << 4) + (ch - '0');
    else
        value = (value << 4) + (ch - 'a' + 10);

    return value;
}

#define byte_swap4(val)              \
         (((val & 0xff) << 24) |     \
          ((val & 0xff00) << 8) |    \
          ((val & 0xff0000) >> 8) |  \
          ((val & 0xff000000) >> 24))

#define ROTLEFT(a,b) ((a << b) | (a >> (32-b)))
#define ROTRIGHT(a,b) ((a >> b) | (a << (32-b)))

#define CH(x,y,z) ((x & y) ^ (~x & z))
#define MAJ(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ (x >> 3))
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ (x >> 10))

static const uint k[64] = {
   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
};


void sha256_MS(uint data[], uint midstate[]) {
    uint a,b,c,d,e,f,g,h,i=0,t1,t2,m[64];

    a = 0x6a09e667U;
    b = 0xbb67ae85U;
    c = 0x3c6ef372U;
    d = 0xa54ff53aU;
    e = 0x510e527fU;
    f = 0x9b05688cU;
    g = 0x1f83d9abU;
    h = 0x5be0cd19U;

    for (; i < 16; i++) m[i] = data[i];

    for (; i < 64; i++)
        m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];

    for (i = 0; i < 64; ++i) {
        t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
        t2 = EP0(a) + MAJ(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
    }

    midstate[0] = 0x6a09e667U + a;
    midstate[1] = 0xbb67ae85U + b;
    midstate[2] = 0x3c6ef372U + c;
    midstate[3] = 0xa54ff53aU + d;
    midstate[4] = 0x510e527fU + e;
    midstate[5] = 0x9b05688cU + f;
    midstate[6] = 0x1f83d9abU + g;
    midstate[7] = 0x5be0cd19U + h;
}

uint sha256d(uint midstate[], uint text1[]) {
    uint a,b,c,d,e,f,g,h,i,t1,t2,m[64];
    uint ee,eee,eeee;

    // Hash One

    a = midstate[0];
    b = midstate[1];
    c = midstate[2];
    d = midstate[3];
    e = midstate[4];
    f = midstate[5];
    g = midstate[6];
    h = midstate[7];

    for (i = 0;  i < 16; i++) m[i] = text1[i];

    for (; i < 64; i++)
        m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];

    for (i = 0; i < 64; i++) {
        t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
        t2 = EP0(a) + MAJ(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;

    }

    m[0] = midstate[0] + a;
    m[1] = midstate[1] + b;
    m[2] = midstate[2] + c;
    m[3] = midstate[3] + d;
    m[4] = midstate[4] + e;
    m[5] = midstate[5] + f;
    m[6] = midstate[6] + g;
    m[7] = midstate[7] + h;

    // Hash Two

    a = 0x6a09e667U;
    b = 0xbb67ae85U;
    c = 0x3c6ef372U;
    d = 0xa54ff53aU;
    e = 0x510e527fU;
    f = 0x9b05688cU;
    g = 0x1f83d9abU;
    h = 0x5be0cd19U;

    m[8]  = 0x80000000U;
    m[9]  = 0x00U;
    m[10] = 0x00U;
    m[11] = 0x00U;
    m[12] = 0x00U;
    m[13] = 0x00U;
    m[14] = 0x00U;
    m[15] = 0x100U;

    for (i = 16; i < 64; i++)
        m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];

    for (i = 0; i < 57; i++) {
        t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
        t2 = EP0(a) + MAJ(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
    }

    eeee = d + h + EP1(e) + CH(e,f,g) + 0x78a5636fU + m[57];
    eee = c + g + EP1(eeee) + CH(eeee,e,f) + 0x84c87814U + m[58];
    ee = b + f + EP1(eee) + CH(eee,eeee,e) + 0x8cc70208U + m[59];
    h = a + e + EP1(ee) + CH(ee,eee,eeee) + 0x90befffaU + m[60];

    return 0x5be0cd19U + h;
}


#include <windows.h>
is only used for setting up the timer. If you work on linux or mac, just get rid of the lines related to the timer.
1715155803
Hero Member
*
Offline Offline

Posts: 1715155803

View Profile Personal Message (Offline)

Ignore
1715155803
Reply with quote  #2

1715155803
Report to moderator
The Bitcoin network protocol was designed to be extremely flexible. It can be used to create timed transactions, escrow transactions, multi-signature transactions, etc. The current features of the client only hint at what will be possible in the future.
Advertised sites are not endorsed by the Bitcoin Forum. They may be unsafe, untrustworthy, or illegal in your jurisdiction.
1715155803
Hero Member
*
Offline Offline

Posts: 1715155803

View Profile Personal Message (Offline)

Ignore
1715155803
Reply with quote  #2

1715155803
Report to moderator
1715155803
Hero Member
*
Offline Offline

Posts: 1715155803

View Profile Personal Message (Offline)

Ignore
1715155803
Reply with quote  #2

1715155803
Report to moderator
1715155803
Hero Member
*
Offline Offline

Posts: 1715155803

View Profile Personal Message (Offline)

Ignore
1715155803
Reply with quote  #2

1715155803
Report to moderator
jenga (OP)
Newbie
*
Offline Offline

Activity: 26
Merit: 0


View Profile
September 05, 2013, 10:40:27 PM
 #2

Anybody has anything to point out?
Of course its nothing serious, just for fun and constructive learning.
jonjakejingle
Newbie
*
Offline Offline

Activity: 9
Merit: 0


View Profile
March 19, 2014, 10:41:40 AM
 #3

Hey! I like it. I compared your code to mine.. Maybe you can help me out with my sha256 function. It keeps giving me the wrong hash. Do you notice anything obvi wrong w/it ? (it's as3)

Code:
import flash.utils.ByteArray;

var K =
[0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2]

function ch(x,y,z)
{
return (x & y) | (~x & z);
}

function maj(x,y,z)
{
return (x & y) | (x & z) | (y & z);
}

function sigma0(x)
{
return rotr(2, x) | rotr(13, x) | rotr(22, x)
}

function sigma1(x)
{
return rotr(6, x) | rotr(11, x) | rotr(25,x)
}

function lsigma0(x)
{
return rotr(7, x) | rotr(18, x) | shr(3, x);
}

function lsigma1(x)
{
return rotr(17, x) | rotr(19, x) | shr(10, x);
}

// rotate right
function rotr(n, x)
{
return (x >> n) | (x << (32-n));
}

// shift right
function shr(n, x)
{
return x >> n;
}

function pad(str)
{
var msg = new ByteArray();
// msg.length = 512/8;
msg.position = 0;

// write the characters
for( var i=0; i < str.length; i++ ) {
msg.writeByte( str.charCodeAt(i) );
}

// pad it with k zeroes
// (lmod512) +1+ k ≡ 448
var str_length_bits = str.length * 8;
var num_zeroes = 448 - 1 - (str_length_bits % 512);

// write a 1
msg.writeByte( 0x80 );

// we can only write bytes at a time, so let's see how many bytes it is
var num_zeroes_bytes = Math.floor(num_zeroes / 8);

// need to wrap to another block?
if( num_zeroes_bytes < 0 ) {
num_zeroes_bytes += 64
}

trace('num_zeroes_bytes', num_zeroes_bytes);
for( var i=0; i < num_zeroes_bytes; i++ ) {
msg.writeByte(0);
}

// 64 bit block that is the length of the message (in # of bits).
// we ignore the first 32 bits because I'm not sure how to bitwise
// shift a flash number to two 32-bit slices
msg.writeUnsignedInt(0);

// meaning our message can not be longer than (2**33)-1 in length
msg.writeUnsignedInt(str_length_bits);

return msg;
}

// do eeet
var str = "jakes";

var msg = pad(str);

for( var n = 0; n < msg.length; n++ )
{
trace( n + ' : ' +  left_pad(msg[n].toString(2)) );
}

function left_pad(str, length=8, padstr='0')
{
while( str.length < length ) {
str = padstr + str;
}

return str;
}

// parse into n 512-bit (64 bytes, 16 words) blocks

// set initial hash value
var H = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19];

var modVal = Math.pow(2, 32);

var block:ByteArray = new ByteArray();

var offset = 0;
msg.position = 0;
while(true)
{
try
{
// read the block
msg.readBytes(block, offset, 64);
trace('reading block from ' + offset);
offset += 64;

// construct the message schedule (64 32-bit words)
var W = [];

block.position = 0;
for( var t = 0; t < 64; t++ )
{
if( t <= 15 )
{
W[t] = block.readByte();
}
else
{
W[t] = lsigma1(W[t-2]) + W[t-7] + lsigma0(W[t-15]) + W[t-16];
}
}

// 8 working variables
var a:uint = H[0];
var b:uint = H[1];
var c:uint = H[2];
var d:uint = H[3];
var e:uint = H[4];
var f:uint = H[5];
var g:uint = H[6];
var h:uint = H[7];

for( t = 0; t < 64; t++ )
{
var T1:uint = h + sigma1(e) + ch(e,f,g) + K[t] + W[t];
//T1 = T1%modVal;

var T2:uint = sigma0(a) + maj(a,b,c);
//T2 = T2%modVal;

h = g;
g = f;
f = e;
e = d + T1;
d = c;
c = b;
b = a;
a = (T1 + T2);
//a = a%modVal;
}

// compute the i-th intermediate hash value of H(i)
H[0] = (a + H[0]) % modVal;
H[1] = (b + H[1]) % modVal;
H[2] = (c + H[2]) % modVal;
H[3] = (d + H[3]) % modVal;
H[4] = (e + H[4]) % modVal;
H[5] = (f + H[5]) % modVal;
H[6] = (g + H[6]) % modVal;
H[7] = (h + H[7]) % modVal;
}
catch(e)
{
trace('no more blocks, ending at byte ',offset, 'of', msg.length);
// no more bytes to read?
break;
}
}

// sha256('jakes') =
// 1e03a95d8dc846c1c64271998a9865ed1a46d962634df2c9cff5b095dc8b5aca

var hex = '';
for( n = 0; n < 8; n++ )
{
hex += left_pad(H[n].toString(16), 8) + ' ';
}

trace( hex.length + ': ' + hex);
Pages: [1]
  Print  
 
Jump to:  

Powered by MySQL Powered by PHP Powered by SMF 1.1.19 | SMF © 2006-2009, Simple Machines Valid XHTML 1.0! Valid CSS!