Title: How fast/simple/short a CPU bitcoin mining core script can be?
Post by: jenga on September 02, 2013, 05:35:01 PM
Randomly building a CPU miner in pure C. I just wanted to make it as compact as humanly possible, and maybe fast... maybe... It uses a bunch of optimizations like midstate precalc and early 2nd round loop termination. Basically it is a heavily simplified version of this reference algorithm: http://bradconte.com/sha256_c Compiling it with in Windows and a Core 2 Duo 2.33 GHz brings 850 Kh/s (single thread). I wanted to make use of some sort of SSE/SIMD though, but myself I don't think I'd do it right (any help is appreciated). What do you peolpe think? :P Windows executable: https://mediafire.com/?xb9en1ab3t8e3c2 Source code: sha256_cpu.c #include "sha256_cpu.h"
int main() {
// Big-endian 255123 block string uchar text[]="02000000" "61b9273640571357bdc428788b36ae9827349e9d40627d2d2d00000000000000" "b1eb3bce1dde137625382e9445e707e6ec3f9b46948d2a7d8d88da42a877104d" "5f1c2152" "57524119" "79f90238" "800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000280" ;
// Little-endian version:
// 000000023627b961571357407828c4bd98ae368b9d9e34272d7d62400000002d00000000ce3bebb1 // 7613de1d942e3825e607e745469b3fec7d2a8d9442da888d4d1077a852211c5f194152573802f979 // 00000080000000000000000000000000000000000000000000000000000000000000000000000000 // 0000000080020000
/** STRING PRE-PROCESS **/
uint i; uchar *pos = text; static uint text1[32];
// convert chars to hex values. Evil dump into text1 for(i = 0; i < 128; i++) { *((uchar*)text1+i) = htochar(pos); pos+=2; }
// Switch endianness // You can remove the following loop if you are already // working with a little-endian string for(i = 0; i < 32; i++) { text1[i] = byte_swap4(text1[i]); }
// Pre-process finished. // String is loaded into text1 as 32 binary u-integers.
/** MIDSTATE CALCULATION **/
uint midstate[8]; sha256_MS(text1, midstate);
static uint res; uint *ptroff = &text1[16];
text1[19] = 0x79f90238U; printf("Starting nonce: %08x\n", text1[19]);
SYSTEMTIME st; // Windows API to build a timer... GetSystemTime(&st); uint start = st.wSecond*1000 + st.wMilliseconds;
/** MAIN LOOP **/
const uint endnonce = 0x79f90238U + 1000000U; // 850 Kh/s: hash rate on my Core 2 Duo 2.333 GHz :)
for (text1[19] = 0x79f90238U; text1[19] < endnonce; text1[19]++) { // Increment nonce
res = sha256d(midstate, ptroff); // The kraken. Release it.
if (res == 0) printf("Share found at nonce: %08x SUCCESS\n", text1[19]); }
GetSystemTime(&st); printf("Ending nonce: %08x\n\n", --text1[19]); printf("Total time taken: %f secs\n", ((st.wSecond*1000.0 + st.wMilliseconds)-start)/1000.0); printf("Estimated hashrate (can be very inaccurate): %f Mh/s\n", 1000.0/((st.wSecond*1000.0 + st.wMilliseconds)-start));
// Btw real hash is: // e17e38f81b4af47ab2ff29fe554c8c767c03444aee9119381f00000000000000 // 000000000000001f381991ee4a44037c768c4c55fe29ffb27af44a1bf8387ee1
printf("You can now safely terminate the program.\n"); getchar(); return 0; }
sha256_cpu.h #include <stdio.h> #include <windows.h>
#define uchar unsigned char #define uint unsigned int
uchar htochar(uchar *ptr) { uchar value = 0; char ch = *ptr;
if (ch >= '0' && ch <= '9') value = (value << 4) + (ch - '0'); else value = (value << 4) + (ch - 'a' + 10);
ch = *(++ptr);
if (ch >= '0' && ch <= '9') value = (value << 4) + (ch - '0'); else value = (value << 4) + (ch - 'a' + 10);
return value; }
#define byte_swap4(val) \ (((val & 0xff) << 24) | \ ((val & 0xff00) << 8) | \ ((val & 0xff0000) >> 8) | \ ((val & 0xff000000) >> 24))
#define ROTLEFT(a,b) ((a << b) | (a >> (32-b))) #define ROTRIGHT(a,b) ((a >> b) | (a << (32-b)))
#define CH(x,y,z) ((x & y) ^ (~x & z)) #define MAJ(x,y,z) ((x & y) ^ (x & z) ^ (y & z)) #define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) #define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) #define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ (x >> 3)) #define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ (x >> 10))
static const uint k[64] = { 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 };
void sha256_MS(uint data[], uint midstate[]) { uint a,b,c,d,e,f,g,h,i=0,t1,t2,m[64];
a = 0x6a09e667U; b = 0xbb67ae85U; c = 0x3c6ef372U; d = 0xa54ff53aU; e = 0x510e527fU; f = 0x9b05688cU; g = 0x1f83d9abU; h = 0x5be0cd19U;
for (; i < 16; i++) m[i] = data[i];
for (; i < 64; i++) m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];
for (i = 0; i < 64; ++i) { t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; t2 = EP0(a) + MAJ(a,b,c); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2; }
midstate[0] = 0x6a09e667U + a; midstate[1] = 0xbb67ae85U + b; midstate[2] = 0x3c6ef372U + c; midstate[3] = 0xa54ff53aU + d; midstate[4] = 0x510e527fU + e; midstate[5] = 0x9b05688cU + f; midstate[6] = 0x1f83d9abU + g; midstate[7] = 0x5be0cd19U + h; }
uint sha256d(uint midstate[], uint text1[]) { uint a,b,c,d,e,f,g,h,i,t1,t2,m[64]; uint ee,eee,eeee;
// Hash One
a = midstate[0]; b = midstate[1]; c = midstate[2]; d = midstate[3]; e = midstate[4]; f = midstate[5]; g = midstate[6]; h = midstate[7];
for (i = 0; i < 16; i++) m[i] = text1[i];
for (; i < 64; i++) m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];
for (i = 0; i < 64; i++) { t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; t2 = EP0(a) + MAJ(a,b,c); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2;
}
m[0] = midstate[0] + a; m[1] = midstate[1] + b; m[2] = midstate[2] + c; m[3] = midstate[3] + d; m[4] = midstate[4] + e; m[5] = midstate[5] + f; m[6] = midstate[6] + g; m[7] = midstate[7] + h;
// Hash Two
a = 0x6a09e667U; b = 0xbb67ae85U; c = 0x3c6ef372U; d = 0xa54ff53aU; e = 0x510e527fU; f = 0x9b05688cU; g = 0x1f83d9abU; h = 0x5be0cd19U;
m[8] = 0x80000000U; m[9] = 0x00U; m[10] = 0x00U; m[11] = 0x00U; m[12] = 0x00U; m[13] = 0x00U; m[14] = 0x00U; m[15] = 0x100U;
for (i = 16; i < 64; i++) m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];
for (i = 0; i < 57; i++) { t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; t2 = EP0(a) + MAJ(a,b,c); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2; }
eeee = d + h + EP1(e) + CH(e,f,g) + 0x78a5636fU + m[57]; eee = c + g + EP1(eeee) + CH(eeee,e,f) + 0x84c87814U + m[58]; ee = b + f + EP1(eee) + CH(eee,eeee,e) + 0x8cc70208U + m[59]; h = a + e + EP1(ee) + CH(ee,eee,eeee) + 0x90befffaU + m[60];
return 0x5be0cd19U + h; }
#include <windows.h> is only used for setting up the timer. If you work on linux or mac, just get rid of the lines related to the timer.
Title: Re: How fast/simple/short a CPU bitcoin mining core script can be?
Post by: jenga on September 05, 2013, 10:40:27 PM
Anybody has anything to point out? Of course its nothing serious, just for fun and constructive learning.
Title: Re: How fast/simple/short a CPU bitcoin mining core script can be?
Post by: jonjakejingle on March 19, 2014, 10:41:40 AM
Hey! I like it. I compared your code to mine.. Maybe you can help me out with my sha256 function. It keeps giving me the wrong hash. Do you notice anything obvi wrong w/it ? (it's as3) import flash.utils.ByteArray;
var K = [0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2]
function ch(x,y,z) { return (x & y) | (~x & z); }
function maj(x,y,z) { return (x & y) | (x & z) | (y & z); }
function sigma0(x) { return rotr(2, x) | rotr(13, x) | rotr(22, x) }
function sigma1(x) { return rotr(6, x) | rotr(11, x) | rotr(25,x) }
function lsigma0(x) { return rotr(7, x) | rotr(18, x) | shr(3, x); }
function lsigma1(x) { return rotr(17, x) | rotr(19, x) | shr(10, x); }
// rotate right function rotr(n, x) { return (x >> n) | (x << (32-n)); }
// shift right function shr(n, x) { return x >> n; }
function pad(str) { var msg = new ByteArray(); // msg.length = 512/8; msg.position = 0; // write the characters for( var i=0; i < str.length; i++ ) { msg.writeByte( str.charCodeAt(i) ); } // pad it with k zeroes // (lmod512) +1+ k ≡ 448 var str_length_bits = str.length * 8; var num_zeroes = 448 - 1 - (str_length_bits % 512); // write a 1 msg.writeByte( 0x80 ); // we can only write bytes at a time, so let's see how many bytes it is var num_zeroes_bytes = Math.floor(num_zeroes / 8); // need to wrap to another block? if( num_zeroes_bytes < 0 ) { num_zeroes_bytes += 64 }
trace('num_zeroes_bytes', num_zeroes_bytes); for( var i=0; i < num_zeroes_bytes; i++ ) { msg.writeByte(0); } // 64 bit block that is the length of the message (in # of bits). // we ignore the first 32 bits because I'm not sure how to bitwise // shift a flash number to two 32-bit slices msg.writeUnsignedInt(0); // meaning our message can not be longer than (2**33)-1 in length msg.writeUnsignedInt(str_length_bits); return msg; }
// do eeet var str = "jakes";
var msg = pad(str);
for( var n = 0; n < msg.length; n++ ) { trace( n + ' : ' + left_pad(msg[n].toString(2)) ); }
function left_pad(str, length=8, padstr='0') { while( str.length < length ) { str = padstr + str; } return str; }
// parse into n 512-bit (64 bytes, 16 words) blocks
// set initial hash value var H = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19];
var modVal = Math.pow(2, 32);
var block:ByteArray = new ByteArray();
var offset = 0; msg.position = 0; while(true) { try { // read the block msg.readBytes(block, offset, 64); trace('reading block from ' + offset); offset += 64; // construct the message schedule (64 32-bit words) var W = []; block.position = 0; for( var t = 0; t < 64; t++ ) { if( t <= 15 ) { W[t] = block.readByte(); } else { W[t] = lsigma1(W[t-2]) + W[t-7] + lsigma0(W[t-15]) + W[t-16]; } } // 8 working variables var a:uint = H[0]; var b:uint = H[1]; var c:uint = H[2]; var d:uint = H[3]; var e:uint = H[4]; var f:uint = H[5]; var g:uint = H[6]; var h:uint = H[7]; for( t = 0; t < 64; t++ ) { var T1:uint = h + sigma1(e) + ch(e,f,g) + K[t] + W[t]; //T1 = T1%modVal; var T2:uint = sigma0(a) + maj(a,b,c); //T2 = T2%modVal; h = g; g = f; f = e; e = d + T1; d = c; c = b; b = a; a = (T1 + T2); //a = a%modVal; } // compute the i-th intermediate hash value of H(i) H[0] = (a + H[0]) % modVal; H[1] = (b + H[1]) % modVal; H[2] = (c + H[2]) % modVal; H[3] = (d + H[3]) % modVal; H[4] = (e + H[4]) % modVal; H[5] = (f + H[5]) % modVal; H[6] = (g + H[6]) % modVal; H[7] = (h + H[7]) % modVal; } catch(e) { trace('no more blocks, ending at byte ',offset, 'of', msg.length); // no more bytes to read? break; } }
// sha256('jakes') = // 1e03a95d8dc846c1c64271998a9865ed1a46d962634df2c9cff5b095dc8b5aca
var hex = ''; for( n = 0; n < 8; n++ ) { hex += left_pad(H[n].toString(16), 8) + ' '; }
trace( hex.length + ': ' + hex);
|