mcdouglasx (OP)
Previously, it was concluded that there was no statistically significant advantage between prefix and sequential searching. To be fair, though, I don't think the issue is settled there: the experiment can be expanded a bit further to give a more complete idea of what can be achieved. Far from trying to prove in a second run which method is more efficient, this post only aims to highlight the positive aspects of the prefix method over the sequential method. Breaking this down into two cases:

Case 1 - There may be improvements, but they could be minimal in terms of significance.

Case 2 - The method is used as a tool to improve the chances of finding the target, without expecting a 100% certain result (leaving the rest to chance).

Let's start with Case 1:

I've added a second phase of prefix detection that slightly improves the method used in the previous post. To refresh our memory: the previous method divided the scan range into blocks, and the block size was derived from the probability of a given 3-character hexadecimal prefix appearing at any position, which is 1/4096. Following that logic, the blocks were set to an equal or similar size, and whenever a 3-character prefix was found, the rest of the block was skipped and left for later. The new version below is based on the same principle, but instead of skipping the rest of the block immediately, it keeps checking the following keys for a prefix of 2 hexadecimal characters, i.e., one character shorter than the prefix we chose. This way we take advantage of the compound probability and mitigate possible omissions of the target on the first pass.
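To put numbers on the block size and the two-character fallback, here is a quick back-of-the-envelope check (a standalone sketch, separate from the simulation script below, assuming uniformly distributed hashes):

Code:

# Chance that a given key's hash starts with one specific prefix:
p3 = 1 / 16**3   # 3 hex chars -> 1/4096, which is why RANGE_SIZE = 4096
p2 = 1 / 16**2   # 2 hex chars -> 1/256, the relaxed second-phase condition

n = 4096  # keys per block

# Chance of at least one such hit somewhere inside one block:
p3_block = 1 - (1 - p3) ** n   # ~0.632 (about 1 - 1/e)
p2_block = 1 - (1 - p2) ** n   # ~0.9999999 (about 1 - e**-16)

print(f"P(>=1 three-char hit per block) = {p3_block:.3f}")
print(f"P(>=1 two-char hit per block)  = {p2_block:.7f}")

Requiring both events before a block is skipped makes an early, unlucky skip far less likely than with the 3-character condition alone.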
Code:

import hashlib
import random
import time
import math
import statistics
import scipy.stats as stats
import statsmodels.stats.power as smp
from math import ceil

# Configuration
TOTAL_SIZE = 100_000
RANGE_SIZE = 4_096
PREFIX_LENGTH = 3
SIMULATIONS = 10000

SECP256K1_ORDER = int("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", 16)
print(f""" === Configuration === Total numbers: {TOTAL_SIZE:,} Block size: {RANGE_SIZE:,} Total blocks needed: {ceil(TOTAL_SIZE/RANGE_SIZE)} Prefix: {PREFIX_LENGTH} characters (16^{PREFIX_LENGTH} = {16**PREFIX_LENGTH:,} combinations) Simulations: {SIMULATIONS} secp256k1 order: {SECP256K1_ORDER} """)
def generate_h160(data):
    h = hashlib.new('ripemd160', str(data).encode('utf-8'))
    return h.hexdigest()

def shuffled_blck(total_blocks):
    blocks = list(range(total_blocks))
    random.shuffle(blocks)
    return blocks

def sequential_search(dataset, block_size, target_hash, block_order):
    checks = 0
    for block_idx in block_order:
        start = block_idx * block_size
        end = min(start + block_size, len(dataset))
        for i in range(start, end):
            checks += 1
            if generate_h160(dataset[i]) == target_hash:
                return {"checks": checks, "found": True, "index": i}
    return {"checks": checks, "found": False}
def prefix_search(dataset, block_size, prefix_len, target_hash, block_order):
    prefix = target_hash[:prefix_len]
    checks = 0
    omitted_ranges = []
    for block_idx in block_order:
        start = block_idx * block_size
        end = min(start + block_size, len(dataset))
        found_3_prefix = False
        found_2_prefix = False
        for i in range(start, end):
            checks += 1
            current_hash = generate_h160(dataset[i])
            if current_hash == target_hash:
                return {"checks": checks, "found": True, "index": i}
            # Phase 1: look for the full 3-character prefix.
            if not found_3_prefix:
                if current_hash.startswith(prefix):
                    found_3_prefix = True
            # Phase 2: after a 3-char hit, keep going until a 2-char hit too.
            else:
                if not found_2_prefix and current_hash.startswith(prefix[:2]):
                    found_2_prefix = True
            # Only skip the rest of the block once both phases have fired.
            if found_3_prefix and found_2_prefix:
                omitted_ranges.append((i + 1, end))
                break
    # Second pass: sweep the skipped tails, newest first.
    for start_omit, end_omit in reversed(omitted_ranges):
        for j in range(end_omit - 1, start_omit - 1, -1):
            checks += 1
            current_hash = generate_h160(dataset[j])
            if current_hash == target_hash:
                return {"checks": checks, "found": True, "index": j}
    return {"checks": checks, "found": False}
def comp_cohens_d(list1, list2):
    if len(list1) < 2 or len(list2) < 2:
        return float('nan')
    n1, n2 = len(list1), len(list2)
    m1, m2 = statistics.mean(list1), statistics.mean(list2)
    s1, s2 = statistics.stdev(list1), statistics.stdev(list2)
    pooled_std = math.sqrt(((n1-1)*s1**2 + (n2-1)*s2**2) / (n1+n2-2))
    if pooled_std == 0:
        return float('nan')
    return (m1 - m2) / pooled_std

def coeff_variation(data):
    if not data or statistics.mean(data) == 0:
        return float('nan')
    return (statistics.stdev(data) / statistics.mean(data)) * 100

def longest_streak(outcomes, letter):
    max_streak = current = 0
    for o in outcomes:
        current = current + 1 if o == letter else 0
        max_streak = max(max_streak, current)
    return max_streak

def ascii_bar(label, value, max_value, bar_length=50):
    bar_count = int((value / max_value) * bar_length) if max_value > 0 else 0
    return f"{label:12}: {'#' * bar_count} ({value})"

def conf_interval(data, confidence=0.95):
    if len(data) < 2:
        return (0, 0)
    try:
        return stats.t.interval(
            confidence=confidence,
            df=len(data)-1,
            loc=statistics.mean(data),
            scale=stats.sem(data)
        )
    except Exception:
        return (statistics.mean(data), statistics.mean(data))

def statistical_analysis(seq_checks, pre_checks, seq_success, pre_success):
    analysis = {}
    analysis['seq_mean'] = statistics.mean(seq_checks) if seq_checks else 0
    analysis['pre_mean'] = statistics.mean(pre_checks) if pre_checks else 0
    analysis['seq_ci'] = conf_interval(seq_checks)
    analysis['pre_ci'] = conf_interval(pre_checks)
    if len(seq_checks) > 1 and len(pre_checks) > 1:
        analysis['t_test'] = stats.ttest_ind(seq_checks, pre_checks, equal_var=False)
        analysis['mann_whitney'] = stats.mannwhitneyu(seq_checks, pre_checks)
        analysis['cohen_d'] = comp_cohens_d(seq_checks, pre_checks)
        effect_size = abs(analysis['cohen_d'])
        if effect_size > 0:
            analysis['power'] = smp.tt_ind_solve_power(
                effect_size=effect_size,
                nobs1=len(seq_checks),
                alpha=0.05,
                ratio=len(pre_checks)/len(seq_checks)
            )
        else:
            analysis['power'] = 0
    else:
        analysis['t_test'] = None
        analysis['mann_whitney'] = None
        analysis['cohen_d'] = 0
        analysis['power'] = 0
    analysis['risk_ratio'] = (seq_success/SIMULATIONS) / (pre_success/SIMULATIONS) if pre_success > 0 else 0
    return analysis
def compare_methods():
    results = {
        "sequential": {"wins": 0, "success": 0, "checks": [], "times": []},
        "prefix": {"wins": 0, "success": 0, "checks": [], "times": []},
        "ties": 0
    }
    outcome_history = []
    total_blocks = ceil(TOTAL_SIZE / RANGE_SIZE)

    for _ in range(SIMULATIONS):
        max_offset = SECP256K1_ORDER - TOTAL_SIZE - 1
        offset = random.randint(0, max_offset)
        dataset = [offset + i for i in range(TOTAL_SIZE)]
        target_num = random.choice(dataset)
        target_hash = generate_h160(target_num)
        block_order = shuffled_blck(total_blocks)

        start = time.perf_counter()
        seq_res = sequential_search(dataset, RANGE_SIZE, target_hash, block_order)
        seq_time = time.perf_counter() - start

        start = time.perf_counter()
        pre_res = prefix_search(dataset, RANGE_SIZE, PREFIX_LENGTH, target_hash, block_order)
        pre_time = time.perf_counter() - start

        if seq_res["found"]:
            results["sequential"]["success"] += 1
            results["sequential"]["checks"].append(seq_res["checks"])
            results["sequential"]["times"].append(seq_time)
        if pre_res["found"]:
            results["prefix"]["success"] += 1
            results["prefix"]["checks"].append(pre_res["checks"])
            results["prefix"]["times"].append(pre_time)
        if seq_res["found"] and pre_res["found"]:
            if seq_res["checks"] < pre_res["checks"]:
                results["sequential"]["wins"] += 1
                outcome_history.append("S")
            elif pre_res["checks"] < seq_res["checks"]:
                results["prefix"]["wins"] += 1
                outcome_history.append("P")
            else:
                results["ties"] += 1
                outcome_history.append("T")
        elif seq_res["found"]:
            results["sequential"]["wins"] += 1
            outcome_history.append("S")
        elif pre_res["found"]:
            results["prefix"]["wins"] += 1
            outcome_history.append("P")
        else:
            results["ties"] += 1
            outcome_history.append("T")
    def get_stats(data):
        if not data:
            return {"mean": 0, "min": 0, "max": 0, "median": 0, "stdev": 0}
        return {
            "mean": statistics.mean(data),
            "min": min(data),
            "max": max(data),
            "median": statistics.median(data),
            "stdev": statistics.stdev(data) if len(data) > 1 else 0
        }

    seq_stats = get_stats(results["sequential"]["checks"])
    pre_stats = get_stats(results["prefix"]["checks"])
    seq_time_stats = get_stats(results["sequential"]["times"])
    pre_time_stats = get_stats(results["prefix"]["times"])

    seq_success_rate = results["sequential"]["success"] / SIMULATIONS
    pre_success_rate = results["prefix"]["success"] / SIMULATIONS

    total_comparisons = results["sequential"]["wins"] + results["prefix"]["wins"] + results["ties"]
    seq_win_rate = results["sequential"]["wins"] / total_comparisons if total_comparisons > 0 else 0
    pre_win_rate = results["prefix"]["wins"] / total_comparisons if total_comparisons > 0 else 0

    cv_seq = coeff_variation(results["sequential"]["checks"])
    cv_pre = coeff_variation(results["prefix"]["checks"])

    stats_analysis = statistical_analysis(
        seq_checks=results["sequential"]["checks"],
        pre_checks=results["prefix"]["checks"],
        seq_success=results["sequential"]["success"],
        pre_success=results["prefix"]["success"]
    )
print(f""" === FINAL ANALYSIS ===
[Success Rates] Sequential: {seq_success_rate:.1%} ({results['sequential']['success']}/{SIMULATIONS}) Prefix: {pre_success_rate:.1%} ({results['prefix']['success']}/{SIMULATIONS})
[Performance Metrics] | Sequential | Prefix ---------------+---------------------+-------------------- Checks (mean) | {seq_stats['mean']:>12,.1f} ± {seq_stats['stdev']:,.1f} | {pre_stats['mean']:>12,.1f} ± {pre_stats['stdev']:,.1f} Time (mean ms) | {seq_time_stats['mean']*1000:>12.2f} ± {seq_time_stats['stdev']*1000:.2f} | {pre_time_stats['mean']*1000:>12.2f} ± {pre_time_stats['stdev']*1000:.2f} Min checks | {seq_stats['min']:>12,} | {pre_stats['min']:>12,} Max checks | {seq_stats['max']:>12,} | {pre_stats['max']:>12,} Coef. Variation| {cv_seq:>11.1f}% | {cv_pre:>11.1f}%
[Comparison When Both Succeed] Sequential wins: {results['sequential']['wins']} ({seq_win_rate:.1%}) Prefix wins: {results['prefix']['wins']} ({pre_win_rate:.1%}) Ties: {results['ties']}
=== ADVANCED STATISTICS ===
[Confidence Intervals 95%] Checks Sequential: {seq_stats['mean']:.1f} ({stats_analysis['seq_ci'][0]:.1f} - {stats_analysis['seq_ci'][1]:.1f}) Checks Prefix: {pre_stats['mean']:.1f} ({stats_analysis['pre_ci'][0]:.1f} - {stats_analysis['pre_ci'][1]:.1f})
[Statistical Tests] Welch's t-test: {'t = %.3f, p = %.4f' % (stats_analysis['t_test'].statistic, stats_analysis['t_test'].pvalue) if stats_analysis['t_test'] else 'N/A'} Mann-Whitney U: {'U = %.1f, p = %.4f' % (stats_analysis['mann_whitney'].statistic, stats_analysis['mann_whitney'].pvalue) if stats_analysis['mann_whitney'] else 'N/A'} Effect Size (Cohen's d): {stats_analysis['cohen_d']:.3f}
[Power Analysis] Statistical Power: {stats_analysis['power']:.1%}
[Risk/Benefit Ratio] Success Ratio (Seq/Pre): {stats_analysis['risk_ratio']:.2f}:1 """)
    non_tie_outcomes = [o for o in outcome_history if o != "T"]
    streak_analysis = f"""
=== STREAK ANALYSIS ===
Longest Sequential streak: {longest_streak(outcome_history, 'S')}
Longest Prefix streak: {longest_streak(outcome_history, 'P')}
Expected max streak: {math.log(len(non_tie_outcomes), 2):.1f} (for {len(non_tie_outcomes)} trials)
"""
    print(streak_analysis)

    max_wins = max(results["sequential"]["wins"], results["prefix"]["wins"], results["ties"])
    print("=== WIN DISTRIBUTION ===")
    print(ascii_bar("Sequential", results["sequential"]["wins"], max_wins))
    print(ascii_bar("Prefix", results["prefix"]["wins"], max_wins))
    print(ascii_bar("Ties", results["ties"], max_wins))

if __name__ == '__main__':
    compare_methods()
Result:

=== FINAL ANALYSIS ===
[Success Rates]
Sequential: 100.0% (10000/10000)
Prefix: 100.0% (10000/10000)
[Performance Metrics]
| Sequential | Prefix
---------------+---------------------+--------------------
Checks (mean) | 50,326.6 ± 28,808.3 | 49,908.5 ± 28,648.4
Time (mean ms) | 126.15 ± 72.73 | 129.80 ± 74.85
Min checks | 7 | 7
Max checks | 99,966 | 99,998
Coef. Variation| 57.2% | 57.4%
[Comparison When Both Succeed]
Sequential wins: 2385 (23.8%)
Prefix wins: 7149 (71.5%)
Ties: 466
=== ADVANCED STATISTICS ===
[Confidence Intervals 95%]
Checks Sequential: 50326.6 (49761.9 - 50891.3)
Checks Prefix: 49908.5 (49346.9 - 50470.1)
[Statistical Tests]
Welch's t-test: t = 1.029, p = 0.3035
Mann-Whitney U: U = 50420196.5, p = 0.3034
Effect Size (Cohen's d): 0.015
[Power Analysis]
Statistical Power: 17.7%
[Risk/Benefit Ratio]
Success Ratio (Seq/Pre): 1.00:1
=== STREAK ANALYSIS ===
Longest Sequential streak: 6
Longest Prefix streak: 30
Expected max streak: 13.2 (for 9534 trials)
=== WIN DISTRIBUTION ===
Sequential : ################ (2385)
Prefix : ################################################## (7149)
Ties : ### (466)
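A note on the streak figures above: the script's "Expected max streak" uses log2(n), which assumes a fair 50/50 outcome. Since the prefix method wins about 75% of the non-tie trials here, longer runs are expected, and the observed streak of 30 is roughly in line with the usual log-base-(1/p) estimate for the longest run of a biased outcome (a quick check, not part of the script above):

Code:

import math

non_ties = 9534          # non-tie trials from the run above
p_prefix = 7149 / 9534   # prefix win rate among non-ties (~0.75)

# Expected longest run of prefix wins: roughly log base (1/p) of n
expected = math.log(non_ties) / math.log(1 / p_prefix)
print(f"{expected:.1f}")  # ~31.8, close to the observed streak of 30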
Case 1.2:

In this revision, we simply add a way to counteract the fact that skipped ranges are otherwise left until last: every time 4 skipped ranges have accumulated, they are rescanned immediately, limiting the worst-case scenarios pointed out by newsecurity.
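The flush policy is easier to see on a toy example than inside the full script; here is a minimal sketch of just the bookkeeping (standalone, with made-up range labels):

Code:

# Every time 4 skipped ranges have accumulated, rescan them (newest first)
# before moving on; whatever is left over is swept at the very end.
pending = []
scan_order = []

for block_id in range(10):            # pretend every block skips one range
    pending.append(f"range-{block_id}")
    if len(pending) >= 4:             # the flush threshold used below
        scan_order.extend(reversed(pending[-4:]))
        pending = []

scan_order.extend(reversed(pending))  # final sweep of the leftovers
print(scan_order)

With this policy no skipped range waits longer than a few blocks, which is what bounds the worst case.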
Code:

import hashlib
import random
import time
import math
import statistics
import scipy.stats as stats
import statsmodels.stats.power as smp
from math import ceil

# Configuration
TOTAL_SIZE = 100_000
RANGE_SIZE = 4_096
PREFIX_LENGTH = 3
SIMULATIONS = 10000

SECP256K1_ORDER = int("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", 16)
print(f""" === Configuration === Total numbers: {TOTAL_SIZE:,} Block size: {RANGE_SIZE:,} Total blocks needed: {ceil(TOTAL_SIZE/RANGE_SIZE)} Prefix: {PREFIX_LENGTH} characters (16^{PREFIX_LENGTH} = {16**PREFIX_LENGTH:,} combinations) Simulations: {SIMULATIONS} secp256k1 order: {SECP256K1_ORDER} """)
def generate_h160(data):
    h = hashlib.new('ripemd160', str(data).encode('utf-8'))
    return h.hexdigest()

def shuffled_blck(total_blocks):
    blocks = list(range(total_blocks))
    random.shuffle(blocks)
    return blocks

def sequential_search(dataset, block_size, target_hash, block_order):
    checks = 0
    for block_idx in block_order:
        start = block_idx * block_size
        end = min(start + block_size, len(dataset))
        for i in range(start, end):
            checks += 1
            if generate_h160(dataset[i]) == target_hash:
                return {"checks": checks, "found": True, "index": i}
    return {"checks": checks, "found": False}
def prefix_search(dataset, block_size, prefix_len, target_hash, block_order):
    prefix = target_hash[:prefix_len]
    checks = 0
    omitted_ranges = []
    pending_omissions = []
    for block_idx in block_order:
        start = block_idx * block_size
        end = min(start + block_size, len(dataset))
        found_3_prefix = False
        found_2_prefix = False
        for i in range(start, end):
            checks += 1
            current_hash = generate_h160(dataset[i])
            if current_hash == target_hash:
                return {"checks": checks, "found": True, "index": i}
            if not found_3_prefix:
                if current_hash.startswith(prefix):
                    found_3_prefix = True
            else:
                if not found_2_prefix and current_hash.startswith(prefix[:2]):
                    found_2_prefix = True
            if found_3_prefix and found_2_prefix:
                pending_omissions.append((i + 1, end))
                omitted_ranges.append((i + 1, end))
                # Flush: once 4 ranges are pending, rescan them right away
                # (newest first) instead of leaving everything until last.
                if len(pending_omissions) >= 4:
                    for r_start, r_end in reversed(pending_omissions[-4:]):
                        for j in range(r_end - 1, r_start - 1, -1):
                            checks += 1
                            if generate_h160(dataset[j]) == target_hash:
                                return {"checks": checks, "found": True, "index": j}
                    pending_omissions = []
                break
    # Final sweep of any ranges still pending.
    for r_start, r_end in reversed(pending_omissions):
        for j in range(r_end - 1, r_start - 1, -1):
            checks += 1
            if generate_h160(dataset[j]) == target_hash:
                return {"checks": checks, "found": True, "index": j}
    return {"checks": checks, "found": False}
def comp_cohens_d(list1, list2):
    if len(list1) < 2 or len(list2) < 2:
        return float('nan')
    n1, n2 = len(list1), len(list2)
    m1, m2 = statistics.mean(list1), statistics.mean(list2)
    s1, s2 = statistics.stdev(list1), statistics.stdev(list2)
    pooled_std = math.sqrt(((n1-1)*s1**2 + (n2-1)*s2**2) / (n1+n2-2))
    if pooled_std == 0:
        return float('nan')
    return (m1 - m2) / pooled_std

def coeff_variation(data):
    if not data or statistics.mean(data) == 0:
        return float('nan')
    return (statistics.stdev(data) / statistics.mean(data)) * 100

def longest_streak(outcomes, letter):
    max_streak = current = 0
    for o in outcomes:
        current = current + 1 if o == letter else 0
        max_streak = max(max_streak, current)
    return max_streak

def ascii_bar(label, value, max_value, bar_length=50):
    bar_count = int((value / max_value) * bar_length) if max_value > 0 else 0
    return f"{label:12}: {'#' * bar_count} ({value})"

def conf_interval(data, confidence=0.95):
    if len(data) < 2:
        return (0, 0)
    try:
        return stats.t.interval(
            confidence=confidence,
            df=len(data)-1,
            loc=statistics.mean(data),
            scale=stats.sem(data)
        )
    except Exception:
        return (statistics.mean(data), statistics.mean(data))

def statistical_analysis(seq_checks, pre_checks, seq_success, pre_success):
    analysis = {}
    analysis['seq_mean'] = statistics.mean(seq_checks) if seq_checks else 0
    analysis['pre_mean'] = statistics.mean(pre_checks) if pre_checks else 0
    analysis['seq_ci'] = conf_interval(seq_checks)
    analysis['pre_ci'] = conf_interval(pre_checks)
    if len(seq_checks) > 1 and len(pre_checks) > 1:
        analysis['t_test'] = stats.ttest_ind(seq_checks, pre_checks, equal_var=False)
        analysis['mann_whitney'] = stats.mannwhitneyu(seq_checks, pre_checks)
        analysis['cohen_d'] = comp_cohens_d(seq_checks, pre_checks)
        effect_size = abs(analysis['cohen_d'])
        if effect_size > 0:
            analysis['power'] = smp.tt_ind_solve_power(
                effect_size=effect_size,
                nobs1=len(seq_checks),
                alpha=0.05,
                ratio=len(pre_checks)/len(seq_checks)
            )
        else:
            analysis['power'] = 0
    else:
        analysis['t_test'] = None
        analysis['mann_whitney'] = None
        analysis['cohen_d'] = 0
        analysis['power'] = 0
    analysis['risk_ratio'] = (seq_success/SIMULATIONS) / (pre_success/SIMULATIONS) if pre_success > 0 else 0
    return analysis
def compare_methods():
    results = {
        "sequential": {"wins": 0, "success": 0, "checks": [], "times": []},
        "prefix": {"wins": 0, "success": 0, "checks": [], "times": []},
        "ties": 0
    }
    outcome_history = []
    total_blocks = ceil(TOTAL_SIZE / RANGE_SIZE)

    for _ in range(SIMULATIONS):
        max_offset = SECP256K1_ORDER - TOTAL_SIZE - 1
        offset = random.randint(0, max_offset)
        dataset = [offset + i for i in range(TOTAL_SIZE)]
        target_num = random.choice(dataset)
        target_hash = generate_h160(target_num)
        block_order = shuffled_blck(total_blocks)

        start = time.perf_counter()
        seq_res = sequential_search(dataset, RANGE_SIZE, target_hash, block_order)
        seq_time = time.perf_counter() - start

        start = time.perf_counter()
        pre_res = prefix_search(dataset, RANGE_SIZE, PREFIX_LENGTH, target_hash, block_order)
        pre_time = time.perf_counter() - start

        if seq_res["found"]:
            results["sequential"]["success"] += 1
            results["sequential"]["checks"].append(seq_res["checks"])
            results["sequential"]["times"].append(seq_time)
        if pre_res["found"]:
            results["prefix"]["success"] += 1
            results["prefix"]["checks"].append(pre_res["checks"])
            results["prefix"]["times"].append(pre_time)
        if seq_res["found"] and pre_res["found"]:
            if seq_res["checks"] < pre_res["checks"]:
                results["sequential"]["wins"] += 1
                outcome_history.append("S")
            elif pre_res["checks"] < seq_res["checks"]:
                results["prefix"]["wins"] += 1
                outcome_history.append("P")
            else:
                results["ties"] += 1
                outcome_history.append("T")
        elif seq_res["found"]:
            results["sequential"]["wins"] += 1
            outcome_history.append("S")
        elif pre_res["found"]:
            results["prefix"]["wins"] += 1
            outcome_history.append("P")
        else:
            results["ties"] += 1
            outcome_history.append("T")
    def get_stats(data):
        if not data:
            return {"mean": 0, "min": 0, "max": 0, "median": 0, "stdev": 0}
        return {
            "mean": statistics.mean(data),
            "min": min(data),
            "max": max(data),
            "median": statistics.median(data),
            "stdev": statistics.stdev(data) if len(data) > 1 else 0
        }

    seq_stats = get_stats(results["sequential"]["checks"])
    pre_stats = get_stats(results["prefix"]["checks"])
    seq_time_stats = get_stats(results["sequential"]["times"])
    pre_time_stats = get_stats(results["prefix"]["times"])

    seq_success_rate = results["sequential"]["success"] / SIMULATIONS
    pre_success_rate = results["prefix"]["success"] / SIMULATIONS

    total_comparisons = results["sequential"]["wins"] + results["prefix"]["wins"] + results["ties"]
    seq_win_rate = results["sequential"]["wins"] / total_comparisons if total_comparisons > 0 else 0
    pre_win_rate = results["prefix"]["wins"] / total_comparisons if total_comparisons > 0 else 0

    cv_seq = coeff_variation(results["sequential"]["checks"])
    cv_pre = coeff_variation(results["prefix"]["checks"])

    stats_analysis = statistical_analysis(
        seq_checks=results["sequential"]["checks"],
        pre_checks=results["prefix"]["checks"],
        seq_success=results["sequential"]["success"],
        pre_success=results["prefix"]["success"]
    )
print(f""" === FINAL ANALYSIS ===
[Success Rates] Sequential: {seq_success_rate:.1%} ({results['sequential']['success']}/{SIMULATIONS}) Prefix: {pre_success_rate:.1%} ({results['prefix']['success']}/{SIMULATIONS})
[Performance Metrics] | Sequential | Prefix ---------------+---------------------+-------------------- Checks (mean) | {seq_stats['mean']:>12,.1f} ± {seq_stats['stdev']:,.1f} | {pre_stats['mean']:>12,.1f} ± {pre_stats['stdev']:,.1f} Time (mean ms) | {seq_time_stats['mean']*1000:>12.2f} ± {seq_time_stats['stdev']*1000:.2f} | {pre_time_stats['mean']*1000:>12.2f} ± {pre_time_stats['stdev']*1000:.2f} Min checks | {seq_stats['min']:>12,} | {pre_stats['min']:>12,} Max checks | {seq_stats['max']:>12,} | {pre_stats['max']:>12,} Coef. Variation| {cv_seq:>11.1f}% | {cv_pre:>11.1f}%
[Comparison When Both Succeed] Sequential wins: {results['sequential']['wins']} ({seq_win_rate:.1%}) Prefix wins: {results['prefix']['wins']} ({pre_win_rate:.1%}) Ties: {results['ties']}
=== ADVANCED STATISTICS ===
[Confidence Intervals 95%] Checks Sequential: {seq_stats['mean']:.1f} ({stats_analysis['seq_ci'][0]:.1f} - {stats_analysis['seq_ci'][1]:.1f}) Checks Prefix: {pre_stats['mean']:.1f} ({stats_analysis['pre_ci'][0]:.1f} - {stats_analysis['pre_ci'][1]:.1f})
[Statistical Tests] Welch's t-test: {'t = %.3f, p = %.4f' % (stats_analysis['t_test'].statistic, stats_analysis['t_test'].pvalue) if stats_analysis['t_test'] else 'N/A'} Mann-Whitney U: {'U = %.1f, p = %.4f' % (stats_analysis['mann_whitney'].statistic, stats_analysis['mann_whitney'].pvalue) if stats_analysis['mann_whitney'] else 'N/A'} Effect Size (Cohen's d): {stats_analysis['cohen_d']:.3f}
[Power Analysis] Statistical Power: {stats_analysis['power']:.1%}
[Risk/Benefit Ratio] Success Ratio (Seq/Pre): {stats_analysis['risk_ratio']:.2f}:1 """)
    non_tie_outcomes = [o for o in outcome_history if o != "T"]
    streak_analysis = f"""
=== STREAK ANALYSIS ===
Longest Sequential streak: {longest_streak(outcome_history, 'S')}
Longest Prefix streak: {longest_streak(outcome_history, 'P')}
Expected max streak: {math.log(len(non_tie_outcomes), 2):.1f} (for {len(non_tie_outcomes)} trials)
"""
    print(streak_analysis)

    max_wins = max(results["sequential"]["wins"], results["prefix"]["wins"], results["ties"])
    print("=== WIN DISTRIBUTION ===")
    print(ascii_bar("Sequential", results["sequential"]["wins"], max_wins))
    print(ascii_bar("Prefix", results["prefix"]["wins"], max_wins))
    print(ascii_bar("Ties", results["ties"], max_wins))

if __name__ == '__main__':
    compare_methods()
Result:

=== FINAL ANALYSIS ===
[Success Rates]
Sequential: 100.0% (10000/10000)
Prefix: 100.0% (10000/10000)

[Performance Metrics]
               | Sequential | Prefix
---------------+---------------------+--------------------
Checks (mean) | 49,712.4 ± 28,876.0 | 49,644.4 ± 28,930.7
Time (mean ms) | 124.20 ± 72.81 | 127.68 ± 74.95
Min checks | 10 | 10
Max checks | 99,993 | 99,986
Coef. Variation| 58.1% | 58.3%

[Comparison When Both Succeed]
Sequential wins: 2180 (21.8%)
Prefix wins: 5939 (59.4%)
Ties: 1881

=== ADVANCED STATISTICS ===

[Confidence Intervals 95%]
Checks Sequential: 49712.4 (49146.4 - 50278.5)
Checks Prefix: 49644.4 (49077.3 - 50211.5)

[Statistical Tests]
Welch's t-test: t = 0.166, p = 0.8679
Mann-Whitney U: U = 50070425.5, p = 0.8630
Effect Size (Cohen's d): 0.002

[Power Analysis]
Statistical Power: 5.3%

[Risk/Benefit Ratio]
Success Ratio (Seq/Pre): 1.00:1

=== STREAK ANALYSIS ===
Longest Sequential streak: 6
Longest Prefix streak: 15
Expected max streak: 13.0 (for 8119 trials)
=== WIN DISTRIBUTION ===
Sequential : ################## (2180)
Prefix : ################################################## (5939)
Ties : ############### (1881)

Let's continue with Case 2:

This is the case that, in my opinion, most puzzle seekers resort to, or should: they're just trying their luck, not hunting the target with the goal of finding it no matter what. They simply want a partial search that offers the best chances. At this point I'm convinced the prefix method is clearly the better option. The comparison below is based only on the runs in which both methods found the target; covering the entire range is not the point, since this is a direct comparison of probabilistic efficiency. For the prefix method you no longer need to store the skipped ranges, because the goal isn't a complete search but to give randomness its chance with a clear statistical edge.
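Before the code, the arithmetic that makes the partial search attractive: if we stop a block at the first 3-character hit, we examine on average only about 63% of each block, so the same budget of checks is spread across more distinct blocks (a quick calculation, again assuming uniformly distributed hashes):

Code:

p, n = 1 / 4096, 4096   # prefix hit probability, keys per block

# Expected keys examined per block when we stop at the first hit
# (truncated geometric expectation):
expected = (1 - (1 - p) ** n) / p
print(f"{expected:.0f} of {n} keys (~{expected / n:.0%})")  # ~2589 (~63%)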
Code:

import hashlib
import random
import time
import math
import statistics
import scipy.stats as stats
import statsmodels.stats.power as smp
from math import ceil

# Configuration
TOTAL_SIZE = 100_000
RANGE_SIZE = 4_096
PREFIX_LENGTH = 3
SIMULATIONS = 10000

SECP256K1_ORDER = int("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", 16)
print(f""" === Configuration === Total numbers: {TOTAL_SIZE:,} Block size: {RANGE_SIZE:,} Total blocks needed: {ceil(TOTAL_SIZE/RANGE_SIZE)} Prefix: {PREFIX_LENGTH} characters (16^{PREFIX_LENGTH} = {16**PREFIX_LENGTH:,} combinations) Simulations: {SIMULATIONS} secp256k1 order: {SECP256K1_ORDER} """)
def generate_h160(data):
    h = hashlib.new('ripemd160', str(data).encode('utf-8'))
    return h.hexdigest()

def shuffled_blck(total_blocks):
    blocks = list(range(total_blocks))
    random.shuffle(blocks)
    return blocks

def sequential_search(dataset, block_size, target_hash, block_order):
    checks = 0
    for block_idx in block_order:
        start = block_idx * block_size
        end = min(start + block_size, len(dataset))
        for i in range(start, end):
            checks += 1
            if generate_h160(dataset[i]) == target_hash:
                return {"checks": checks, "found": True, "index": i}
    return {"checks": checks, "found": False}
def prefix_search(dataset, block_size, prefix_len, target_hash, block_order):
    prefix = target_hash[:prefix_len]
    checks = 0
    for block_idx in block_order:
        start = block_idx * block_size
        end = min(start + block_size, len(dataset))
        found_prefix = False
        for i in range(start, end):
            checks += 1
            current_hash = generate_h160(dataset[i])
            if current_hash == target_hash:
                return {"checks": checks, "found": True, "index": i}
            # Partial search: skip the rest of the block at the first
            # prefix hit and never come back to it.
            if not found_prefix and current_hash.startswith(prefix):
                found_prefix = True
                break
    return {"checks": checks, "found": False}
def comp_cohens_d(list1, list2):
    if len(list1) < 2 or len(list2) < 2:
        return float('nan')
    n1, n2 = len(list1), len(list2)
    m1, m2 = statistics.mean(list1), statistics.mean(list2)
    s1, s2 = statistics.stdev(list1), statistics.stdev(list2)
    pooled_std = math.sqrt(((n1-1)*s1**2 + (n2-1)*s2**2) / (n1+n2-2))
    if pooled_std == 0:
        return float('nan')
    return (m1 - m2) / pooled_std

def coeff_variation(data):
    if not data or statistics.mean(data) == 0:
        return float('nan')
    return (statistics.stdev(data) / statistics.mean(data)) * 100

def longest_streak(outcomes, letter):
    max_streak = current = 0
    for o in outcomes:
        current = current + 1 if o == letter else 0
        max_streak = max(max_streak, current)
    return max_streak

def ascii_bar(label, value, max_value, bar_length=50):
    bar_count = int((value / max_value) * bar_length) if max_value > 0 else 0
    return f"{label:12}: {'#' * bar_count} ({value})"

def conf_interval(data, confidence=0.95):
    if len(data) < 2:
        return (0, 0)
    try:
        return stats.t.interval(
            confidence=confidence,
            df=len(data)-1,
            loc=statistics.mean(data),
            scale=stats.sem(data)
        )
    except Exception:
        return (statistics.mean(data), statistics.mean(data))

def statistical_analysis(seq_checks, pre_checks):
    analysis = {}
    analysis['seq_mean'] = statistics.mean(seq_checks) if seq_checks else 0
    analysis['pre_mean'] = statistics.mean(pre_checks) if pre_checks else 0
    analysis['seq_ci'] = conf_interval(seq_checks)
    analysis['pre_ci'] = conf_interval(pre_checks)
    if len(seq_checks) > 1 and len(pre_checks) > 1:
        analysis['t_test'] = stats.ttest_ind(seq_checks, pre_checks, equal_var=False)
        analysis['mann_whitney'] = stats.mannwhitneyu(seq_checks, pre_checks)
        analysis['cohen_d'] = comp_cohens_d(seq_checks, pre_checks)
        effect_size = abs(analysis['cohen_d'])
        if effect_size > 0:
            analysis['power'] = smp.tt_ind_solve_power(
                effect_size=effect_size,
                nobs1=len(seq_checks),
                alpha=0.05,
                ratio=len(pre_checks)/len(seq_checks)
            )
        else:
            analysis['power'] = 0
    else:
        analysis['t_test'] = None
        analysis['mann_whitney'] = None
        analysis['cohen_d'] = 0
        analysis['power'] = 0
    return analysis
def compare_methods():
    results = {
        "sequential": {"wins": 0, "checks": [], "times": []},
        "prefix": {"wins": 0, "checks": [], "times": []},
        "ties": 0,
        "both_failed": 0
    }
    outcome_history = []
    total_blocks = ceil(TOTAL_SIZE / RANGE_SIZE)
    valid_cases = 0

    for _ in range(SIMULATIONS):
        max_offset = SECP256K1_ORDER - TOTAL_SIZE - 1
        offset = random.randint(0, max_offset)
        dataset = [offset + i for i in range(TOTAL_SIZE)]
        target_num = random.choice(dataset)
        target_hash = generate_h160(target_num)
        block_order = shuffled_blck(total_blocks)

        start = time.perf_counter()
        seq_res = sequential_search(dataset, RANGE_SIZE, target_hash, block_order)
        seq_time = time.perf_counter() - start

        start = time.perf_counter()
        pre_res = prefix_search(dataset, RANGE_SIZE, PREFIX_LENGTH, target_hash, block_order)
        pre_time = time.perf_counter() - start

        if seq_res["found"] and pre_res["found"]:
            valid_cases += 1
            results["sequential"]["checks"].append(seq_res["checks"])
            results["prefix"]["checks"].append(pre_res["checks"])
            results["sequential"]["times"].append(seq_time)
            results["prefix"]["times"].append(pre_time)
            if seq_res["checks"] < pre_res["checks"]:
                results["sequential"]["wins"] += 1
                outcome_history.append("S")
            elif pre_res["checks"] < seq_res["checks"]:
                results["prefix"]["wins"] += 1
                outcome_history.append("P")
            else:
                results["ties"] += 1
                outcome_history.append("T")
        elif not seq_res["found"] and not pre_res["found"]:
            results["both_failed"] += 1
        else:
            continue
    def get_stats(data):
        if not data:
            return {"mean": 0, "min": 0, "max": 0, "median": 0, "stdev": 0}
        return {
            "mean": statistics.mean(data),
            "min": min(data),
            "max": max(data),
            "median": statistics.median(data),
            "stdev": statistics.stdev(data) if len(data) > 1 else 0
        }

    seq_stats = get_stats(results["sequential"]["checks"])
    pre_stats = get_stats(results["prefix"]["checks"])
    seq_time_stats = get_stats(results["sequential"]["times"])
    pre_time_stats = get_stats(results["prefix"]["times"])

    total_comparisons = results["sequential"]["wins"] + results["prefix"]["wins"] + results["ties"]
    seq_win_rate = results["sequential"]["wins"] / total_comparisons if total_comparisons > 0 else 0
    pre_win_rate = results["prefix"]["wins"] / total_comparisons if total_comparisons > 0 else 0

    cv_seq = coeff_variation(results["sequential"]["checks"])
    cv_pre = coeff_variation(results["prefix"]["checks"])

    stats_analysis = statistical_analysis(
        seq_checks=results["sequential"]["checks"],
        pre_checks=results["prefix"]["checks"]
    )
print(f""" === FINAL ANALYSIS === Valid cases (both found target): {valid_cases}/{SIMULATIONS}
[Performance Metrics] | Sequential | Prefix ---------------+---------------------+-------------------- Checks (mean) | {seq_stats['mean']:>12,.1f} ± {seq_stats['stdev']:,.1f} | {pre_stats['mean']:>12,.1f} ± {pre_stats['stdev']:,.1f} Time (mean ms) | {seq_time_stats['mean']*1000:>12.2f} ± {seq_time_stats['stdev']*1000:.2f} | {pre_time_stats['mean']*1000:>12.2f} ± {pre_time_stats['stdev']*1000:.2f} Min checks | {seq_stats['min']:>12,} | {pre_stats['min']:>12,} Max checks | {seq_stats['max']:>12,} | {pre_stats['max']:>12,} Coef. Variation| {cv_seq:>11.1f}% | {cv_pre:>11.1f}%
[Comparison Results] Sequential wins: {results['sequential']['wins']} ({seq_win_rate:.1%}) Prefix wins: {results['prefix']['wins']} ({pre_win_rate:.1%}) Ties: {results['ties']} Both failed: {results['both_failed']}
=== STATISTICAL ANALYSIS ===
[Confidence Intervals] Checks Sequential: {seq_stats['mean']:.1f} ({stats_analysis['seq_ci'][0]:.1f} - {stats_analysis['seq_ci'][1]:.1f}) Checks Prefix: {pre_stats['mean']:.1f} ({stats_analysis['pre_ci'][0]:.1f} - {stats_analysis['pre_ci'][1]:.1f})
[Statistical Tests] Welch's t-test: {'t = %.3f, p = %.4f' % (stats_analysis['t_test'].statistic, stats_analysis['t_test'].pvalue) if stats_analysis['t_test'] else 'N/A'} Mann-Whitney U: {'U = %.1f, p = %.4f' % (stats_analysis['mann_whitney'].statistic, stats_analysis['mann_whitney'].pvalue) if stats_analysis['mann_whitney'] else 'N/A'} Effect Size (Cohen's d): {stats_analysis['cohen_d']:.3f}
[Power Analysis] Statistical Power: {stats_analysis['power']:.1%} """)
    if outcome_history:
        non_tie_outcomes = [o for o in outcome_history if o != "T"]
        streak_analysis = f"""
=== STREAK ANALYSIS ===
Longest Sequential streak: {longest_streak(outcome_history, 'S')}
Longest Prefix streak: {longest_streak(outcome_history, 'P')}
Expected max streak: {math.log(len(non_tie_outcomes), 2):.1f} (for {len(non_tie_outcomes)} trials)
"""
        print(streak_analysis)

    max_wins = max(results["sequential"]["wins"], results["prefix"]["wins"], results["ties"])
    print("=== WIN DISTRIBUTION ===")
    print(ascii_bar("Sequential", results["sequential"]["wins"], max_wins))
    print(ascii_bar("Prefix", results["prefix"]["wins"], max_wins))
    print(ascii_bar("Ties", results["ties"], max_wins))

if __name__ == '__main__':
    compare_methods()
Result:

...continued in the first comment