[RC5] Statistics of Key Distribution

Joe Zbiciak j-zbiciak1 at ti.com
Tue Mar 31 21:44:32 EST 1998


| Yes, but if we change where we start, then the middle moves, too.

Irrelevant.

| Seriously, the original poster is confusing two different levels.  If we
| solved a whole bunch of these contests and graphed where in the keyspace
| the "winning" key was, then it would form a bell curve--statistically, the
| key tends to be found about halfway through.

Not true.  The time to find the key is as evenly distributed as the 
likely position of the key relative to the search order we're using.

Period.

The key selection is a uniformally-distributed random function with
respect to our search order (or any search order, for that matter).
Therefore the distribution of times-to-completion is uniformally 
distributed as well.

If you'd like empirical evidence, then run the C program included below.
It simulates a key search for a random key, with a random search order.
It will also simulate a key search for a random key with a linear search
order, if you "#define LINEAR_SEARCH" at the top.  I wrote this program
to convince myself.

This program isn't the fastest it could be, but it does a brute force
simulation of how we're searching for a key.  It runs a large number 
of trials and makes a histogram of the observed times to completion.
It takes this data and prints a histograph of the results.

If you set the "NTRIALS" much larger than the size of the keyspace,
you'll see that the distribution of observed times-to-completion is
nice and flat, not bell-shaped.  I ran this with 1M trials and the 
distribution was *very* flat.

Enjoy,

--Joe

/* Random key search simulation, by Joseph Zbiciak, 3-31-1998 */

/*
 * This code is provided without any waranty of fitness for any 
 * purpose.  Caveat emptor.  Your mileage may vary.  Void where 
 * prohibited or taxed by law.  Not part of this nutritious breakfast.
 */

#include <stdio.h>
#include <time.h>

#define KEYSPACE (256)          /* Size of "keyspace" for simulation    */
#define NTRIALS  (1048576)      /* Number of trials to run              */

/*#define LINEAR_SEARCH*/       /* De-randomize the search if desired   */

static unsigned rand_buf__[128];
unsigned bound_rand(unsigned bound)
{
    /* 
     *  This random number generator comes from Knuth vol 2., 3rd Ed, p27-29.
     *  The algorithm should produce a sequence whose most significant bits
     *  have a period of 2^31 * (2^127 - 1) and whose least significant 
     *  bits have a period of 2^127 - 1.  Not bad, IMHO.
     *
     *  The initialization sequence is my own invention.
     */

    static int ptr = 0;
    double r, b;

    ptr &= 127;
    rand_buf__[ptr] = rand_buf__[(ptr-30)&127] + rand_buf__[(ptr-127)&127];

    b = (double) bound;
    r = (b * (double)rand_buf__[ptr++])/(double)(~0U);

    return (int)r;  /* truncate 'r' */
}

void init_bound_rand(void)
{
    int i, j;
    time_t t;

    t = time(NULL);
    srand(t);

    for (i=0; i<127; i++)
    {
        j = rand();
        rand_buf__[i] = (int) t * rand() + i + j;
    }
}



void randomize_search(unsigned search_order[], int keys)
{
    /*
     * Fill search_order[keys] with a random search order.  Values range
     * from 0 .. keys-1, with no value duplicated.
     */

    int i, k;
    unsigned t;

    /* First initialize array to have each key mentioned once. */
    for (i=0; i<keys; i++)
        search_order[i] = (unsigned)i;

#ifndef LINEAR_SEARCH
    /* 
     * Next, randomize the order by randomly pulling keys to the back.
     */
    for (i=keys-1; i>0; i--)
    {
        k = bound_rand(i);

        /* Swap k'th and i'th elements */
        t               = search_order[k];
        search_order[k] = search_order[i];
        search_order[i] = t;
    }
#endif
}



int run_trial(void)
{
    /* 
     * Run a trial.  This picks a random key in the keyspace then 
     * searches the keyspace in a random order.  The observed time
     * to complete the search is returned to the caller.
     */
    int i;
    unsigned search_order[KEYSPACE];
    unsigned key;

    key = bound_rand(KEYSPACE);
    randomize_search(search_order, KEYSPACE);

    for (i = 0; i < KEYSPACE; i++)
    {
        /* Did we find the key yet? */
        if (search_order[i] == key)  
            /* Yes, after 'i' units of time. */
            return i;   
    }

    /* If we did not find the key, then there is a bug in the program. */
    fprintf(stderr,"Error:  run_trial() failed\n");

    for (i = 0; i < KEYSPACE; i++)
        printf("%3d: %3d\n",i,search_order[i]);

    exit(1);
}

/* 
 * Time to completion histogram.
 *
 * This histogram records the observed times to completion for the set of
 * trials we are running. 
 */

unsigned ttc_histogram[KEYSPACE];

void clear_histogram(void)
{
    int i;

    for (i=0; i<KEYSPACE; i++)
        ttc_histogram[i] = 0;
}

void print_histogram(void)
{
    /*
     * Prints a bar chart representation of the histogram.  Since the
     * the resulting histograph may have a large number of buckets,
     * the graph is printed with the bars going horizontally, with each
     * bucket on its own line.
     * 
     * The decimal count of entries in each bucket is also printed on each
     * line.
     */
    int i,j,l,t;
    int max=1;


    /* 
     * Find the largest bucket, and use it to scale the other buckets. 
     * "max" is at least 1 to prevent divide-by-zero errors.
     */
    for (i=0; i<KEYSPACE; i++)
        if (ttc_histogram[i]>max)
            max = ttc_histogram[i];

    /* 
     * Print out the histograph.  Place a vertical bar in the column which
     * corresponds to the "average" value.
     */

    t = (64 * NTRIALS)/(max*KEYSPACE);
    for (i=0; i<KEYSPACE; i++)
    {
        printf("%10d : ", ttc_histogram[i]);

        l = (64 * ttc_histogram[i] + max/2) / max;
        for (j=0; j<l || j<=t; j++)
            putchar(j==t?'|':j<l?'#':' ');

        putchar('\n');
    }
}


/*
 * In the beginning, there was a main(), 
 * and the C was without enum and void... 
 */

main()
{
    int trial, ttc, pct, pct_upd;

    clear_histogram();
    init_bound_rand();

    for (trial = 0, pct_upd = NTRIALS; trial < NTRIALS; trial++)
    {
        if (++pct_upd >= (NTRIALS/100))
        {
            fprintf(stderr,"\b\b\b\b%3d%%",100*trial/NTRIALS);
            pct_upd = 0;
        }
        ttc_histogram[run_trial()]++;
    }
    fprintf(stderr,"\b\b\b\b    \b\b\b\b");

    print_histogram();

    return 0;
}   

/*
-- 
 +----------- Joseph Zbiciak ----------+
 | - - - -  j-zbiciak1 at ti.com  - - - - |      Don't answer, don't ask,
 |- http://www.primenet.com/~im14u2c/ -|      Don't try to make sense.
 | - - -Texas Instruments, Dallas- - - |          -- "Numb", U2
 +-----#include "std_disclaimer.h"-----+
 */
--
To unsubscribe, send 'unsubscribe rc5' to majordomo at lists.distributed.net
rc5-digest subscribers replace rc5 with rc5-digest



More information about the rc5 mailing list