Jump to content

Algorithm Implementation/Pseudorandom Numbers/Chi-Square Test

From Wikibooks, open books for an open world

VB.NET

[edit | edit source]

The following is a program to calculate the chi-square value for N positive integers less than r.

    'Calculates the chi-square value for N positive integers less than r
    'Source: "Algorithms in C" - Robert Sedgewick - pp. 517
    'NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
    'since it could be wrong in about one out of ten times."
    Public Function IsChiRandom(ByVal randomNums() As Integer, ByVal r As Integer) As Boolean

        'Calculate the number of samples - N
        Dim N As Integer = randomNums.Length

        'According to Sedgewick: "This is valid if N is greater than about 10r"
        If N <= (10 * r) Then
            Return False
        End If

        Dim N_r As Double = N / r
        Dim HT As Hashtable
        Dim chi_square As Double = 0

        'PART A: Get frequency of randoms
        HT = Me.RandomFrequency(randomNums)

        'PART B: Calculate chi-square - this approach is in Sedgewick
        Dim f As Integer
        For Each Item As DictionaryEntry In HT
            f = Integer.Parse(Item.Value) - N_r
            chi_square += Math.Pow(f, 2)
        Next
        chi_square = chi_square / N_r

        'PART C: According to Sedgewick: "The statistic should be within 2(r)^1/2 of r
        'This is valid if N is greater than about 10r"

        If (r - 2 * Math.Sqrt(r) <= chi_square) And (r + 2 * Math.Sqrt(r) >= chi_square) Then
            Return True
        Else
            Return False
        End If

    End Function

    'Gets the frequency of occurrence of a randomly generated array of integers
    'Output: A hashtable, key being the random number and value its frequency
    Private Function RandomFrequency(ByVal randomNums() As Integer) As Hashtable

        Dim HT As New Hashtable
        Dim N As Integer = randomNums.Length

        Dim count As Integer = 0
        For i As Integer = 0 To N - 1
            If HT.ContainsKey(randomNums(i)) Then
                HT.Item(randomNums(i)) = HT.Item(randomNums(i)) + 1
            Else
                HT.Add(randomNums(i), 1)
            End If
        Next

        Return HT

    End Function
	// Calculates the chi-square value for N positive integers less than r
	// Source: "Algorithms in C" - Robert Sedgewick - pp. 517
	// NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
	// since it could be wrong in about one out of ten times."
	public bool IsRandom(int[] randomNums, int r)
	{
		//Calculate the number of samples - N
		int N = randomNums.Length;

		//According to Sedgewick: "This is valid if N is greater than about 10r"
		if (N <= 10*r)
			return false;

		double N_r = N / r;
		double chi_square = 0;
		Hashtable HT;

		//PART A: Get frequency of randoms
		HT = this.RandomFrequency (randomNums);

		//PART B: Calculate chi-square - this approach is in Sedgewick
		double f;
		foreach (DictionaryEntry Item in HT)
		{
			f = (int)(Item.Value) - N_r;
			chi_square += Math.Pow(f, 2);
		}
		chi_square = chi_square / N_r;

		//PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
		//This is valid if N is greater than about 10r"
		if((r - 2 * Math.Sqrt (r) <= chi_square) &  (r + 2 * Math.Sqrt (r) >= chi_square))
			return true;
		else
			return false;
	}

	//Gets the frequency of occurrence of a randomly generated array of integers
	//Output: A hashtable, key being the random number and value its frequency
	private Hashtable RandomFrequency(int[] randomNums)
	{
		Hashtable HT = new Hashtable();
		int N = randomNums.Length;

		for(int i = 0; i <= N-1; i++)
		{
			if (HT.ContainsKey(randomNums[i]))
				HT[randomNums[i]] = (int) HT[randomNums[i]] + 1;
			else
				HT[randomNums[i]] = 1;
		}
	
		return HT;
	}
	/**
	 * Calculates the chi-square value for N positive integers less than r
	 * Source: "Algorithms in C" - Robert Sedgewick - pp. 517
	 * NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
	 * since it could be wrong in about one out of ten times."
	 * @param  randomNums  a uniformly-randomly-generated array of integers
	 * @param  r           upper bound for the random range
	 * @return             whether it is likely to be uniformly randomly generated
	 */
	public static boolean isRandom(int[] randomNums, int r)
	{
		//According to Sedgewick: "This is valid if N is greater than about 10r"
		if (randomNums.length <= 10 * r)
			return false;

		//PART A: Get frequency of randoms
		Map<Integer,Integer> ht = getFrequencies(randomNums);

		//PART B: Calculate chi-square - this approach is in Sedgewick
		double n_r = (double)randomNums.length / r;
		double chiSquare = 0;

		for (int v : ht.values())
		{
			double f = v - n_r;
			chiSquare += f * f;
		}
		chiSquare /= n_r;

		//PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
		//This is valid if N is greater than about 10r"
		return Math.abs(chiSquare - r) <= 2 * Math.sqrt(r);
	}

	/**
	 * @param  nums  an array of integers
	 * @return       a Map, key being the number and value its frequency
	 */
	private static Map<Integer,Integer> getFrequencies(int[] nums)
	{
		Map<Integer,Integer> freqs = new HashMap<Integer,Integer>();

		for (int x : nums)
		{
			if (freqs.containsKey(x))
				freqs.put(x, freqs.get(x) + 1);
			else
				freqs.put(x, 1);
		}
	
		return freqs;
	}

Python

[edit | edit source]
from math import sqrt
from collections import Counter

def is_random(random_nums, r: int):
    """Calculates the chi-square value for n positive integers less than r

    Arguments:
    - random_nums: list of uniformly-randomly generated integers
    - r: int, upper bound for the random range

    Source: "Algorithms in C" - Robert Sedgewick - pp. 517

    NB: Sedgewick recommends:

        ...to be sure, the test should be tried a few times, since it could be
        wrong in about one out of ten times.

    Example:

        >>> import os
        >>> r = 256
        >>> sample = os.urandom(r * 11)
        >>> is_random(sample, r)
        True

    """
    # Calculate the number of samples - n
    n = len(random_nums)

    # According to Sedgewick:
    # This is valid if n is greater than about 10r
    if n <= 10 * r:
        return False

    n_r = n / r

    # PART A: Get frequency of randoms
    ht = Counter(random_nums)

    # PART B: Calculate chi-square - this approach is in Sedgewick
    chi_square = sum((v - n_r)**2 for v in ht.values()) / n_r

    # PART C: According to Sedgewick:
    # The statistic should be within 2(r)^1/2 of r
    # This is valid if N is greater than about 10r
    return abs(chi_square - r) <= 2 * sqrt(r)