View Raw SPL
/*****************************************************************************
*                                                                            *
*   RANDSAMPLE.SPL Copyright (C) 2024 DSP Development Corporation            *
*                               All Rights Reserved                          *
*                                                                            *
*   Author:      Randy Race                                                  *
*                                                                            *
*   Synopsis:    Select K samples from a population N                        *
*                                                                            *
*   Revisions:    1 Mar 2024  RRR  Creation                                  *
*                                                                            *
*****************************************************************************/

#if @HELP_RANDSAMPLE

    RANDSAMPLE

    Purpose: Randomly select K samples from a population N.

    Syntax:  RANDSAMPLE(n, k, replace)

                    n - An integer or array, the input population. If N is
                        an integer, the population is the series 1..N.

                    k - Optional. An integer, the number of samples to
                        randomly select from population N. Defaults to
                        the population size: length(N) if N is an array,
                        or N if N is an integer.

              replace - Optional. An integer, the replacement flag.

                        0: do not replace values from the population (default)
                        1: replace values from the population


    Returns: A series or array, the randomly selected samples.

    Example:
             W1: randsample(1..5)
             W2: randsample(5)

             W1 randomly resamples the series {1, 2, 3, 4, 5}.

             W2 performs the same. The results are not identical because
             different samples are randomly selected.

    Example:
             W3: seedrand(100);randsample(1..5)
             W4: seedrand(100);randsample(5)

             Same as above except the random number generator is set to
             the same seed value in both random selections, resulting in
             W3 == W4.

    Example:
             W1: 11..20
             W2: randsample(w1, 8, 0)
             W3: randsample(w1, 8, 1)

             W1 contains the series 11..20.

             W2 randomly selects 8 values from W1 without replacement. The
             8 resulting values are unique and fall in the range between
             11 and 20.

             W3 randomly selects 8 values from W1 with replacement. The
             8 resulting values fall in the range between 11 and 20 and
             some of the values may be repeated.

    Example:
             W1: gnorm(10, 1);
             W2: randsample(w1, 8, 0)
             W3: randsample(w1, 8, 1)

             Same as above except the source array in W1 consists of 10
             normally distributed random values.

             W2 always contains unique values whereas W3 could
             contain repeated values.

    Remarks:
             RANDSAMPLE randomly selects K samples from a population N.

             The population is set to 1..N if N is a scalar.

             REPLACE must be 1 if K, the number of selections, is greater
             than the population size specified by N.

    See Also:
             Colrandomize
             Randomize
             Randperm
             Rowrandomize
#endif


/*
 * Select K samples at random from a population N.
 *
 *   n       - a scalar or an array. A scalar N stands for the population
 *             1..N; an array is itself the population.
 *   k       - optional scalar, the number of samples to draw. Defaults
 *             to the population size.
 *   replace - optional flag, non-zero to sample with replacement.
 *             Defaults to 0 (no replacement).
 *
 * Returns the selected values: elements of N when N is an array,
 * otherwise the selected index values. An empty N yields an empty result.
 *
 * Raises an error when N is missing or is neither a scalar nor an array,
 * when K is not a non-negative integer scalar, or when K exceeds the
 * population size while REPLACE is off.
 */
ITERATE randsample(n = {}, k = {}, replace = 0)
{
        local p, r;

        if ((argc < 1) || (not(isscalar(n)) && not(isarray(n))))
        {
                error(sprintf("%s - input series or integer and integer number of elements required", __FUNC__));
        }

        if (isempty(n))
        {
                return([]);
        }

        /* population size */
        p = isarray(n) ? length(n) : n;

        /* number to select, the whole population by default */
        k = isempty(k) ? p : k;

        if (not(isscalar(k)))
        {
                error(sprintf("%s - number of samples to select, k, must be an integer", __FUNC__));
        }

        /* the scalar test alone lets fractional or negative counts through */
        if ((k < 0) || not(k == int(k)))
        {
                error(sprintf("%s - number of samples to select, k, must be a non-negative integer", __FUNC__));
        }

        if (replace)
        {
                /* k random indices, truncated from draws between 1 and p + 1 */
                r = int(grand(k, 1, 1, p + 1));

                /*
                 * A draw landing exactly on the upper limit would truncate
                 * to p + 1, one past the population; pull it back to p.
                 * NOTE(review): assumes grand can return its upper limit -
                 * confirm. The adjustment is a no-op otherwise.
                 */
                r = r - (r > p);
        }
        else
        {
                if (k > p)
                {
                        error(sprintf("%s - REPLACE must be True to choose %ld samples from a population of size %ld", __FUNC__, k, p));
                }

                /* k distinct indices chosen from 1..p */
                r = randperm(p, k);
        }

        if (isarray(n))
        {
                /* map the indices onto the input array */
                r = n[r];
        }

        return(r);
}