View Raw SPL
/*****************************************************************************
*                                                                            *
*   PEARSON.SPL   Copyright (C) 1998 DSP Development Corporation             *
*                               All Rights Reserved                          *
*                                                                            *
*   Author:      Randy Race                                                  *
*                                                                            *
*   Synopsis:    Calculates Pearson's Correlation Coefficient                *
*                                                                            *
*   Revisions:    1 Dec 1998  RRR  Creation - from PEARSON.MAC               *
*                                                                            *
*****************************************************************************/

#if @HELP_PEARSON

    PEARSON

    Purpose: Calculates Pearson's Linear Correlation Coefficient

    Syntax:  PEARSON(x, y)

             x - input series
             y - input series

    Returns: A number, the correlation coefficient

    Example:
             W1: Gsin(100, .01, 4)
             W2: Gsin(100, .01, 4, pi/3)

             Pearson(w1, w2)
             Returns: 0.5

             Pearson(w1, w1)
             Returns: 1.0

             Pearson(w1, w1/2)
             Returns: 1.0

             Pearson(w1, -w1)
             Returns -1.0

   Remarks:
             Pearson returns the degree of linear correlation between
             the two input series. The result ranges from -1 to 1.

             Pearson requires X and Y have the same number of points.

    See Also:
             Autocor
             Crosscor
#endif


/* Pearson's Linear Correlation Coefficient */
pearson(x, y)
{
        local nrx, nry, ncx, ncy, r, p, rho, pval, j;

        if (argc < 2)
        {
                if (argc < 1) error(sprintf("%s - input array required", __FUNC__));

                y = refseries(x);
        }

        (nrx, ncx) = size(x);
        (nry, ncy) = size(y);

        if (nrx != nry)
        {
                error(sprintf("%s - input lengths must match", __FUNC__));
        }

        /* pearson coefficients */
        rho  = zeros(ncx, ncy);

        loop (j = 1..ncx)
        {
                /* auto iterates through y columns */
                rho[j, ..] = pearson_iter(col(x, j), y);
        }

        /* pvals */
        pval = zeros(ncx, ncy);

        /* loop through all columns */
        if (outargc > 1)
        {
                pval = pearson_pval(length(x), rho);
        }

        if (ncx == 1)
        {
                /* scalar result */
                rho  = rho[1];
                pval = pval[1];
        }

        if (outargc > 1)
        {
                /* return tau and p value */
                return(rho, pval);
        }
        else
        {
                return(rho);
        }
}


/* column interation pearson computation */
ITERATE pearson_iter(x, y)
{
        local xm, ym, r, N, pval;

        if (any(isnan(x) || isnan(y)))
        {
                r = nan;
        }
        else
        {
                /* remove mean values */
                xm = x - mean(x);
                ym = y - mean(y);

                /* calculate coefficient */
                r = real(sum(xm * ym) / sqrt(sum(xm * xm) * sum(ym * ym)));
        }

        return(r);
}


/* pearson statistic */
pearson_pval(N, r)
{
        local df, rsqr, tsqr, pval;

        /* dof */
        df = N - 2;

        /* significance */
        rsqr = r * r;
        tsqr = rsqr * (df / (1 - rsqr));

        pval = betainc(df / (df + tsqr), 0.5 * df, 0.5);

        if (numcols(r) > 1)
        {
                pval += eye(pval);
        }

        /* handle NaN */
        pval[find(isnan(pval))] = 0;

        return(pval);
}