Source code for fancytools.math.linRegressUsingMasked2dArrays

import numpy as np
from scipy.stats import linregress



[docs]def linRegressUsingMasked2dArrays(xVals, arrays, badMask):
    '''
    if you have multiple 2d arrays each with position given by 
    xVals[array-index]
    and you want to do a linear regression on all cells
    but you also might mask different areas in each 2darray
    
    returns ascent, offset, RMS-error
    '''
    
    assert arrays.ndim == 3, 'need multiple 2d arrays'
    assert arrays.shape == badMask.shape, 'mask needs to have same shape'
    
    s = arrays.shape
    #flatten to create array of 1d arrays:
    y = arrays.reshape(s[0],s[1]*s[2]).astype(float)
    s = s[1:]
    #solve linear regression:
    A = np.vstack([xVals, np.ones(len(xVals))]).T
    solution = np.linalg.lstsq(A, y)[0]

    offset = solution[1].reshape(s)
    ascent = solution[0].reshape(s) 

    #do linear regression for all masked areas
    #trying to find multiple 2d arrays that are not masked together
    x,y = np.where(np.sum(badMask, axis=0))
    for xi,yi in zip(x,y):
        valid = ~badMask[:,xi,yi]
        asc, offs, _, _, _ = linregress(x[valid],arrays[valid,xi,yi])
        ascent[xi,yi]=asc
        offset[xi,yi]=offs

    #calculate RMSE of the regression:
    error = np.empty(shape=arrays.shape)
    for n, (y,x) in enumerate(zip(arrays,xVals)):
        error[n] = (y - (x*ascent + offset))**2    
    error = error.mean(axis=0)**0.5
    
    return ascent, offset, error

#TODO: test case