Source code for fancytools.fit.linregressIgnoringOutliers

from scipy.stats import linregress


[docs]def linregressIgnoringOutliers(x, y, n_iter=3, nstd=2): ''' do linear regression [n_iter] times successive removing [outliers] return result of normal linregress ''' for _ in range(n_iter): m, n = linregress(x, y)[:2] y_fit = x*m+n dy = y-y_fit std = (dy**2).mean()**0.5 inliers = abs(dy) < nstd*std if inliers.sum() > 2: x = x[inliers] y = y[inliers] else: break return linregress(x, y)
if __name__ == '__main__': import numpy as np import pylab as plt import sys x = np.arange(100) y = np.linspace(0, 10, 100) # add noise: y += np.random.rand(100)*3 # add some outliers: pos = np.random.randint(0, 10, 100) > 4 vals = np.random.rand(pos.sum())*20 y[pos] = vals # ACTION m, n = linregress(x, y)[:2] y_fit1 = m*x+n m, n = linregressIgnoringOutliers(x, y)[:2] y_fit2 = m*x+n if 'no_window' not in sys.argv: # PLOT plt.plot(x, y, label='values') plt.plot(x, y_fit1, label='normal fit') plt.plot(x, y_fit2, label='fit ignoring outliers') plt.legend() plt.show()