預測數值型資料:迴歸
一,標準迴歸函式
from numpy import *
def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats.

    Every column except the last is a feature; the last column is the
    target value.

    Args:
        fileName: path to the tab-delimited data file.

    Returns:
        (dataMat, labelMat): list of feature rows (lists of floats) and the
        list of target floats, in file order.
    """
    dataMat = []; labelMat = []
    # 'with' guarantees the handle is closed (the original opened the file
    # twice -- once just to count fields -- and never closed either handle)
    with open(fileName) as fr:
        for line in fr:
            curLine = line.strip().split('\t')
            if curLine == ['']:
                continue  # tolerate blank/trailing empty lines
            dataMat.append([float(tok) for tok in curLine[:-1]])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def standRegres(xArr, yArr):
    """Ordinary least-squares regression via the normal equations.

    Solves ws = (X^T X)^-1 X^T y.

    Args:
        xArr: (m, n) feature rows (list of lists or matrix).
        yArr: length-m target values.

    Returns:
        (n, 1) numpy matrix of regression weights, or None when X^T X is
        singular (no unique solution).
    """
    xMat = mat(xArr); yMat = mat(yArr).T
    xTx = xMat.T * xMat
    if linalg.det(xTx) == 0.0:
        # print() call instead of the original Python 2 print statement
        print("This matrix is singular, cannot do inverse")
        return None
    ws = xTx.I * (xMat.T * yMat)
    return ws
>>> import regression
>>> from numpy import *
>>> xArr,yArr=regression.loadDataSet('ex0.txt')
>>> xArr[0:2]
[[1.0, 0.067732], [1.0, 0.42781]]
>>> ws = regression.standRegres(xArr,yArr)
>>> ws
matrix([[ 3.00774324],
[ 1.69532264]])
>>> xMat=mat(xArr)
>>> yMat=mat(yArr)
>>> import matplotlib.pyplot as plt
>>> fig = plt.figure()
>>> ax = fig.add_subplot(111)
>>> ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0])
<matplotlib.collections.PathCollection object at 0x7f4905380fd0>
>>> xCopy=xMat.copy()
>>> xCopy.sort(0)
>>> yHat=xCopy*ws
>>> ax.plot(xCopy[:,1],yHat)
[<matplotlib.lines.Line2D object at 0x7f4905391950>]
>>> plt.show()
二,區域性加權線性迴歸
線性迴歸的一個問題是有可能出現欠擬合現象,因為它求的是具有最小均方誤差的無偏估計.所以有些方法允許在估計中引入一些偏差,從而降低預測的均方誤差
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Locally weighted linear regression estimate at a single point.

    Each training row j gets a Gaussian kernel weight
    w_j = exp(-||testPoint - x_j||^2 / (2 k^2)); smaller k makes the fit
    more local.

    Args:
        testPoint: 1 x n query point.
        xArr: (m, n) training features.
        yArr: length-m training targets.
        k: kernel bandwidth.

    Returns:
        1x1 matrix testPoint * ws, or None when the weighted X^T W X is
        singular.
    """
    xMat = mat(xArr); yMat = mat(yArr).T
    m = shape(xMat)[0]
    weights = mat(eye(m))
    for j in range(m):
        diffMat = testPoint - xMat[j, :]
        weights[j, j] = exp(diffMat * diffMat.T / (-2.0 * k ** 2))
    xTx = xMat.T * (weights * xMat)
    if linalg.det(xTx) == 0.0:
        # print() call instead of the original Python 2 print statement
        print("This matrix is singular, cannot do inverse")
        return None
    ws = xTx.I * (xMat.T * (weights * yMat))
    return testPoint * ws
def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run lwlr() on every row of testArr and collect the estimates.

    Args:
        testArr: (m, n) query points.
        xArr, yArr: training data passed through to lwlr().
        k: kernel bandwidth passed through to lwlr().

    Returns:
        length-m numpy array of predicted values, one per query point.
    """
    numQueries = shape(testArr)[0]
    estimates = zeros(numQueries)
    for idx in range(numQueries):
        estimates[idx] = lwlr(testArr[idx], xArr, yArr, k)
    return estimates
>>> reload(regression)
<module 'regression' from 'regression.py'>
>>> xArr,yArr=regression.loadDataSet('ex0.txt')
>>> yArr[0]
3.176513
>>> regression.lwlr(xArr[0],xArr,yArr,1.0)
matrix([[ 3.12204471]])
>>> regression.lwlr(xArr[0],xArr,yArr,0.001)
matrix([[ 3.20175729]])
>>> yHat=regression.lwlrTest(xArr,xArr,yArr,0.003)
>>> xMat=mat(xArr)
>>> srtInd=xMat[:,1].argsort(0)
>>> xSort=xMat[srtInd][:,0,:]
>>> import matplotlib.pyplot as plt
>>> fig = plt.figure()
>>> ax = fig.add_subplot(111)
>>> ax.plot(xSort[:,1],yHat[srtInd])
[<matplotlib.lines.Line2D object at 0x7f4905a37390>]
>>> ax.scatter(xMat[:,1].flatten().A[0], mat(yArr).T.flatten().A[0], s=2, c='red')
<matplotlib.collections.PathCollection object at 0x7f4905a37a50>
>>> plt.show()
三,示例:預測鮑魚的年齡
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between two numpy arrays."""
    residuals = yArr - yHatArr
    return (residuals ** 2).sum()
四,縮減係數來"理解"資料
如果資料的特徵比樣本點還多應該怎麼辦?是否還可以使用線性迴歸和之前的方法來做預測?答案是否定的,這是因為在計算(X.T*X)^-1的時候會出錯
為了解決這個問題,統計學家引入了嶺迴歸(ridge regression)
def ridgeRegres(xMat, yMat, lam=0.2):
    """Ridge regression: ws = (X^T X + lam * I)^-1 X^T y.

    The lam * I penalty makes the system solvable even when X has more
    features than samples; lam=0 reduces to ordinary least squares.

    Args:
        xMat: (m, n) feature matrix (numpy matrix).
        yMat: (m, 1) target column vector.
        lam: ridge penalty coefficient.

    Returns:
        (n, 1) weight matrix, or None when the penalized matrix is singular
        (only possible for lam == 0 with rank-deficient X^T X).
    """
    xTx = xMat.T * xMat
    denom = xTx + eye(shape(xMat)[1]) * lam
    if linalg.det(denom) == 0.0:
        # print() call instead of the original Python 2 print statement
        print("This matrix is singular, cannot do inverse")
        return None
    ws = denom.I * (xMat.T * yMat)
    return ws
def ridgeTest(xArr, yArr):
    """Compute ridge weights over 30 lambda values exp(-10) .. exp(19).

    The targets are mean-centered and each feature column is standardized
    (centered, then divided by its variance -- the book's convention).

    Args:
        xArr, yArr: raw training data (as returned by loadDataSet).

    Returns:
        (30, n) numpy array: one weight vector per lambda, increasing lambda
        down the rows.
    """
    xMat = mat(xArr); yMat = mat(yArr).T
    yMat = yMat - mean(yMat, 0)
    xMat = (xMat - mean(xMat, 0)) / var(xMat, 0)
    numTestPts = 30
    wMat = zeros((numTestPts, shape(xMat)[1]))
    for i in range(numTestPts):
        wMat[i, :] = ridgeRegres(xMat, yMat, exp(i - 10)).T
    return wMat
>>> reload(regression)
<module 'regression' from 'regression.py'>
>>> abX,abY=regression.loadDataSet('abalone.txt')
>>> ridgeWeights=regression.ridgeTest(abX,abY)
>>> import matplotlib.pyplot as plt
>>> fig = plt.figure()
>>> ax = fig.add_subplot(111)
>>> ax.plot(ridgeWeights)
[<matplotlib.lines.Line2D object at 0x7f4905a8e410>, <matplotlib.lines.Line2D object at 0x7f4905a8e750>, <matplotlib.lines.Line2D object at 0x7f4905a8ecd0>, <matplotlib.lines.Line2D object at 0x7f4905a8ee50>, <matplotlib.lines.Line2D object at 0x7f490534d4d0>, <matplotlib.lines.Line2D object at 0x7f490534d6d0>, <matplotlib.lines.Line2D object at 0x7f490534de90>, <matplotlib.lines.Line2D object at 0x7f490534dad0>]
>>> plt.show()
五,lasso迴歸
def regularize(xMat):
    """Standardize columns of xMat: subtract the mean, divide by the variance.

    Note: divides by the variance rather than the standard deviation,
    matching ridgeTest's convention. The input matrix is not modified.
    """
    centered = xMat - mean(xMat, 0)
    return centered / var(xMat, 0)
def stageWise(xArr, yArr, eps=0.01, numIt=100):
    """Forward stagewise linear regression (a greedy, lasso-like method).

    On each of numIt iterations, tries nudging every weight by +/- eps and
    keeps the single change that most reduces the residual sum of squares.

    Args:
        xArr, yArr: raw training data (as returned by loadDataSet).
        eps: step size applied to one weight per iteration.
        numIt: number of iterations.

    Returns:
        (numIt, n) numpy array with the weight vector after each iteration.
        (The original left the history matrix and the return commented out,
        so the function computed everything and returned None.)
    """
    xMat = mat(xArr); yMat = mat(yArr).T
    yMat = yMat - mean(yMat, 0)  # center y (could also regularize, but coefs shrink)
    xMat = regularize(xMat)
    m, n = shape(xMat)
    returnMat = zeros((numIt, n))
    ws = zeros((n, 1)); wsMax = ws.copy()
    for i in range(numIt):
        print(ws.T)  # progress trace, kept from the original
        lowestError = inf
        for j in range(n):
            for sign in [-1, 1]:
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                yTest = xMat * wsTest
                rssE = rssError(yMat.A, yTest.A)
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat
>>> reload(regression)
>>> xArr,yArr=regression.loadDataSet('abalone.txt')
>>> regression.stageWise(xArr,yArr,0.01,200)
>>> regression.stageWise(xArr,yArr,0.001,5000)
>>> xMat=mat(xArr)
>>> yMat=mat(yArr).T
>>> xMat=regression.regularize(xMat)
>>> yM = mean(yMat,0)
>>> yMat=yMat-yM
>>> weights=regression.standRegres(xMat,yMat.T)
>>> weights.T
matrix([[ 0.0430442 , -0.02274163, 0.13214087, 0.02075182, 2.22403814,
-0.99895312, -0.11725427, 0.16622915]])
相關文章
- 機器學習實戰專案-預測數值型迴歸機器學習
- 機器學習之迴歸分析--預測值機器學習
- XSD 數值資料型別資料型別
- FLOAT:浮點數值資料的大致數值資料型別資料型別
- 【機器學習】線性迴歸預測機器學習
- 用線性迴歸無編碼實現文章瀏覽數預測
- 線性迴歸-如何對資料進行迴歸分析
- ML.NET 示例:迴歸之價格預測
- ML.NET 示例:迴歸之銷售預測
- 預測演算法之多元線性迴歸演算法
- [機器學習實戰-Logistic迴歸]使用Logistic迴歸預測各種例項機器學習
- 資料分析:線性迴歸
- 迴圈神經網路LSTM RNN迴歸:sin曲線預測神經網路RNN
- 2024企業數字化轉型應迴歸理性
- 【機器學習】--迴歸問題的數值優化機器學習優化
- 資料預處理-資料歸約
- python 內建資料結構-數值型Python資料結構
- 使用線性迴歸模型預測黃金ETF價格模型
- Alink漫談(十) :線性迴歸實現 之 資料預處理
- 【Matlab 041期】【數學建模】Matlab 電力預測預測之灰度預測組合預測指數平滑回歸分析Matlab
- 詳解迴歸測試
- 數值分析:最小二乘與嶺迴歸(Pytorch實現)PyTorch
- 採用線性迴歸實現訓練和預測(Python)Python
- 0-4 Python 基礎資料型別-數值型別Python資料型別
- js基本語法之 值型別(資料型別)(變數型別)JS資料型別變數
- 讓價值迴歸價值—讀懂黑馬POE
- 迴歸本源:JavaScript 之中的值和引用JavaScript
- 數值解和解析解/softmax迴歸/泰勒展開/牛頓法
- 迴歸資料分析,資料運營的三種角色!
- 泰坦尼克號生存預測邏輯迴歸,kaggle渣渣排名邏輯迴歸
- Python機器學習筆記:使用Keras進行迴歸預測Python機器學習筆記Keras
- 讓業務實現迴歸資料庫資料庫
- 資料結構:歸併排序(非遞迴)資料結構排序遞迴
- 資料探勘從入門到放棄(一):線性迴歸和邏輯迴歸邏輯迴歸
- 對數機率迴歸(邏輯迴歸)原理與Python實現邏輯迴歸Python
- 基本資料型別,for迴圈資料型別
- 軟體迴歸測試常用方法有哪些?迴歸測試報告收費貴嗎?測試報告
- 區別值型別資料和引用型別資料型別
- ML.NET教程之計程車車費預測(迴歸問題)