Matlab Linear Regression Sample Code

Three type of datasets have been analyzed for this technique:

(1) Linearly separable data(LS)

(2) Inseparable data(NLS)

(3) Banana data (BD)

For a binary classification problem. Split the datasets into 70% training and 30% testing randomly in five folds. This is a old code done as a part of an assignment. Compatibility with newer versions may happen.

% This is for binary class problem, where we are fitting in k dimension.

%This is the code for classification using linear regression.

function linearRegression_LSData()
disp(‘..Starting linear regression Algorithm….’);

%reading data

A = load('BananaData.mat');
data = A.data;    
[N,col]= size(data);

vtot=[0, 0, 0, 0,0, 0,  0 , 0];

%5 folds with 70-30 ratio

for i = 1:5

    P=.3;
    groups=data(:,3);
    [train,test] = crossvalind('holdout',groups, P);
    train1= data(train, 1: 3);
    test1=data(test, 1:3);
    [trainLengthRow, trainLengthCol]=size(train1);
    [rowtest,coltest]= size(test1);

    trainingSet = train1(1:trainLengthRow, 1 : trainLengthCol -1 );
    trainingLabels = train1(1:trainLengthRow, trainLengthCol );

    testSet = test1(1:rowtest, 1 : coltest -1 );
    testLabels = test1(1:rowtest, coltest );


    disp('training length');
    disp(trainLengthCol);
    disp(trainLengthRow);
    disp(rowtest);
    disp(coltest);       

    weights(1:trainLengthCol)=0;
    [weights] = train1LinearReg(trainingSet,trainingLabels);
    disp(weights);

    [correctlyClassified,count0,count1,unClassified,v] =  testLinearReg(testSet,testLabels, weights);
    vtot = vtot +v ;

end
disp(‘TP, TN, FP, FN, TP/(TP+FP), TP/P, 2PR / (P+R) , correctlyClassified/trainLengthRow’);
%taking average of all quantaties of TP, TN, FP, FN etc.
vtot = vtot ./ 5
end

%Method to do train the Linear Regression Model
%——-Paramaeters————
%trainingSet: The training set
%trainingLabels: The traingin labels corresponding to training Set
%————Return Type——————-
%weights: weights computed by linear regression
function [weights] = train1LinearReg(trainingSet,trainingLabels)

[trainLengthRow, trainLengthCol] = size(trainingSet);
weights(1:trainLengthCol+1)=0;
Y=trainingLabels;
X(1:trainLengthRow, 1:trainLengthCol+1) = 0;
X(1:trainLengthRow, 1)= 1;
X(1:trainLengthRow, 2:trainLengthCol+1) = trainingSet;
disp(X);
disp(Y);
Z= X'* X;
weights = (inv(Z)) *(X' * Y);    


%the following is for display of plots and surfaces 
y1=weights' * X';

%Plotting the regression line
 for p=1:trainLengthRow
   x1(p)= trainingSet(p,1);
 end;

 for p=1:trainLengthRow
   x2(p)= trainingSet(p,2);
 end;


 for p=1:trainLengthRow
   yOrginal(p)= trainingLabels(p);
 end;

     k=5;

 figure     
 scatter3(x1,x2,trainingLabels,10);     
 axis([-1*k,k,-1*k,k,-1*k,k])

 figure     
 plot3(x1,x2,y1); 
 axis([-1*k,k,-1*k,k,-1*k,k])


 xx=(-1*k:1:k);
 yy=(-1*k:1:k);
 [xx1,yy1]=meshgrid(xx,yy);

 %drawing the surface
 sum = -1 .* (weights(1)+weights(2).*xx1+weights(3).*yy1);
 zz= 1 ./(1 + expm(sum));     
 figure
 surf(xx1,yy1,zz);
 title('Surface');
 xlabel('x');
 ylabel('y')
 zlabel('z');

end

%This function tests the test set with testlabels and the computed outputs
%———–paramaters———–
%testSet: The Test Set for testing
%testLables: Corresponding test labels
%weights: weights computed from the training
%—————Return—————–
% correctlyClassified: correctly classified number of samples
% unClassified: 10 unclassified samples
%v: vector that stores TP ;TN;FP; FN ; P; R; F; Accuracy
%count0: Number of class -1 unclassified upto max val of 5
%count1: Number of class +1 unclassified upto max val of 5
function [correctlyClassified,count0,count1,unClassified,v] = testLinearReg(testSet,testLabels, weights)

correctlyClassified = 0;
count0 = 0; count1=0;   TP=0;    TN=0;     FP=0;     FN =0; P=0; R=0; F=0;

[testLengthRow,testLengthCol]=size(testSet);
unClassified(1:10 ,1: testLengthCol) = 0;

% checking accuracy by  number of correctly classified   

for k=(1: testLengthRow )
    x=[1, testSet(k,1:testLengthCol)];
    O1=    x' .* weights ;

    %computing the value of vector with plane
    sum =0;
    for p=1:length(O1)
        sum = sum +O1(p);
    end

    % setting the outputs
    if(sum > 0)
        O=1;
    else 
        O=-1;
    end


     %    error as output approaching target
    if (O == testLabels(k))
        % correctly classified examples
        correctlyClassified=correctlyClassified+1;

        %compute  TP, TN
        if(testLabels(k)==1)
            TP = TP+1;
        else
            TN = TN +1;
        end

    else
        % wrongly classified examples
        if(testLabels(k)==1)
            FN = FN+1;
        else
            FP = FP +1;
        end
        %storing 5 misclassified  classes from each class
        if(count1<5 && testLabels(k)==1)
            count1 = count1 + 1;
            unClassified(count1,1: testLengthCol) = testSet(k,1: testLengthCol);
        end
        if(count0<5 && testLabels(k)==-1 )
            count0 = count0 + 1;
            unClassified(count0,1: testLengthCol) = testSet(k,1: testLengthCol);                
        end
    end

end

  k
P= TP/(TP+FP)
R=  TP/(TP+FN)
v=[TP,    TN,     FP,     FN,     P,     R,      2*P*R / (P+R) , correctlyClassified/testLengthRow]
disp('TP,    TN,     FP,     FN,     TP/(TP+FP),      TP/P,      2*P*R / (P+R) , correctlyClassified/trainLengthRow');


 unClassified;
 accuracy = correctlyClassified/testLengthRow  ;   
 accuracy

end

Published by Nidhika

Hi, Apart from profession, I have inherent interest in writing especially about Global Issues of Concern, fiction blogs, poems, stories, doing painting, cooking, photography, music to mention a few! And most important on this website you can find my suggestions to latest problems, views and ideas, my poems, stories, novels, some comments, proposals, blogs, personal experiences and occasionally very short glimpses of my research work as well.

%d bloggers like this: