% Logistic regression for a two-class problem; handling more than two
% classes requires code changes.
% ------------- Parameters -------------
% numIteration = 1000;   Maximum number of iterations.
% errorBound = 0.0001;   Permissible error.
%   Training stops as soon as either the error bound is reached or the
%   maximum number of iterations is reached, whichever comes first.
% eta = 0.5;             Learning rate. Experiments have been run with
%                        various learning rates.
% I have written two versions of logistic regression: one using matrices and
% the other using array indexes.
function logisticRegression_Matrix_LSData(dataFile, numRepeats, holdoutFraction)
% LOGISTICREGRESSION_MATRIX_LSDATA  Train and evaluate two-class logistic
% regression over repeated random hold-out splits and print averaged metrics.
%
%   logisticRegression_Matrix_LSData() loads 'BananaData.mat', runs 5 random
%   70/30 train/test splits, trains with trainLogReg, evaluates with
%   testLogReg and displays the element-wise mean of the metric vectors.
%
% Parameters (all optional, defaults reproduce the original script)
%   dataFile        - MAT-file containing a variable named `data`; the last
%                     column holds the class label (1 / -1 or 1 / 0), the
%                     remaining columns are features. Default 'BananaData.mat'.
%   numRepeats      - number of random hold-out repetitions. Default 5.
%   holdoutFraction - fraction of samples placed in the test set. Default 0.3.
%
% Requires crossvalind (Bioinformatics Toolbox) for the hold-out split.

    if nargin < 1 || isempty(dataFile),        dataFile = 'BananaData.mat'; end
    if nargin < 2 || isempty(numRepeats),      numRepeats = 5;              end
    if nargin < 3 || isempty(holdoutFraction), holdoutFraction = .3;        end

    disp('..Starting logistic regression Algorithm....');

    % reading data
    A = load(dataFile);
    data = A.data;
    numCols = size(data, 2);
    if numCols < 2
        error('logisticRegression_Matrix_LSData:badData', ...
              'data must have at least one feature column and one label column.');
    end

    % Accumulator for [TP, TN, FP, FN, precision, recall, F1, accuracy].
    vtot = zeros(1, 8);

    % Repeated random splits (train : test = 1-holdoutFraction : holdoutFraction).
    for i = 1:numRepeats
        groups = data(:, numCols);
        [train, test] = crossvalind('holdout', groups, holdoutFraction);

        trainingSet    = data(train, 1:numCols-1);
        trainingLabels = data(train, numCols);
        testSet        = data(test, 1:numCols-1);
        testLabels     = data(test, numCols);

        % The logistic (logit) model works with classes 1 and 0, so map the
        % -1 class to 0 in both partitions.
        trainingLabels(trainingLabels == -1) = 0;
        testLabels(testLabels == -1) = 0;

        [weights] = trainLogReg(trainingSet, trainingLabels);
        [correctlyClassified, count0, count1, unClassified, v] = ...
            testLogReg(testSet, testLabels, weights); %#ok<ASGLU>
        vtot = vtot + v;
    end

    % taking average of all such entries
    disp('TP, TN, FP, FN, TP/(TP+FP), TP/(TP+FN), 2*P*R / (P+R) , correctlyClassified/testLengthRow');
    vtot = vtot ./ numRepeats %#ok<NOPRT,NASGU>  (intentionally displayed)
end
% This method trains the logistic regression model.
% ----- Parameters -----
% trainingSet: the training set
% trainingLabels: the labels corresponding to the training set
% ----- Return values -----
% weights: the final weights obtained from training
%
function [weights] = trainLogReg( trainingSet,trainingLabels, eta, numIteration, errorBound, showPlots)
% TRAINLOGREG  Fit two-class logistic regression by batch gradient ascent.
%
%   weights = trainLogReg(trainingSet, trainingLabels) starts from all-zero
%   weights and repeatedly applies  w <- w + eta * X' * (y - sigmoid(X*w)),
%   where X is trainingSet with a leading column of ones (bias term).
%
% Parameters
%   trainingSet    - N-by-D matrix, one sample per row.
%   trainingLabels - N labels coded as 1 / 0 (row or column vector).
%   eta            - (optional) learning rate. Default 0.5.
%   numIteration   - (optional) iteration limit. Default 1000. The counter
%                    starts at 1 and the loop stops once it reaches this
%                    value, exactly as in the original implementation.
%   errorBound     - (optional) stop when the Euclidean norm of the weight
%                    change of one update drops below this. Default 0.0001.
%   showPlots      - (optional) when true and D == 2, draw the samples, the
%                    fitted probabilities and the sigmoid surface in
%                    figure 1. Default true.
%
% Returns
%   weights - 1-by-(D+1) row vector [bias, w_1, ..., w_D].

    if nargin < 3 || isempty(eta),          eta = 0.5;           end
    if nargin < 4 || isempty(numIteration), numIteration = 1000; end
    if nargin < 5 || isempty(errorBound),   errorBound = 0.0001; end
    if nargin < 6 || isempty(showPlots),    showPlots = true;    end

    [numSamples, numFeatures] = size(trainingSet);
    targets = trainingLabels(:);          % accept row or column labels
    if numel(targets) ~= numSamples
        error('trainLogReg:sizeMismatch', ...
              'trainingLabels must contain one label per row of trainingSet.');
    end

    % Design matrix: bias column followed by the features.
    X = [ones(numSamples, 1), trainingSet];
    W_Old = zeros(1, numFeatures + 1);

    % stepNorm/k replace the original locals that shadowed the builtins
    % `error` and `sum`.
    stepNorm = 1.0;
    k = 1;
    while (stepNorm >= errorBound) && (k < numIteration)
        % Predicted P(class = 1) for every sample under the current weights.
        P = 1 ./ (1 + exp(-(X * W_Old')));

        % Gradient of the log-likelihood, summed over samples (not averaged,
        % matching the original update rule).
        Z = X' * (targets - P);

        % computing the new weights
        W_New = W_Old + eta * Z';

        stepNorm = sqrt(sum((W_New - W_Old) .^ 2));
        k = k + 1;
        W_Old = W_New;
    end

    disp('final weights : number of iterations : error');
    disp(W_Old);
    disp(k);
    disp(stepNorm);

    % The visualisation is inherently three-dimensional (x1, x2, probability),
    % so it is only meaningful for exactly two features.
    if showPlots && numFeatures == 2
        % Fitted probabilities using the final weights.
        y1 = 1 ./ (1 + exp(-(X * W_Old')));
        x1 = trainingSet(:, 1);
        x2 = trainingSet(:, 2);

        figure(1)
        scatter3(x1, x2, targets, 10);
        axis([-5,5,-5,5,-5,5])
        % Keep the scatter while adding the curve and the surface; without
        % hold each plotting call would replace the previous one.
        hold on
        plot3(x1, x2, y1);

        [xx1, yy1] = meshgrid(-10:1:10, -10:1:10);
        % Element-wise exp: the sigmoid is evaluated per grid point (expm
        % would compute a matrix exponential instead).
        zz = 1 ./ (1 + exp(-(W_Old(1) + W_Old(2) .* xx1 + W_Old(3) .* yy1)));
        surf(xx1, yy1, zz);
        hold off

        title('title');
        xlabel('x');
        ylabel('y');
        zlabel('z');
    end

    weights = W_Old;
end
function [correctlyClassified,count0,count1,unClassified,v] = testLogReg(testSet,testLabels, weights)
% TESTLOGREG  Evaluate logistic-regression weights on a labelled test set.
%
%   A sample is assigned to class 1 when sigmoid([1, x] * weights) >= 0.5,
%   otherwise to class 0. The prediction is compared with testLabels.
%
% Parameters
%   testSet    - M-by-D matrix, one sample per row.
%   testLabels - M labels coded as 1 / 0 (row or column vector).
%   weights    - D+1 weights [bias, w_1, ..., w_D] (row or column vector).
%
% Returns
%   correctlyClassified - number of samples whose prediction equals the label.
%   count0              - number of stored misclassified class-0 samples (max 5).
%   count1              - number of stored misclassified class-1 samples (max 5).
%   unClassified        - 10-by-D matrix; rows 1..5 hold the first misclassified
%                         class-1 samples, rows 6..10 the first misclassified
%                         class-0 samples; unused rows are zero.
%   v                   - [TP, TN, FP, FN, precision, recall, F1, accuracy].
%                         A ratio whose denominator is zero is reported as 0
%                         rather than NaN so that averages over several runs
%                         stay finite.

    maxPerClass = 5;

    [testLengthRow, testLengthCol] = size(testSet);
    labels = testLabels(:);
    w = weights(:);
    if numel(w) ~= testLengthCol + 1
        error('testLogReg:sizeMismatch', ...
              'weights must contain one bias term plus one weight per feature.');
    end
    if numel(labels) ~= testLengthRow
        error('testLogReg:sizeMismatch', ...
              'testLabels must contain one label per row of testSet.');
    end

    % computing the value of each sample against the separating plane
    scores = [ones(testLengthRow, 1), testSet] * w;
    predicted = (1 ./ (1 + exp(-scores))) >= 0.5;

    % checking accuracy by number of correctly classified
    isCorrect  = (predicted == labels);
    isPositive = (labels == 1);

    correctlyClassified = nnz(isCorrect);
    TP = nnz( isCorrect &  isPositive);
    TN = nnz( isCorrect & ~isPositive);
    FN = nnz(~isCorrect &  isPositive);
    FP = nnz(~isCorrect & ~isPositive);

    % storing up to 5 misclassified samples from each class; each class gets
    % its own half of the matrix so the two never overwrite each other.
    unClassified = zeros(2 * maxPerClass, testLengthCol);
    missed1 = find(~isCorrect & isPositive,    maxPerClass);
    missed0 = find(~isCorrect & (labels == 0), maxPerClass);
    count1 = numel(missed1);
    count0 = numel(missed0);
    unClassified(1:count1, :) = testSet(missed1, :);
    unClassified(maxPerClass + (1:count0), :) = testSet(missed0, :);

    % Precision, recall, F1 and accuracy with zero-denominator protection.
    P = 0;
    if (TP + FP) > 0
        P = TP / (TP + FP);
    end
    R = 0;
    if (TP + FN) > 0
        R = TP / (TP + FN);
    end
    F = 0;
    if (P + R) > 0
        F = 2 * P * R / (P + R);
    end
    accuracy = 0;
    if testLengthRow > 0
        accuracy = correctlyClassified / testLengthRow;
    end

    v = [TP, TN, FP, FN, P, R, F, accuracy];

    disp('TP, TN, FP, FN, TP/(TP+FP), TP/(TP+FN), 2*P*R / (P+R) , correctlyClassified/testLengthRow');
    disp(v);
    disp(accuracy);
end
% You must log in to post a comment.