When creating a post, please add:
- Week # must be added in the tags option of the post.
- Link to the classroom item you are referring to:
- Description (include relevant info but please do not post solution code or your entire notebook)
I’m not sure where my logistic regression is going wrong or my x and y labels. over all my test are all failing every time the prediction comes as 0.
Please help me where i’m getting wrong.
i have 2 files positive tweet & Negative tweets which look like below json:
build the frequencies like
{
public string Word { get; set; }
public bool PositiveOrNegative { get; set; }
public int Count { get; set; }
}
split the training and testing data:
//Split Training and Testing Data → here dividing by half
var x_train_data_Positive = totalPositiveStringArr.Skip(0).Take((int)(totalPositiveStringArr.Length * .80) - 1).ToList();
var x_train_data_Negative = totalNegativeStringArr.Skip(0).Take((int)(totalNegativeStringArr.Length * .80) - 1).ToList();
var x_test_data_Positive = totalPositiveStringArr.Skip((int)(totalPositiveStringArr.Length * .80)).Take((totalPositiveStringArr.Length) - 1).ToList();
var x_test_data_Negative = totalNegativeStringArr.Skip((int)(totalNegativeStringArr.Length * .80)).Take((totalNegativeStringArr.Length) - 1).ToList();
var x_train_data = x_train_data_Positive.Concat(x_train_data_Negative).ToArray();
var y_train_data = Enumerable.Repeat(1.0, x_train_data_Positive.Count()).Concat(Enumerable.Repeat(0.0, x_train_data_Negative.Count)).ToList();
use logistic regression to train.
theta is like -499.90791893414746, -146972.07046940498, -99836.69980142715
var logisticRegression = new LogisticRegression(x_train_data_vector_Matrix[0].Length, 0.1, 10000);
logisticRegression.Train(x_train_data_vector_Matrix.ToArray(), y_train_data.ToArray());
public class LogisticRegression
{
///
/// logistic regression parameters.
///
private double _theta;
/// <summary>
/// learning rate for training the model.
/// </summary>
private double _alpha;
/// <summary>
/// Number of iteration for training the model.
/// </summary>
private int _epoch;
public LogisticRegression(int thethaLength, double alpha, int epoch )
{
_theta = new double[thethaLength];
_alpha = alpha;
_epoch = epoch;
}
public void Train(double[][] trainingData_x, double[] trainingLabels_y)
{
for (int i = 0; i < _epoch; i++)
{
var tempTheta = new double[_theta.Length];
for (int j = 0; j < _theta.Length; j++)
{
var costFuncValue = CostFunctionDerivative(j, trainingData_x, trainingLabels_y);
//Console.WriteLine(costFuncValue.ToString());
tempTheta[j] = _theta[j] - _alpha*costFuncValue;
}
for (int j = 0; j < _theta.Length; j++)
{
_theta[j] = tempTheta[j];
}
Console.WriteLine($"{_theta[0]}, {_theta[1]}, {_theta[2]}");
}
}
private double CostFunctionDerivative(int xColumnId, double[][] x, double[] y)
{
var m = x.Length;
var sum = 0.0;
for (int i = 0; i< m; i++)
{
var yhat = Predict(x[i], _theta);
sum += (yhat - y[i]) * x[i][xColumnId];
}
var retVal = (-1 * sum )/ m ;
return retVal;
}
public double Predict(double[] x, double[] theta = null)
{
if (theta == null)
{
theta = _theta;
}
var xtheta_dotproduct = x.MatrixDotProduct(theta);
var sigmoidValue = Sigmoid(xtheta_dotproduct);
return sigmoidValue;
}
private double Sigmoid(double value)
{
return 1.0 / (1 + Math.Exp(-value));
}
}