Greetings,
I've been studying a user defined function that produces MLE's for independent variables, etc,etc. I am having trouble understanding how information generated in the array based function is presented to the user in a spreadsheet.
Having run the full code a few times and having only generated a single cell of data (though the output value was expected) I am stumped as to why the full set of output values was not produced in the case I set "Stats" to False. This section is immediately below. Further below is the chunk of code that serves as the UDF; its a longer script.
Basically, I would like to rebuild parts of the output layout and understand what might be limiting the current output to a single cell reading "Beta0".
Below is the whole script
I've been studying a user defined function that produces MLE's for independent variables, etc,etc. I am having trouble understanding how information generated in the array based function is presented to the user in a spreadsheet.
Having run the full code a few times and having only generated a single cell of data (though the output value was expected) I am stumped as to why the full set of output values was not produced in the case I set "Stats" to False. This section is immediately below. Further below is the chunk of code that serves as the UDF; its a longer script.
Basically, I would like to rebuild parts of the output layout and understand what might be limiting the current output to a single cell reading "Beta0".
Code:
Dim output() As Variant
If Stats = False Then 'if stats not selected the output betas and labels
ReDim output(1 To 2, 1 To M) As Variant
For ii = 1 To M
output(1, ii) = "Beta" & (ii - 1)
output(2, ii) = Betas(ii)
Next ii
Logit = output
Exit Function
End If
Below is the whole script
Code:
Function Logit(known_y As Range, known_x As Range, Cutoff As Double, Optional Constant As Boolean = True, Optional Stats = False)
Dim Intercept As Double
If Constant = True Then
Intercept = 1
ElseIf Constant = False Then
Intercept = 0
End If
Dim M As Integer: M = known_x.Columns.Count + Intercept 'number of independent variables
Dim N As Integer: N = known_x.Rows.Count 'number of rows of observations
Dim IndVar() As Double: ReDim IndVar(1 To N, 1 To M) As Double
Dim ii As Integer: ii = 1
Dim jj As Integer: jj = 1
Dim kk As Integer: kk = 1
Dim y_bar As Double: y_bar = 0
For ii = 1 To N
y_bar = y_bar + known_y(ii) ' calculates the average y
If Intercept = 1 Then
IndVar(ii, 1) = 1 'create intercept
End If
For jj = 1 + Intercept To M
IndVar(ii, jj) = known_x(ii, jj - Intercept) ' load in independent variables
Next jj
Next ii
y_bar = y_bar / N 'Average y for calculation of R2
Dim MaxIt As Integer: MaxIt = 100 'maximum number of iterations in Newton Algo
Dim cc As Integer: cc = 1 'main loop counter
Dim Epsilon As Double: Epsilon = 0.000001 'convergence criteria of Newton Algo
Dim Err As Double: Err = 1 'measure of convergence of Newton Algo
Dim y_hats() As Double: ReDim y_hats(1 To N) As Double 'Model Forecast
Dim Betas() As Double: ReDim Betas(1 To M) As Double 'Estimated Betas
Dim z() As Double: ReDim z(1 To N) As Double
Dim j() As Double: ReDim j(1 To M) As Double
Dim H() As Double: ReDim H(1 To M, 1 To M) As Double
Dim Newt() As Variant: ReDim Newt(1 To M) As Variant 'Newton Gain
Dim LogLikelihood As Double: LogLikelihood = 1
Dim LogLikelihoodP As Double: LogLikelihoodP = 1
'This next section implements Newton's Method to estimate the beta coefficients
Do While cc < MaxIt
For ii = 1 To N
For jj = 1 To M
z(ii) = z(ii) + Betas(jj) * IndVar(ii, jj)
Next jj
y_hats(ii) = 1 / (1 + Exp(-1 * z(ii))) 'model estimate
For jj = 1 To M
j(jj) = j(jj) + (known_y(ii) - y_hats(ii)) * IndVar(ii, jj) 'Jacobian
For kk = 1 To M
H(jj, kk) = H(jj, kk) - y_hats(ii) * (1 - y_hats(ii)) * IndVar(ii, jj) * IndVar(ii, kk) 'Hessian
Next kk
Next jj
LogLikelihood = LogLikelihood + (known_y(ii) * Log(y_hats(ii)) + (1 - known_y(ii)) * Log(1 - y_hats(ii)))
Next ii
If Abs(LogLikelihood - LogLikelihoodP) < Epsilon Then Exit Do 'check if converged, exit if true
LogLikelihoodP = LogLikelihood
Newt = Application.WorksheetFunction.MMult(j, Application.WorksheetFunction.MInverse(H))
For jj = 1 To M
Betas(jj) = Betas(jj) - Newt(jj)
Next jj
ReDim j(1 To M): ReDim H(1 To M, 1 To M): ReDim z(1 To N) As Double 'Clear Jacobian and Hessian Matrices
LogLikelihood = 0
cc = cc + 1
Loop
'GoodNess of Fit Statistics
Dim output() As Variant
If Stats = False Then 'if stats not selected the output betas and labels
ReDim output(1 To 2, 1 To M) As Variant
For ii = 1 To M
output(1, ii) = "Beta" & (ii - 1)
output(2, ii) = Betas(ii)
Next ii
Logit = output
Exit Function
End If
Dim HInv() As Variant: ReDim HInv(1 To M, 1 To M) As Variant
Dim Tstat() As Double: ReDim Tstat(1 To M) As Double
HInv = Application.WorksheetFunction.MInverse(H)
ReDim output(1 To 25, 1 To M + 1) As Variant
For ii = 1 To M
output(1, ii + 1) = "Beta" & (ii - 1) 'label
output(2, ii + 1) = Betas(ii) 'betas
output(3, ii + 1) = Sqr(-HInv(ii, ii)) 'standard errors
output(4, ii + 1) = output(2, ii + 1) / output(3, ii + 1)
output(5, ii + 1) = (1 - Application.WorksheetFunction.NormSDist(Abs(output(4, ii + 1)))) * 2 'p-value
Next ii
output(1, 1) = ""
output(2, 1) = "Coeff"
output(3, 1) = "SE (Beta)"
output(4, 1) = "z-stat"
output(5, 1) = "p-value"
Dim LogLikelihood0 As Double: LogLikelihood0 = N * (y_bar * Log(y_bar) + (1 - y_bar) * Log(1 - y_bar))
output(6, 1) = "McFaddenR2": output(6, 2) = 1 - LogLikelihood / LogLikelihood0
output(7, 1) = "Cox&SnellR2": output(7, 2) = 1 - Exp(-2 / N * (LogLikelihood - LogLikelihood0))
output(8, 1) = "Iterations": output(8, 2) = cc - 1
output(9, 1) = "LR": output(9, 2) = 2 * (LogLikelihood - LogLikelihood0)
output(10, 1) = "LR p-value": output(10, 2) = Application.WorksheetFunction.ChiDist(output(9, 2), M - 1) 'p-value for LR
'This section calculates the contingency table and classification statistics
Dim N_TP As Integer: Dim N_TN As Integer
Dim N_FP As Integer: Dim N_FN As Integer
For ii = 1 To N
If known_y(ii) = 1 And (y_hats(ii) - Cutoff) > 0 Then
N_TP = N_TP + 1
ElseIf known_y(ii) = 0 And (y_hats(ii) - Cutoff) <= 0 Then
N_TN = N_TN + 1
ElseIf known_y(ii) = 0 And (y_hats(ii) - Cutoff) > 0 Then
N_FP = N_FP + 1
ElseIf known_y(ii) = 1 And (y_hats(ii) - Cutoff) <= 0 Then
N_FN = N_FN + 1
End If
Next ii
output(12, 1) = "": output(12, 2) = "Actual Response":
output(13, 1) = "Prediction": output(13, 2) = "Positive": output(13, 3) = "Negative"
output(14, 1) = "Positive": output(15, 1) = "Negative"
output(14, 2) = N_TP: output(14, 3) = N_FP
output(15, 2) = N_FN: output(15, 3) = N_TN
output(17, 1) = "Accuracy": output(17, 2) = (N_TP + N_TN) / (N_TP + N_TN + N_FP + N_FN)
output(18, 1) = "Error Rate": output(18, 2) = 1 - output(17, 2)
output(19, 1) = "HitRate": output(19, 2) = N_TP / (N_TP + N_FN)
output(20, 1) = "TrueNegRate": output(20, 2) = N_TN / (N_TN + N_FP)
output(21, 1) = "FalsePos": output(21, 2) = 1 - output(20, 2)
output(22, 1) = "Precision": If N_TP + N_FP = 0 Then output(22, 2) = "Error" Else output(22, 2) = N_TP / (N_TP + N_FP)
output(23, 1) = "NegPredVal": If N_TN + N_FP = 0 Then output(23, 2) = "Error" Else output(23, 2) = N_TN / (N_TN + N_FN)
output(24, 1) = "FalseDiscover": If N_FP + N_TP = 0 Then output(24, 2) = "Error" Else output(24, 2) = N_FP / (N_FP + N_TP)
For ii = 1 To M + 1
output(11, ii) = "xxxxxx": output(16, ii) = "xxxxxx": output(15, ii) = "xxxxxx"
Next ii
For ii = 3 To M + 1
output(6, ii) = "": output(7, ii) = "": output(8, ii) = "":
output(9, ii) = "": output(10, ii) = "": output(12, ii) = "":
output(17, ii) = "": output(18, ii) = "": output(19, ii) = "":
output(20, ii) = "": output(21, ii) = "": output(22, ii) = "":
output(23, ii) = "": output(24, ii) = "":
Next ii
For ii = 4 To M + 1
output(13, ii) = "": output(14, ii) = "": output(15, ii) = ""
Next ii
Logit = output
End Function