Assignment #11: Debugging and Defensive Programming in R

> # Step 0: Original helper function and buggy function
> # ---------------------------------------------------------
> 
# Flag Tukey outliers in a numeric vector.
#
# A value is flagged when it lies more than `k` interquartile ranges below
# the first quartile or above the third quartile.
#
# Args:
#   x: Numeric vector to screen. Missing values are ignored when the
#      quartiles are computed, but an NA element still yields NA in the
#      result because the comparisons propagate NA.
#   k: Fence multiplier applied to the interquartile range (default 1.5).
#
# Returns:
#   A logical vector the same length as `x`; TRUE marks an outlier.
tukey.outlier <- function(x, k = 1.5) {
  # One quantile() call for both quartiles; names = FALSE keeps the "25%" /
  # "75%" labels from leaking into the result for length-one input.
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  iqr <- quartiles[2L] - quartiles[1L]
  lower_fence <- quartiles[1L] - k * iqr
  upper_fence <- quartiles[2L] + k * iqr
  x < lower_fence | x > upper_fence
}
> 
> tukey_multiple <- function(x) {  # Intentionally buggy original, kept as-is to reproduce the error in Step 1
+   outliers <- array(TRUE, dim = dim(x))  # logical array shaped like x, every cell starts TRUE
+   for (j in 1:ncol(x)) {  # one pass per column of x
+     outliers[, j] <- outliers[, j] && tukey.outlier(x[, j])  # BUG: `&&` is scalar-only but both operands are whole columns; elementwise `&` is needed
+   }
+   outlier.vec <- vector("logical", length = nrow(x))  # one result slot per row of x
+   for (i in 1:nrow(x)) {
+     outlier.vec[i] <- all(outliers[i, ])  # row i is TRUE only when every column flagged it
+   }
+   return(outlier.vec)
+ }
> # Step 1: Reproduce the error
> # ---------------------------------------------------------
> 
> set.seed(123)  # fix the RNG seed so the matrix printed below is reproducible
> test_mat <- matrix(rnorm(50), nrow = 10)  # 50 standard-normal draws arranged as 10 rows x 5 columns
> 
> # Print the test matrix if you want to inspect it
> print("Test matrix:")
[1] "Test matrix:"
> print(test_mat)
             [,1]       [,2]       [,3]        [,4]        [,5]
 [1,] -0.56047565  1.2240818 -1.0678237  0.42646422 -0.69470698
 [2,] -0.23017749  0.3598138 -0.2179749 -0.29507148 -0.20791728
 [3,]  1.55870831  0.4007715 -1.0260044  0.89512566 -1.26539635
 [4,]  0.07050839  0.1106827 -0.7288912  0.87813349  2.16895597
 [5,]  0.12928774 -0.5558411 -0.6250393  0.82158108  1.20796200
 [6,]  1.71506499  1.7869131 -1.6866933  0.68864025 -1.12310858
 [7,]  0.46091621  0.4978505  0.8377870  0.55391765 -0.40288484
 [8,] -1.26506123 -1.9666172  0.1533731 -0.06191171 -0.46665535
 [9,] -0.68685285  0.7013559 -1.1381369 -0.30596266  0.77996512
[10,] -0.44566197 -0.4727914  1.2538149 -0.38047100 -0.08336907
> 
> # Run the buggy function
> print("Running original buggy function:")
[1] "Running original buggy function:"
> tukey_multiple(test_mat)
Error in outliers[, j] && tukey.outlier(x[, j]) : 'length = 10' in coercion to 'logical(1)'
# Step 2: Diagnose the bug
# ---------------------------------------------------------

# Explanation:
# The issue is the use of &&, which is a scalar operator: it expects
# length-one operands, and since R 4.3 it raises the
# "'length = 10' in coercion to 'logical(1)'" error reproduced in Step 1
# when given longer vectors. Since we need to compare all rows, we must
# use & for element-wise logical operations.

# Step 3: Fix the code
# ---------------------------------------------------------

# Flag the rows of a matrix that are Tukey outliers in every column.
#
# Args:
#   x: Matrix whose columns are screened one at a time with tukey.outlier().
#
# Returns:
#   A logical vector with one element per row of `x`; element i is TRUE only
#   when row i is flagged as an outlier in every column.
corrected_tukey <- function(x) {
  outliers <- array(TRUE, dim = dim(x))
  for (j in seq_len(ncol(x))) {
    # Element-wise `&` keeps one flag per row; `&&` would demand scalars.
    outliers[, j] <- outliers[, j] & tukey.outlier(x[, j])
  }
  outlier.vec <- logical(nrow(x))
  for (i in seq_len(nrow(x))) {
    outlier.vec[i] <- all(outliers[i, ])
  }
  outlier.vec
}

# Step 4: Validate the fix
# ---------------------------------------------------------

print("Running corrected function:")
#> [1] "Running corrected function:"
fixed_result <- corrected_tukey(test_mat)
print(fixed_result)
#> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

print("Length of returned vector:")
#> [1] "Length of returned vector:"
print(length(fixed_result))
#> [1] 10

# This should return a logical vector of length 10 without error.
# Step 5: Defensive enhancements (optional)
# ---------------------------------------------------------

# Validated variant of the row-wise Tukey outlier check.
#
# Args:
#   x: Numeric matrix whose columns are screened one at a time with
#      tukey.outlier().
#
# Returns:
#   A logical vector with one element per row of `x`; element i is TRUE only
#   when row i is flagged as an outlier in every column.
#
# Raises:
#   An error when `x` is not a matrix, or is a matrix that is not numeric.
#
# NOTE: a matrix with zero columns passes both checks and yields TRUE for
# every row, because all() of an empty logical vector is TRUE.
corrected_tukey_safe <- function(x) {
  if (!is.matrix(x)) {
    stop("x must be a matrix.")
  }

  if (!is.numeric(x)) {
    stop("x must be a numeric matrix.")
  }

  outliers <- array(TRUE, dim = dim(x))

  for (j in seq_len(ncol(x))) {
    # Element-wise `&` keeps one flag per row of the column.
    outliers[, j] <- outliers[, j] & tukey.outlier(x[, j])
  }

  outlier.vec <- logical(nrow(x))

  for (i in seq_len(nrow(x))) {
    outlier.vec[i] <- all(outliers[i, ])
  }

  outlier.vec
}

# Test the safe corrected version
print("Running safe corrected function:")
#> [1] "Running safe corrected function:"
safe_result <- corrected_tukey_safe(test_mat)
print(safe_result)
#> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

print("Length of safe returned vector:")
#> [1] "Length of safe returned vector:"
print(length(safe_result))


Comments

Popular posts from this blog

Module # 4 Programming structure assignment

Assignment #10: Building Your Own R Package

Module # 8 Input/Output, string manipulation and plyr package