Skip to content

Commit

Permalink
Change Jaccard to parallel version. Add more testing for it
Browse files Browse the repository at this point in the history
  • Loading branch information
sneumann committed Aug 23, 2024
1 parent 666240d commit d4a9566
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 20 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Depends:
plotly
Imports:
egg,
foreach,
graphics,
grDevices,
methods,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ exportPattern("^[^\\.]")
import(egg)
importFrom("grDevices", "as.raster", "rainbow", "rgb")
importFrom("methods", "as")
importFrom("foreach", "foreach", "%do%", "%dopar%")
importFrom("graphics", "axis", "mtext", "par", "plot.new",
"plot.window", "points", "rasterImage", "rect", "segments",
"title")
Expand Down
36 changes: 17 additions & 19 deletions R/Analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,26 +123,24 @@ calculateDistanceMatrix <- function(dataList, filter, distanceMeasure = "Jaccard
"Jaccard"={
featureIndeces <- dataList$featureIndeces[filter]

distanceMatrix <- matrix(nrow = numberOfPrecursors, ncol = numberOfPrecursors)
for(i in seq_len(numberOfPrecursors)){
time <- proc.time()["user.self"]
if(time - lastOut > 1){
lastOut <- time
precursorProgress <- (i - lastPrecursor) / numberOfPrecursors
lastPrecursor <- i
if(!is.na(progress)) if(progress) incProgress(amount = precursorProgress, detail = paste("Distance ", i, " / ", numberOfPrecursors, sep = "")) else print(paste("Distance ", i, " / ", numberOfPrecursors, sep = ""))
}

for(j in seq_len(numberOfPrecursors)){
if(i == j){
distanceMatrix[i, j] <- 0
next
}
distanceMatrix <- do.call(rbind, parallel::mclapply(featureIndeces, function(x) {
# Convert vectors to sets
set1 <- unique(x)

row <- sapply(featureIndeces, function(y) {
# Convert vectors to sets
set2 <- unique(y)

intersectionCount <- sum(featureIndeces[[i]] %in% featureIndeces[[j]])
distanceMatrix[i, j] <- 1 - intersectionCount / (length(featureIndeces[[i]]) + length(featureIndeces[[j]]) - intersectionCount)
}
}
# Calculate the intersection and union of the two sets
intersection <- length(intersect(set1, set2))
union <- length(union(set1, set2))

# Calculate the Jaccard index
jaccard <- 1- intersection / union
jaccard
})
row
}))
},
"Jaccard (intensity-weighted pure)"={
featureIndeces <- dataList$featureIndeces[filter]
Expand Down
16 changes: 15 additions & 1 deletion tests/testthat/test_dataprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,28 @@ expect_equal(min(result$dataFrameMeasurements[,1]), 0)

})

test_that("HCA works", {
test_that("calculateDistanceMatrix works", {
load(system.file("extdata/testdata/calculateDistanceMatrix.Rdata", package = "MetFamily"))

cluster <- parallel::makeForkCluster()
doParallel::registerDoParallel(cl=cluster)
result <- calculateDistanceMatrix(dataList=dataList,
filter=filter,
distanceMeasure = "Jaccard", progress = FALSE)
parallel::stopCluster(cluster)
expect_equal(sum(result$distanceMatrix), 46522.7, tolerance = 0.005)
expect_equal(sum(result$filter), 294508)
expect_true(result$distanceMeasure == "Jaccard")
expect_true(all(diag(result$distanceMatrix)==0))

i<-c(1, 215, 39, 107, 48, 49, 219)
j<-c(219, 139, 130, 147, 13, 90, 1)
a <- c(1, 0.96875, 0.875, 1, 1, 0.888888888888889, 0, 1, 0.918032786885246,
1, 1, 1, 1, 0.978260869565217, 1, 0.925925925925926, 1, 1, 1, 1,
0.973684210526316, 1, 0.968253968253968, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0.976744186046512, 0.947368421052632, 1, 0.95, 0.95,
0.91304347826087, 0, 1, 1, 1, 1, 1, 1)

expect_equal(as.numeric(result$distanceMatrix[i,j]), a)

})

0 comments on commit d4a9566

Please sign in to comment.