Skip to content

Commit

Permalink
More efficient distribution sampling.
Browse files Browse the repository at this point in the history
  • Loading branch information
aalexandrov committed Mar 25, 2016
1 parent 7655a28 commit 6cf73c6
Showing 1 changed file with 14 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package de.tu_berlin.dima.experiments.flink.hashagg.datagen.flink

import org.apache.commons.math3.distribution.{ZipfDistribution, BinomialDistribution, UniformIntegerDistribution}
import org.apache.commons.math3.distribution._

object Distributions {

Expand All @@ -13,13 +13,21 @@ object Distributions {
def sample(cp: Double) = distribution.inverseCumulativeProbability(cp)
}

case class Binomial(sampleSize: Int, successProbability: Double) extends DiscreteDistribution {
val distribution = new BinomialDistribution(sampleSize - 1, successProbability)
def sample(cp: Double) = distribution.inverseCumulativeProbability(cp) - 1
/**
 * Discrete sampler that stands in for a binomial distribution with `sampleSize - 1` trials.
 *
 * Approximated by a NormalDistribution with mean `n * p` and standard deviation
 * `sqrt(n * p * (1 - p))`, since BinomialDistribution is too slow.
 * For the relationship between the two, see this article:
 * http://www.real-statistics.com/binomial-and-related-distributions/relationship-binomial-and-normal-distributions/
 *
 * @param sampleSize one more than the number of trials `n` used for the approximation
 * @param p          success probability of a single trial
 */
case class Binomial(sampleSize: Int, p: Double) extends DiscreteDistribution {
  val n = sampleSize - 1
  val distribution = new NormalDistribution(n * p, Math.sqrt(n * p * (1 - p)))

  /**
   * Maps a cumulative probability to a sample value.
   *
   * The normal quantile is shifted by -1 and truncated to an Int. Unlike a binomial
   * quantile, a normal quantile can be zero or negative (and is -Infinity for `cp = 0`),
   * so the truncated value is clamped to 0 before the modulo is applied; `%` keeps the
   * sign of its left operand and would otherwise return negative samples. Values that
   * are already non-negative are reduced modulo `n` exactly as before.
   *
   * @param cp cumulative probability passed to the inverse CDF
   * @return for `n > 0`, a value in `[0, n)`
   */
  def sample(cp: Double): Int = {
    val shifted = (distribution.inverseCumulativeProbability(cp) - 1).toInt
    math.max(shifted, 0) % n
  }
}

// TODO: since ZipfDistribution is too slow, either approximate with ParetoDistribution or write custom CDF^{-1}
// for the relationship between the two, see this article
// http://www.hpl.hp.com/research/idl/papers/ranking/ranking.html
// Discrete sampler configured by a sample size and a Zipf exponent.
// NOTE(review): this block defines `distribution` and `sample` twice. The surrounding page is a
// commit diff rendered without +/- markers, so the ZipfDistribution pair is presumably the
// removed (pre-commit) version and the ParetoDistribution pair the added one - confirm
// against the repository before relying on either.
case class Zipf(sampleSize: Int, exponent: Double) extends DiscreteDistribution {
// Variant 1 (presumably removed): sample directly from a ZipfDistribution over `sampleSize` elements.
val distribution = new ZipfDistribution(sampleSize, exponent)
// Inverse CDF value shifted by -1.
def sample(cp: Double) = distribution.inverseCumulativeProbability(cp) - 1
// Variant 2 (presumably added): modulus used to fold samples back below `sampleSize`.
val n = sampleSize
// Pareto distribution with scale 1 and shape 1 + 1/exponent, used in place of the Zipf distribution.
val distribution = new ParetoDistribution(1, 1 + 1/exponent)
// Inverse CDF value shifted by -1, truncated to Int, then reduced modulo n.
// NOTE(review): for cp = 1 the quantile is presumably +Infinity, which truncates to Int.MaxValue
// before the modulo - confirm whether callers can pass cp = 1.
def sample(cp: Double) = (distribution.inverseCumulativeProbability(cp) - 1).toInt % n
}
}

0 comments on commit 6cf73c6

Please sign in to comment.