-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | On-line accumulation of rank-based statistics
--   
--   A new data structure for accurate on-line accumulation of rank-based
--   statistics such as quantiles and trimmed means.
--   
--   See original paper: "Computing extremely accurate quantiles using
--   t-digest" by Ted Dunning and Otmar Ertl for more details
--   <a>https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf</a>.
@package tdigest
@version 0.1


-- | Internals of <a>TDigest</a>.
--   
--   Tree implementation is based on <i>Adams’ Trees Revisited</i> by Milan
--   Straka <a>http://fox.ucw.cz/papers/bbtree/bbtree.pdf</a>
module Data.TDigest.Internal
assert :: Bool -> String -> a -> a
type Mean = Double
type Weight = Double
type Centroid = (Mean, Weight)
type Size = Int

-- | <a>TDigest</a> is a tree of centroids.
--   
--   <tt>compression</tt> is a <tt>1/δ</tt>. The greater the value of
--   <tt>compression</tt> the less likely value merging will happen.
data TDigest (compression :: Nat)

-- | Tree node
Node :: {-# UNPACK #-} !Size -> {-# UNPACK #-} !Mean -> {-# UNPACK #-} !Weight -> {-# UNPACK #-} !Weight -> !(TDigest compression) -> !(TDigest compression) -> TDigest compression

-- | Empty tree
Nil :: TDigest compression

-- | Both <a>cons</a> and <a>snoc</a> are <a>insert</a>

-- | <a>TDigest</a> has only strict fields.

-- | <a>TDigest</a> isn't compressed after de-serialisation, but it can
--   still be smaller.
getCentroids :: TDigest comp -> [Centroid]

-- | Total count of samples.
--   
--   <pre>
--   &gt;&gt;&gt; totalWeight (tdigest [1..100] :: TDigest 5)
--   100.0
--   </pre>
totalWeight :: TDigest comp -> Weight
size :: TDigest comp -> Int

-- | Center of left-most centroid. Note: may be different than min element
--   inserted.
--   
--   <pre>
--   &gt;&gt;&gt; minimumValue (tdigest [1..100] :: TDigest 3)
--   1.0
--   </pre>
minimumValue :: TDigest comp -> Mean

-- | Center of right-most centroid. Note: may be different than max element
--   inserted.
--   
--   <pre>
--   &gt;&gt;&gt; maximumValue (tdigest [1..100] :: TDigest 3)
--   99.0
--   </pre>
maximumValue :: TDigest comp -> Mean
emptyTDigest :: TDigest comp
combineDigest :: KnownNat comp => TDigest comp -> TDigest comp -> TDigest comp
insertCentroid :: forall comp. KnownNat comp => Centroid -> TDigest comp -> TDigest comp

-- | Constructor which calculates size and total weight.
node :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp

-- | Balance after right insertion.
balanceR :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp

-- | Balance after left insertion.
balanceL :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp

-- | Alias to <a>Node</a>
node' :: Int -> Mean -> Weight -> Weight -> TDigest comp -> TDigest comp -> TDigest comp

-- | Create singular node.
singNode :: Mean -> Weight -> TDigest comp

-- | Add two weighted means together.
combinedCentroid :: Mean -> Weight -> Mean -> Weight -> Centroid

-- | Calculate the threshold, i.e. maximum weight of centroid.
threshold :: Double -> Double -> Double -> Double

-- | Compress <a>TDigest</a>.
--   
--   Reinsert the centroids in "better" order (in the original paper: at
--   random) so they have an opportunity to merge.
--   
--   Compression will happen only if size is both: bigger than
--   <tt><a>relMaxSize</a> * comp</tt> and bigger than <a>absMaxSize</a>.
compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp

-- | Perform compression, even if current size says it's not necessary.
forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
toMVector :: forall comp s. KnownNat comp => TDigest comp -> ST s (MVector s (Centroid, Double))

-- | Relative size parameter. Hard-coded value: 25.
relMaxSize :: Int

-- | Absolute size parameter. Hard-coded value: 1000.
absMaxSize :: Int
balOmega :: Int
balAlpha :: Int

-- | Output the <a>TDigest</a> tree.
debugPrint :: TDigest comp -> IO ()

-- | <pre>
--   <a>isRight</a> . <a>validate</a>
--   </pre>
valid :: TDigest comp -> Bool

-- | Check various invariants in the <a>TDigest</a> tree.
validate :: TDigest comp -> Either String (TDigest comp)
eq :: Double -> Double -> Bool
negInf :: Double
posInf :: Double

-- | Insert single value into <a>TDigest</a>.
insert :: KnownNat comp => Double -> TDigest comp -> TDigest comp

-- | Insert single value, don't compress <a>TDigest</a> even if needed.
--   
--   For sensibly bounded input, it makes sense to let <a>TDigest</a> grow
--   (it might grow linearly in size), and after that compress it once.
insert' :: KnownNat comp => Double -> TDigest comp -> TDigest comp

-- | Make a <a>TDigest</a> of a single data point.
singleton :: KnownNat comp => Double -> TDigest comp

-- | Strict <a>foldl'</a> over <a>Foldable</a> structure.
tdigest :: (Foldable f, KnownNat comp) => f Double -> TDigest comp
instance GHC.Show.Show (Data.TDigest.Internal.TDigest compression)
instance GHC.TypeNats.KnownNat comp => Data.Semigroup.Semigroup (Data.TDigest.Internal.TDigest comp)
instance GHC.TypeNats.KnownNat comp => Data.Semigroup.Reducer.Reducer GHC.Types.Double (Data.TDigest.Internal.TDigest comp)
instance GHC.TypeNats.KnownNat comp => GHC.Base.Monoid (Data.TDigest.Internal.TDigest comp)
instance Control.DeepSeq.NFData (Data.TDigest.Internal.TDigest comp)
instance GHC.TypeNats.KnownNat comp => Data.Binary.Class.Binary (Data.TDigest.Internal.TDigest comp)


-- | <a>TDigest</a> postprocessing functions.
--   
--   These are re-exported from <a>Data.TDigest</a> module.
module Data.TDigest.Postprocess

-- | Calculate histogram based on the <a>TDigest</a>.
histogram :: TDigest comp -> Maybe (NonEmpty HistBin)

-- | Histogram bin
data HistBin
HistBin :: !Double -> !Double -> !Double -> !Double -> !Double -> HistBin

-- | lower bound
[hbMin] :: HistBin -> !Double

-- | upper bound
[hbMax] :: HistBin -> !Double

-- | original value: <tt>(mi + ma) / 2</tt>
[hbValue] :: HistBin -> !Double

-- | weight ("area" of the bar)
[hbWeight] :: HistBin -> !Double

-- | weight from the right
[hbCumWeight] :: HistBin -> !Double

-- | Median, i.e. <tt><a>quantile</a> 0.5</tt>.
median :: TDigest comp -> Maybe Double

-- | Calculate quantile of a specific value.
quantile :: Double -> TDigest comp -> Maybe Double

-- | Mean.
--   
--   <pre>
--   &gt;&gt;&gt; mean (tdigest [1..100] :: TDigest 10)
--   Just 50.5
--   </pre>
--   
--   <i>Note:</i> if you only need the mean, calculate it directly.
mean :: TDigest comp -> Maybe Double

-- | Mean from the histogram.
mean' :: NonEmpty HistBin -> Double

-- | Variance.
variance :: TDigest comp -> Maybe Double

-- | Variance from the histogram.
variance' :: NonEmpty HistBin -> Double

-- | Cumulative distribution function.
--   
--   <i>Note:</i> if this is the only thing you need, it's more efficient
--   to count this directly.
cdf :: Double -> TDigest comp -> Double

-- | Alias of <a>quantile</a>.
icdf :: Double -> TDigest comp -> Maybe Double

-- | Histogram from centroids
histogram' :: NonEmpty (Mean, Weight) -> NonEmpty HistBin

-- | Quantile from the histogram.
quantile' :: Double -> Weight -> NonEmpty HistBin -> Double

-- | Validate that list of <a>HistBin</a> is a valid "histogram".
validateHistogram :: Foldable f => f HistBin -> Either String (f HistBin)
instance GHC.Show.Show Data.TDigest.Postprocess.HistBin
instance Data.Semigroup.Semigroup Data.TDigest.Postprocess.Variance
instance Data.Semigroup.Semigroup Data.TDigest.Postprocess.Mean'


-- | A new data structure for accurate on-line accumulation of rank-based
--   statistics such as quantiles and trimmed means. See original paper:
--   "Computing extremely accurate quantiles using t-digest" by Ted Dunning
--   and Otmar Ertl for more details
--   <a>https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf</a>.
--   
--   <h3>Examples</h3>
--   
--   <pre>
--   &gt;&gt;&gt; quantile 0.99 (tdigest [1..1000] :: TDigest 25)
--   Just 990.5
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; quantile 0.99 (tdigest [1..1000] :: TDigest 3)
--   Just 989.0...
--   </pre>
--   
--   t-Digest is more precise in the tails; the median especially is imprecise:
--   
--   <pre>
--   &gt;&gt;&gt; median (forceCompress $ tdigest [1..1000] :: TDigest 25)
--   Just 497.6...
--   </pre>
--   
--   <h3>Semigroup</h3>
--   
--   This operation isn't strictly associative, but statistical variables
--   shouldn't be affected.
--   
--   <pre>
--   &gt;&gt;&gt; let td xs = tdigest xs :: TDigest 10
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; median (td [1..500] &lt;&gt; (td [501..1000] &lt;&gt; td [1001..1500]))
--   Just 802...
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; median ((td [1..500] &lt;&gt; td [501..1000]) &lt;&gt; td [1001..1500])
--   Just 726...
--   </pre>
--   
--   Linearly ordered input is the worst-case scenario:
--   
--   <pre>
--   &gt;&gt;&gt; let td' xs = tdigest (fairshuffle xs) :: TDigest 10
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; median (td' [1..500] &lt;&gt; (td' [501..1000] &lt;&gt; td' [1001..1500]))
--   Just 750.3789...
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; median ((td' [1..500] &lt;&gt; td' [501..1000]) &lt;&gt; td' [1001..1500])
--   Just 750.3789...
--   </pre>
module Data.TDigest

-- | <a>TDigest</a> is a tree of centroids.
--   
--   <tt>compression</tt> is a <tt>1/δ</tt>. The greater the value of
--   <tt>compression</tt> the less likely value merging will happen.
data TDigest (compression :: Nat)

-- | Strict <a>foldl'</a> over <a>Foldable</a> structure.
tdigest :: (Foldable f, KnownNat comp) => f Double -> TDigest comp

-- | Make a <a>TDigest</a> of a single data point.
singleton :: KnownNat comp => Double -> TDigest comp

-- | Insert single value into <a>TDigest</a>.
insert :: KnownNat comp => Double -> TDigest comp -> TDigest comp

-- | Insert single value, don't compress <a>TDigest</a> even if needed.
--   
--   For sensibly bounded input, it makes sense to let <a>TDigest</a> grow
--   (it might grow linearly in size), and after that compress it once.
insert' :: KnownNat comp => Double -> TDigest comp -> TDigest comp

-- | Compress <a>TDigest</a>.
--   
--   Reinsert the centroids in "better" order (in the original paper: at
--   random) so they have an opportunity to merge.
--   
--   Compression will happen only if size is both: bigger than
--   <tt><a>relMaxSize</a> * comp</tt> and bigger than <a>absMaxSize</a>.
compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp

-- | Perform compression, even if current size says it's not necessary.
forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp

-- | Total count of samples.
--   
--   <pre>
--   &gt;&gt;&gt; totalWeight (tdigest [1..100] :: TDigest 5)
--   100.0
--   </pre>
totalWeight :: TDigest comp -> Weight

-- | Center of left-most centroid. Note: may be different than min element
--   inserted.
--   
--   <pre>
--   &gt;&gt;&gt; minimumValue (tdigest [1..100] :: TDigest 3)
--   1.0
--   </pre>
minimumValue :: TDigest comp -> Mean

-- | Center of right-most centroid. Note: may be different than max element
--   inserted.
--   
--   <pre>
--   &gt;&gt;&gt; maximumValue (tdigest [1..100] :: TDigest 3)
--   99.0
--   </pre>
maximumValue :: TDigest comp -> Mean

-- | Calculate histogram based on the <a>TDigest</a>.
histogram :: TDigest comp -> Maybe (NonEmpty HistBin)

-- | Histogram bin
data HistBin
HistBin :: !Double -> !Double -> !Double -> !Double -> !Double -> HistBin

-- | lower bound
[hbMin] :: HistBin -> !Double

-- | upper bound
[hbMax] :: HistBin -> !Double

-- | original value: <tt>(mi + ma) / 2</tt>
[hbValue] :: HistBin -> !Double

-- | weight ("area" of the bar)
[hbWeight] :: HistBin -> !Double

-- | weight from the right
[hbCumWeight] :: HistBin -> !Double

-- | Median, i.e. <tt><a>quantile</a> 0.5</tt>.
median :: TDigest comp -> Maybe Double

-- | Calculate quantile of a specific value.
quantile :: Double -> TDigest comp -> Maybe Double

-- | Mean.
--   
--   <pre>
--   &gt;&gt;&gt; mean (tdigest [1..100] :: TDigest 10)
--   Just 50.5
--   </pre>
--   
--   <i>Note:</i> if you only need the mean, calculate it directly.
mean :: TDigest comp -> Maybe Double

-- | Variance.
variance :: TDigest comp -> Maybe Double

-- | Standard deviation, square root of variance.
--   
--   <pre>
--   &gt;&gt;&gt; stddev (tdigest $ fairshuffle [0..100] :: TDigest 10)
--   Just 29.1...
--   </pre>
stddev :: TDigest comp -> Maybe Double

-- | Cumulative distribution function.
--   
--   <i>Note:</i> if this is the only thing you need, it's more efficient
--   to count this directly.
cdf :: Double -> TDigest comp -> Double

-- | Alias of <a>quantile</a>.
icdf :: Double -> TDigest comp -> Maybe Double

-- | <pre>
--   <a>isRight</a> . <a>validate</a>
--   </pre>
valid :: TDigest comp -> Bool

-- | Check various invariants in the <a>TDigest</a> tree.
validate :: TDigest comp -> Either String (TDigest comp)

-- | Output the <a>TDigest</a> tree.
debugPrint :: TDigest comp -> IO ()

-- | Validate that list of <a>HistBin</a> is a valid "histogram".
validateHistogram :: Foldable f => f HistBin -> Either String (f HistBin)


-- | This is the non-empty version of <a>TDigest</a>, i.e. this is not a
--   <a>Monoid</a>, but on the other hand, <a>quantile</a> returns
--   <a>Double</a> not <tt><a>Maybe</a> <a>Double</a></tt>.
--   
--   See <a>Data.TDigest</a> for documentation. The exports should be
--   similar, sans non-<a>Maybe</a> results.
--   
--   <h3>Examples</h3>
--   
--   <pre>
--   &gt;&gt;&gt; quantile 0.99 (tdigest (1 :| [2..1000]) :: TDigest 25)
--   990.5
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; quantile 0.99 (tdigest (1 :| [2..1000]) :: TDigest 3)
--   989.0...
--   </pre>
--   
--   t-Digest is more precise in the tails; the median especially is imprecise:
--   
--   <pre>
--   &gt;&gt;&gt; median (forceCompress $ tdigest (1 :| [2..1000]) :: TDigest 25)
--   497.6...
--   </pre>
module Data.TDigest.NonEmpty
data TDigest comp
tdigest :: (Foldable1 f, KnownNat comp) => f Double -> TDigest comp
singleton :: KnownNat comp => Double -> TDigest comp
insert :: KnownNat comp => Double -> TDigest comp -> TDigest comp
insert' :: KnownNat comp => Double -> TDigest comp -> TDigest comp
compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
totalWeight :: TDigest comp -> Weight
minimumValue :: TDigest comp -> Mean
maximumValue :: TDigest comp -> Mean
histogram :: TDigest comp -> NonEmpty HistBin

-- | Histogram bin
data HistBin
HistBin :: !Double -> !Double -> !Double -> !Double -> !Double -> HistBin

-- | lower bound
[hbMin] :: HistBin -> !Double

-- | upper bound
[hbMax] :: HistBin -> !Double

-- | original value: <tt>(mi + ma) / 2</tt>
[hbValue] :: HistBin -> !Double

-- | weight ("area" of the bar)
[hbWeight] :: HistBin -> !Double

-- | weight from the right
[hbCumWeight] :: HistBin -> !Double
median :: TDigest comp -> Double
quantile :: Double -> TDigest comp -> Double
mean :: TDigest comp -> Double
variance :: TDigest comp -> Double
stddev :: TDigest comp -> Double
cdf :: Double -> TDigest comp -> Double

-- | Alias of <a>quantile</a>.
icdf :: Double -> TDigest comp -> Double
instance Control.DeepSeq.NFData (Data.TDigest.NonEmpty.TDigest comp)
instance GHC.Show.Show (Data.TDigest.NonEmpty.TDigest comp)
instance GHC.TypeNats.KnownNat comp => Data.Semigroup.Semigroup (Data.TDigest.NonEmpty.TDigest comp)
instance GHC.TypeNats.KnownNat comp => Data.Semigroup.Reducer.Reducer GHC.Types.Double (Data.TDigest.NonEmpty.TDigest comp)
instance GHC.TypeNats.KnownNat comp => Data.Binary.Class.Binary (Data.TDigest.NonEmpty.TDigest comp)
