-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Statistical functions from the statistics package implemented as
--   Folds.
--   
--   The use of this package allows statistics to be computed using at most
--   two passes over the input data, one to compute a mean and one to
--   compute a further statistic such as variance and <i>n</i>th central
--   moments. All algorithms are the obvious implementation of Bryan
--   O'Sullivan's <a>statistics</a> package implemented as <a>Fold</a>s
--   from the <a>foldl</a> package.
@package foldl-statistics
@version 0.1.5.0


module Control.Foldl.Statistics

-- | The difference between the largest and smallest elements of a sample.
range :: Fold Double Double

-- | A numerically stable sum using Kahan-Babuška-Neumaier summation from
--   <a>Numeric.Sum</a>
sum' :: Fold Double Double

-- | Create a histogram of each value of type a. Useful for folding over
--   categorical values, for example, a CSV where you have a data type for
--   a selection of categories.
--   
--   It should not be used for continuous values which would lead to a high
--   number of keys. One way to avoid this is to use the <a>Profunctor</a>
--   instance for <a>Fold</a> to break your values into categories. For an
--   example of doing this, see <a>ordersOfMagnitude</a>.
histogram :: Ord a => Fold a (Map a Int)

-- | Like <a>histogram</a>, but for use when hashmaps would be more
--   efficient for the particular type <tt>a</tt>.
histogram' :: (Hashable a, Eq a) => Fold a (HashMap a Int)

-- | Provides a histogram of the orders of magnitude of the values in a
--   series. Negative values are placed in the <tt>0.0</tt> category due to
--   the behaviour of <a>logBase</a>. It may be useful to use <tt>lmap
--   abs</tt> on this Fold to get a histogram of the absolute magnitudes.
ordersOfMagnitude :: Fold Double (Map Double Int)

-- | Arithmetic mean. This uses Kahan-Babuška-Neumaier summation, so is
--   more accurate than <a>welfordMean</a> unless the input values are very
--   large.
--   
--   Since foldl-1.2.2, <a>Foldl</a> exports a <a>mean</a> function, so you
--   will have to hide one.
mean :: Fold Double Double

-- | Arithmetic mean. This uses Welford's algorithm to provide numerical
--   stability, using a single pass over the sample data.
--   
--   Compared to <a>mean</a>, this loses a surprising amount of precision
--   unless the inputs are very large.
welfordMean :: Fold Double Double

-- | Arithmetic mean for a weighted sample. It uses a single-pass algorithm
--   analogous to the one used by <a>welfordMean</a>.
meanWeighted :: Fold (Double, Double) Double

-- | Harmonic mean.
harmonicMean :: Fold Double Double

-- | Geometric mean of a sample containing no negative values.
geometricMean :: Fold Double Double

-- | Compute the <i>k</i>th central moment of a sample. The central moment
--   is also known as the moment about the mean.
--   
--   This function requires the mean of the data to compute the central
--   moment.
--   
--   For samples containing many values very close to the mean, this
--   function is subject to inaccuracy due to catastrophic cancellation.
centralMoment :: Int -> Double -> Fold Double Double

-- | Compute the <i>k</i>th and <i>j</i>th central moments of a sample.
--   
--   This fold requires the mean of the data to be known.
--   
--   For samples containing many values very close to the mean, this
--   function is subject to inaccuracy due to catastrophic cancellation.
centralMoments :: Int -> Int -> Double -> Fold Double (Double, Double)

-- | Compute the <i>k</i>th and <i>j</i>th central moments of a sample.
--   
--   This fold requires the mean of the data to be known.
--   
--   This variation of <a>centralMoments</a> uses Kahan-Babuška-Neumaier
--   summation to attempt to improve the accuracy of results, which may
--   make computation slower.
centralMoments' :: Int -> Int -> Double -> Fold Double (Double, Double)

-- | Compute the skewness of a sample. This is a measure of the asymmetry
--   of its distribution.
--   
--   A sample with negative skew is said to be <i>left-skewed</i>. Most of
--   its mass is on the right of the distribution, with the tail on the
--   left.
--   
--   <pre>
--   skewness $ U.to [1,100,101,102,103]
--   ==&gt; -1.497681449918257
--   </pre>
--   
--   A sample with positive skew is said to be <i>right-skewed</i>.
--   
--   <pre>
--   skewness $ U.to [1,2,3,4,100]
--   ==&gt; 1.4975367033335198
--   </pre>
--   
--   A sample's skewness is not defined if its <a>variance</a> is zero.
--   
--   This fold requires the mean of the data to be known.
--   
--   For samples containing many values very close to the mean, this
--   function is subject to inaccuracy due to catastrophic cancellation.
skewness :: Double -> Fold Double Double

-- | Compute the excess kurtosis of a sample. This is a measure of the
--   "peakedness" of its distribution. A high kurtosis indicates that more
--   of the sample's variance is due to infrequent severe deviations,
--   rather than more frequent modest deviations.
--   
--   A sample's excess kurtosis is not defined if its <a>variance</a> is
--   zero.
--   
--   This fold requires the mean of the data to be known.
--   
--   For samples containing many values very close to the mean, this
--   function is subject to inaccuracy due to catastrophic cancellation.
kurtosis :: Double -> Fold Double Double

-- | Maximum likelihood estimate of a sample's variance. Also known as the
--   population variance, where the denominator is <i>n</i>.
variance :: Double -> Fold Double Double

-- | Unbiased estimate of a sample's variance. Also known as the sample
--   variance, where the denominator is <i>n</i>-1.
varianceUnbiased :: Double -> Fold Double Double

-- | Standard deviation. This is simply the square root of the unbiased
--   estimate of the variance.
stdDev :: Double -> Fold Double Double

-- | Weighted variance. This is a biased estimate. Requires the weighted
--   mean of the input data.
varianceWeighted :: Double -> Fold (Double, Double) Double

-- | Maximum likelihood estimate of a sample's variance.
fastVariance :: Fold Double Double

-- | Unbiased estimate of a sample's variance.
fastVarianceUnbiased :: Fold Double Double

-- | Standard deviation. This is simply the square root of the maximum
--   likelihood estimate of the variance.
fastStdDev :: Fold Double Double

-- | Efficiently compute the <b>length, mean, variance, skewness and
--   kurtosis</b> with a single pass.
--   
--   <i>Since: 0.1.1.0</i>
fastLMVSK :: Fold Double LMVSK

-- | Efficiently compute the <b>length, mean, unbiased variance, skewness
--   and kurtosis</b> with a single pass.
--   
--   <i>Since: 0.1.3.0</i>
fastLMVSKu :: Fold Double LMVSK

-- | When returned by <a>fastLMVSK</a>, contains the count, mean, variance,
--   skewness and kurtosis of a series of samples.
--   
--   <i>Since: 0.1.1.0</i>
data LMVSK
LMVSK :: {-# UNPACK #-} !Int -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> LMVSK
[lmvskCount] :: LMVSK -> {-# UNPACK #-} !Int
[lmvskMean] :: LMVSK -> {-# UNPACK #-} !Double
[lmvskVariance] :: LMVSK -> {-# UNPACK #-} !Double
[lmvskSkewness] :: LMVSK -> {-# UNPACK #-} !Double
[lmvskKurtosis] :: LMVSK -> {-# UNPACK #-} !Double
data LMVSKState

-- | Performs the heavy lifting of fastLMVSK. This is exposed because the
--   internal <a>LMVSKState</a> is monoidal, allowing you to run these
--   statistics in parallel over datasets which are split and then combine
--   the results.
--   
--   <i>Since: 0.1.2.0</i>
foldLMVSKState :: Fold Double LMVSKState

-- | Returns the stats which have been computed in an LMVSKState.
--   
--   <i>Since: 0.1.2.0</i>
getLMVSK :: LMVSKState -> LMVSK

-- | Returns the stats which have been computed in an LMVSKState, with the
--   unbiased variance.
--   
--   <i>Since: 0.1.2.0</i>
getLMVSKu :: LMVSKState -> LMVSK

-- | Computes the <b>slope, (Y) intercept and correlation</b> of
--   <tt>(x,y)</tt> pairs, as well as the <a>LMVSK</a> stats for both the x
--   and y series.
--   
--   <pre>
--   &gt;&gt;&gt; F.fold fastLinearReg $ map (\x -&gt; (x,3*x+7)) [1..100]
--   LinRegResult
--     {lrrSlope = 3.0
--     , lrrIntercept = 7.0
--     , lrrCorrelation = 100.0
--     , lrrXStats = LMVSK
--         {lmvskCount = 100
--         , lmvskMean = 50.5
--         , lmvskVariance = 833.25
--         , lmvskSkewness = 0.0
--         , lmvskKurtosis = -1.2002400240024003}
--     , lrrYStats = LMVSK
--         {lmvskCount = 100
--         , lmvskMean = 158.5
--         , lmvskVariance = 7499.25
--         , lmvskSkewness = 0.0
--         , lmvskKurtosis = -1.2002400240024003}
--     }
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; F.fold fastLinearReg $ map (\x -&gt; (x,0.005*x*x+3*x+7)) [1..100]
--   LinRegResult
--     {lrrSlope = 3.5049999999999994
--     , lrrIntercept = -1.5849999999999795
--     , lrrCorrelation = 99.93226275740273
--     , lrrXStats = LMVSK
--         {lmvskCount = 100
--         , lmvskMean = 50.5
--         , lmvskVariance = 833.25
--         , lmvskSkewness = 0.0
--         , lmvskKurtosis = -1.2002400240024003}
--     , lrrYStats = LMVSK
--         {lmvskCount = 100
--         , lmvskMean = 175.4175
--         , lmvskVariance = 10250.37902625
--         , lmvskSkewness = 9.862971188165422e-2
--         , lmvskKurtosis = -1.1923628437011482}
--     }
--   </pre>
--   
--   <i>Since: 0.1.1.0</i>
fastLinearReg :: Fold (Double, Double) LinRegResult

-- | Performs the heavy lifting for <a>fastLinearReg</a>. Exposed because
--   <a>LinRegState</a> is a <a>Monoid</a>, allowing statistics to be
--   computed on datasets in parallel and combined afterwards.
--   
--   <i>Since: 0.1.4.0</i>
foldLinRegState :: Fold (Double, Double) LinRegState

-- | Produces the slope, Y intercept, correlation and LMVSK stats from a
--   <a>LinRegState</a>.
--   
--   <i>Since: 0.1.4.0</i>
getLinRegResult :: LinRegState -> LinRegResult

-- | When returned by <a>fastLinearReg</a>, contains the count, slope,
--   intercept and correlation of combining <tt>(x,y)</tt> pairs.
--   
--   <i>Since: 0.1.1.0</i>
data LinRegResult
LinRegResult :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !LMVSK -> {-# UNPACK #-} !LMVSK -> LinRegResult
[lrrSlope] :: LinRegResult -> {-# UNPACK #-} !Double
[lrrIntercept] :: LinRegResult -> {-# UNPACK #-} !Double
[lrrCorrelation] :: LinRegResult -> {-# UNPACK #-} !Double
[lrrXStats] :: LinRegResult -> {-# UNPACK #-} !LMVSK
[lrrYStats] :: LinRegResult -> {-# UNPACK #-} !LMVSK

-- | The Monoidal state used to compute linear regression, see
--   <a>fastLinearReg</a>.
--   
--   <i>Since: 0.1.4.0</i>
data LinRegState

-- | The number of elements which make up this <a>LinRegResult</a>.
--   
--   <i>Since: 0.1.4.1</i>
lrrCount :: LinRegResult -> Int

-- | Computes the correlation between the <tt>x</tt> and <tt>y</tt> series,
--   given the means and standard deviations of both series. The results
--   may be more accurate than those returned by <a>fastLinearReg</a>.
correlation :: (Double, Double) -> (Double, Double) -> Fold (Double, Double) Double
instance GHC.Classes.Eq Control.Foldl.Statistics.LinRegResult
instance GHC.Show.Show Control.Foldl.Statistics.LinRegResult
instance GHC.Classes.Eq Control.Foldl.Statistics.LMVSK
instance GHC.Show.Show Control.Foldl.Statistics.LMVSK
instance Data.Semigroup.Semigroup Control.Foldl.Statistics.LinRegState
instance GHC.Base.Monoid Control.Foldl.Statistics.LinRegState
instance GHC.Base.Monoid Control.Foldl.Statistics.LMVSKState
instance Data.Semigroup.Semigroup Control.Foldl.Statistics.LMVSKState
