-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Automatic Differentiation
--   
--   Forward-, reverse- and mixed- mode automatic differentiation
--   combinators with a common API.
--   
--   Type-level "branding" is used to both prevent the end user from
--   confusing infinitesimals and to limit unsafe access to the
--   implementation details of each Mode.
--   
--   Each mode has a separate module full of combinators.
--   
--   <ul>
--   <li><tt>Numeric.AD.Mode.Forward</tt> provides basic forward-mode AD.
--   It is good for computing simple derivatives.</li>
--   <li><tt>Numeric.AD.Mode.Reverse</tt> uses benign side-effects to
--   compute reverse-mode AD. It is good for computing gradients in one
--   pass. It generates a Wengert list (linear tape) using
--   <tt>Data.Reflection</tt>.</li>
--   <li><tt>Numeric.AD.Mode.Kahn</tt> uses benign side-effects to compute
--   reverse-mode AD. It is good for computing gradients in one pass. It
--   generates a tree-like tape that needs to be topologically sorted in
--   the end.</li>
--   <li><tt>Numeric.AD.Mode.Sparse</tt> computes a sparse forward-mode AD
--   tower. It is good for higher derivatives or large numbers of
--   outputs.</li>
--   <li><tt>Numeric.AD.Mode.Tower</tt> computes a dense forward-mode AD
--   tower useful for higher derivatives of single input functions.</li>
--   <li><tt>Numeric.AD</tt> computes using whichever mode or combination
--   thereof is suitable to each individual combinator.</li>
--   </ul>
--   
--   While not every mode can provide all operations, the following basic
--   operations are supported, modified as appropriate by the suffixes
--   below:
--   
--   <ul>
--   <li><a>grad</a> computes the gradient (partial derivatives) of a
--   function at a point.</li>
--   <li><a>jacobian</a> computes the Jacobian matrix of a function at a
--   point.</li>
--   <li><a>diff</a> computes the derivative of a function at a point.</li>
--   <li><a>du</a> computes a directional derivative of a function at a
--   point.</li>
--   <li><a>hessian</a> computes the Hessian matrix (matrix of second
--   partial derivatives) of a function at a point.</li>
--   </ul>
--   
--   The following suffixes alter the meanings of the functions above as
--   follows:
--   
--   <ul>
--   <li><tt>'</tt> also returns the answer</li>
--   <li><tt>With</tt> lets the user supply a function to blend the input
--   with the output</li>
--   <li><tt>F</tt> is a version of the base function lifted to return a
--   <a>Traversable</a> (or <a>Functor</a>) result</li>
--   <li><tt>s</tt> means the function returns all higher derivatives in a
--   list or f-branching <a>Stream</a></li>
--   <li><tt>T</tt> means the result is transposed with respect to the
--   traditional formulation.</li>
--   <li><tt>0</tt> means that the resulting derivative list is padded with
--   0s at the end.</li>
--   <li><tt>NoEq</tt> means that an infinite list of converging values is
--   returned rather than the list being truncated when the values become
--   constant</li>
--   </ul>
@package ad
@version 4.3.5


module Numeric.AD.Jet

-- | A <a>Jet</a> is a tower of all (higher order) partial derivatives of a
--   function
--   
--   At each step, a <tt><a>Jet</a> f</tt> is wrapped in another layer
--   worth of <tt>f</tt>.
--   
--   <pre>
--   a :- f a :- f (f a) :- f (f (f a)) :- ...
--   </pre>
data Jet f a
(:-) :: a -> Jet f (f a) -> Jet f a

-- | Take the head of a <a>Jet</a>.
headJet :: Jet f a -> a

-- | Take the tail of a <a>Jet</a>.
tailJet :: Jet f a -> Jet f (f a)

-- | Construct a <a>Jet</a> by unzipping the layers of a <a>Cofree</a>
--   <tt>Comonad</tt>.
jet :: Functor f => Cofree f a -> Jet f a
instance GHC.Show.Show Numeric.AD.Jet.Showable
instance (GHC.Base.Functor f, GHC.Show.Show (f Numeric.AD.Jet.Showable), GHC.Show.Show a) => GHC.Show.Show (Numeric.AD.Jet.Jet f a)
instance GHC.Base.Functor f => GHC.Base.Functor (Numeric.AD.Jet.Jet f)
instance Data.Foldable.Foldable f => Data.Foldable.Foldable (Numeric.AD.Jet.Jet f)
instance Data.Traversable.Traversable f => Data.Traversable.Traversable (Numeric.AD.Jet.Jet f)


module Numeric.AD.Mode
class (Num t, Num (Scalar t)) => Mode t where {
    type family Scalar t;
}

-- | Allowed to return <tt>False</tt> for items with a zero derivative, but
--   doing so will produce more NaNs than strictly necessary
isKnownConstant :: Mode t => t -> Bool

-- | Allowed to return <tt>False</tt> for zero, but doing so will produce
--   more NaNs than strictly necessary
isKnownZero :: Mode t => t -> Bool

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | Scalar-vector multiplication
(*^) :: Mode t => Scalar t -> t -> t

-- | Vector-scalar multiplication
(^*) :: Mode t => t -> Scalar t -> t

-- | Scalar division
(^/) :: (Mode t, Fractional (Scalar t)) => t -> Scalar t -> t

-- | <pre>
--   <a>zero</a> = <tt>lift</tt> 0
--   </pre>
zero :: Mode t => t
instance Numeric.AD.Mode.Mode GHC.Types.Double
instance Numeric.AD.Mode.Mode GHC.Types.Float
instance Numeric.AD.Mode.Mode GHC.Types.Int
instance Numeric.AD.Mode.Mode GHC.Integer.Type.Integer
instance Numeric.AD.Mode.Mode GHC.Int.Int8
instance Numeric.AD.Mode.Mode GHC.Int.Int16
instance Numeric.AD.Mode.Mode GHC.Int.Int32
instance Numeric.AD.Mode.Mode GHC.Int.Int64
instance Numeric.AD.Mode.Mode GHC.Natural.Natural
instance Numeric.AD.Mode.Mode GHC.Types.Word
instance Numeric.AD.Mode.Mode GHC.Word.Word8
instance Numeric.AD.Mode.Mode GHC.Word.Word16
instance Numeric.AD.Mode.Mode GHC.Word.Word32
instance Numeric.AD.Mode.Mode GHC.Word.Word64
instance GHC.Float.RealFloat a => Numeric.AD.Mode.Mode (Data.Complex.Complex a)
instance GHC.Real.Integral a => Numeric.AD.Mode.Mode (GHC.Real.Ratio a)


module Numeric.AD.Jacobian

-- | <a>Jacobian</a> is useful for defining new AD primitives in a fairly
--   generic way.
class (Mode t, Mode (D t), Num (D t)) => Jacobian t where {
    type family D t :: *;
}
unary :: Jacobian t => (Scalar t -> Scalar t) -> D t -> t -> t
lift1 :: Jacobian t => (Scalar t -> Scalar t) -> (D t -> D t) -> t -> t
lift1_ :: Jacobian t => (Scalar t -> Scalar t) -> (D t -> D t -> D t) -> t -> t
binary :: Jacobian t => (Scalar t -> Scalar t -> Scalar t) -> D t -> D t -> t -> t -> t
lift2 :: Jacobian t => (Scalar t -> Scalar t -> Scalar t) -> (D t -> D t -> (D t, D t)) -> t -> t -> t
lift2_ :: Jacobian t => (Scalar t -> Scalar t -> Scalar t) -> (D t -> D t -> D t -> (D t, D t)) -> t -> t -> t

module Numeric.AD.Internal.Type
newtype AD s a
AD :: a -> AD s a
[runAD] :: AD s a -> a
instance Data.Number.Erf.InvErf a => Data.Number.Erf.InvErf (Numeric.AD.Internal.Type.AD s a)
instance Data.Number.Erf.Erf a => Data.Number.Erf.Erf (Numeric.AD.Internal.Type.AD s a)
instance GHC.Float.RealFloat a => GHC.Float.RealFloat (Numeric.AD.Internal.Type.AD s a)
instance GHC.Real.RealFrac a => GHC.Real.RealFrac (Numeric.AD.Internal.Type.AD s a)
instance GHC.Enum.Enum a => GHC.Enum.Enum (Numeric.AD.Internal.Type.AD s a)
instance GHC.Float.Floating a => GHC.Float.Floating (Numeric.AD.Internal.Type.AD s a)
instance GHC.Real.Fractional a => GHC.Real.Fractional (Numeric.AD.Internal.Type.AD s a)
instance GHC.Real.Real a => GHC.Real.Real (Numeric.AD.Internal.Type.AD s a)
instance GHC.Num.Num a => GHC.Num.Num (Numeric.AD.Internal.Type.AD s a)
instance GHC.Enum.Bounded a => GHC.Enum.Bounded (Numeric.AD.Internal.Type.AD s a)
instance GHC.Read.Read a => GHC.Read.Read (Numeric.AD.Internal.Type.AD s a)
instance GHC.Show.Show a => GHC.Show.Show (Numeric.AD.Internal.Type.AD s a)
instance GHC.Classes.Ord a => GHC.Classes.Ord (Numeric.AD.Internal.Type.AD s a)
instance GHC.Classes.Eq a => GHC.Classes.Eq (Numeric.AD.Internal.Type.AD s a)
instance Numeric.AD.Mode.Mode a => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Type.AD s a)


module Numeric.AD.Internal.Or

-- | The choice between two AD modes is an AD mode in its own right
data Or s a b
[L] :: a -> Or F a b
[R] :: b -> Or T a b
data F
data T
runL :: Or F a b -> a
runR :: Or T a b -> b
class Chosen s
choose :: Chosen s => a -> b -> Or s a b
chosen :: (a -> r) -> (b -> r) -> Or s a b -> r
unary :: (a -> a) -> (b -> b) -> Or s a b -> Or s a b
binary :: (a -> a -> a) -> (b -> b -> b) -> Or s a b -> Or s a b -> Or s a b
instance Numeric.AD.Internal.Or.Chosen Numeric.AD.Internal.Or.F
instance Numeric.AD.Internal.Or.Chosen Numeric.AD.Internal.Or.T
instance (GHC.Enum.Enum a, GHC.Enum.Enum b, Numeric.AD.Internal.Or.Chosen s) => GHC.Enum.Enum (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Enum.Bounded a, GHC.Enum.Bounded b, Numeric.AD.Internal.Or.Chosen s) => GHC.Enum.Bounded (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Num.Num a, GHC.Num.Num b, Numeric.AD.Internal.Or.Chosen s) => GHC.Num.Num (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Real.Real a, GHC.Real.Real b, Numeric.AD.Internal.Or.Chosen s) => GHC.Real.Real (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Real.Fractional a, GHC.Real.Fractional b, Numeric.AD.Internal.Or.Chosen s) => GHC.Real.Fractional (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Real.RealFrac a, GHC.Real.RealFrac b, Numeric.AD.Internal.Or.Chosen s) => GHC.Real.RealFrac (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Float.Floating a, GHC.Float.Floating b, Numeric.AD.Internal.Or.Chosen s) => GHC.Float.Floating (Numeric.AD.Internal.Or.Or s a b)
instance (Data.Number.Erf.Erf a, Data.Number.Erf.Erf b, Numeric.AD.Internal.Or.Chosen s) => Data.Number.Erf.Erf (Numeric.AD.Internal.Or.Or s a b)
instance (Data.Number.Erf.InvErf a, Data.Number.Erf.InvErf b, Numeric.AD.Internal.Or.Chosen s) => Data.Number.Erf.InvErf (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Float.RealFloat a, GHC.Float.RealFloat b, Numeric.AD.Internal.Or.Chosen s) => GHC.Float.RealFloat (Numeric.AD.Internal.Or.Or s a b)
instance (Numeric.AD.Mode.Mode a, Numeric.AD.Mode.Mode b, Numeric.AD.Internal.Or.Chosen s, Numeric.AD.Mode.Scalar a ~ Numeric.AD.Mode.Scalar b) => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Classes.Eq a, GHC.Classes.Eq b) => GHC.Classes.Eq (Numeric.AD.Internal.Or.Or s a b)
instance (GHC.Classes.Ord a, GHC.Classes.Ord b) => GHC.Classes.Ord (Numeric.AD.Internal.Or.Or s a b)


module Numeric.AD.Internal.On

-- | The composition of two AD modes is an AD mode in its own right
newtype On t
On :: t -> On t
[off] :: On t -> t
instance GHC.Float.RealFloat t => GHC.Float.RealFloat (Numeric.AD.Internal.On.On t)
instance Data.Number.Erf.InvErf t => Data.Number.Erf.InvErf (Numeric.AD.Internal.On.On t)
instance Data.Number.Erf.Erf t => Data.Number.Erf.Erf (Numeric.AD.Internal.On.On t)
instance GHC.Float.Floating t => GHC.Float.Floating (Numeric.AD.Internal.On.On t)
instance GHC.Real.RealFrac t => GHC.Real.RealFrac (Numeric.AD.Internal.On.On t)
instance GHC.Real.Fractional t => GHC.Real.Fractional (Numeric.AD.Internal.On.On t)
instance GHC.Real.Real t => GHC.Real.Real (Numeric.AD.Internal.On.On t)
instance GHC.Num.Num t => GHC.Num.Num (Numeric.AD.Internal.On.On t)
instance GHC.Enum.Bounded t => GHC.Enum.Bounded (Numeric.AD.Internal.On.On t)
instance GHC.Classes.Ord t => GHC.Classes.Ord (Numeric.AD.Internal.On.On t)
instance GHC.Enum.Enum t => GHC.Enum.Enum (Numeric.AD.Internal.On.On t)
instance GHC.Classes.Eq t => GHC.Classes.Eq (Numeric.AD.Internal.On.On t)
instance (Numeric.AD.Mode.Mode t, Numeric.AD.Mode.Mode (Numeric.AD.Mode.Scalar t)) => Numeric.AD.Mode.Mode (Numeric.AD.Internal.On.On t)


module Numeric.AD.Internal.Identity
newtype Id a
Id :: a -> Id a
[runId] :: Id a -> a
probe :: a -> Id a
unprobe :: Id a -> a
probed :: Functor f => f a -> f (Id a)
unprobed :: Functor f => f (Id a) -> f a
instance Data.Number.Erf.InvErf a => Data.Number.Erf.InvErf (Numeric.AD.Internal.Identity.Id a)
instance Data.Number.Erf.Erf a => Data.Number.Erf.Erf (Numeric.AD.Internal.Identity.Id a)
instance Data.Data.Data a => Data.Data.Data (Numeric.AD.Internal.Identity.Id a)
instance GHC.Base.Monoid a => GHC.Base.Monoid (Numeric.AD.Internal.Identity.Id a)
instance Data.Semigroup.Semigroup a => Data.Semigroup.Semigroup (Numeric.AD.Internal.Identity.Id a)
instance GHC.Float.RealFloat a => GHC.Float.RealFloat (Numeric.AD.Internal.Identity.Id a)
instance GHC.Real.RealFrac a => GHC.Real.RealFrac (Numeric.AD.Internal.Identity.Id a)
instance GHC.Float.Floating a => GHC.Float.Floating (Numeric.AD.Internal.Identity.Id a)
instance GHC.Real.Fractional a => GHC.Real.Fractional (Numeric.AD.Internal.Identity.Id a)
instance GHC.Real.Real a => GHC.Real.Real (Numeric.AD.Internal.Identity.Id a)
instance GHC.Num.Num a => GHC.Num.Num (Numeric.AD.Internal.Identity.Id a)
instance GHC.Enum.Bounded a => GHC.Enum.Bounded (Numeric.AD.Internal.Identity.Id a)
instance GHC.Enum.Enum a => GHC.Enum.Enum (Numeric.AD.Internal.Identity.Id a)
instance GHC.Show.Show a => GHC.Show.Show (Numeric.AD.Internal.Identity.Id a)
instance GHC.Classes.Ord a => GHC.Classes.Ord (Numeric.AD.Internal.Identity.Id a)
instance GHC.Classes.Eq a => GHC.Classes.Eq (Numeric.AD.Internal.Identity.Id a)
instance GHC.Num.Num a => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Identity.Id a)


module Numeric.AD.Internal.Tower

-- | <tt>Tower</tt> is an AD <a>Mode</a> that calculates a tangent tower by
--   forward AD, and provides fast <tt>diffsUU</tt>, <tt>diffsUF</tt>
newtype Tower a
Tower :: [a] -> Tower a
[getTower] :: Tower a -> [a]
zeroPad :: Num a => [a] -> [a]
zeroPadF :: (Functor f, Num a) => [f a] -> [f a]
transposePadF :: (Foldable f, Functor f) => a -> f [a] -> [f a]
d :: Num a => [a] -> a
d' :: Num a => [a] -> (a, a)
withD :: (a, a) -> Tower a
tangents :: Tower a -> Tower a
bundle :: a -> Tower a -> Tower a
apply :: Num a => (Tower a -> b) -> a -> b
getADTower :: Tower a -> [a]
tower :: [a] -> Tower a
instance Data.Data.Data a => Data.Data.Data (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Show.Show a => GHC.Show.Show (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Num.Num a => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Num.Num a => Numeric.AD.Jacobian.Jacobian (Numeric.AD.Internal.Tower.Tower a)
instance (GHC.Num.Num a, GHC.Classes.Eq a) => GHC.Classes.Eq (Numeric.AD.Internal.Tower.Tower a)
instance (GHC.Num.Num a, GHC.Classes.Ord a) => GHC.Classes.Ord (Numeric.AD.Internal.Tower.Tower a)
instance (GHC.Num.Num a, GHC.Enum.Bounded a) => GHC.Enum.Bounded (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Num.Num a => GHC.Num.Num (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Real.Fractional a => GHC.Real.Fractional (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Float.Floating a => GHC.Float.Floating (Numeric.AD.Internal.Tower.Tower a)
instance (GHC.Num.Num a, GHC.Enum.Enum a) => GHC.Enum.Enum (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Real.Real a => GHC.Real.Real (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Float.RealFloat a => GHC.Float.RealFloat (Numeric.AD.Internal.Tower.Tower a)
instance GHC.Real.RealFrac a => GHC.Real.RealFrac (Numeric.AD.Internal.Tower.Tower a)
instance Data.Number.Erf.Erf a => Data.Number.Erf.Erf (Numeric.AD.Internal.Tower.Tower a)
instance Data.Number.Erf.InvErf a => Data.Number.Erf.InvErf (Numeric.AD.Internal.Tower.Tower a)


-- | Unsafe and often partial combinators intended for internal usage.
--   
--   Handle with care.
module Numeric.AD.Internal.Sparse
newtype Monomial
Monomial :: (IntMap Int) -> Monomial
emptyMonomial :: Monomial
addToMonomial :: Int -> Monomial -> Monomial
indices :: Monomial -> [Int]

-- | We only store partials in sorted order, so the map contained in a
--   partial will only contain partials with equal or greater keys to that
--   of the map in which it was found. This should be key for efficiently
--   computing sparse Hessians. There are only (n + k - 1) choose (k - 1)
--   distinct nth partial derivatives of a function with k inputs.
data Sparse a
Sparse :: !a -> (IntMap (Sparse a)) -> Sparse a
Zero :: Sparse a
apply :: (Traversable f, Num a) => (f (Sparse a) -> b) -> f a -> b
vars :: (Traversable f, Num a) => f a -> f (Sparse a)
d :: (Traversable f, Num a) => f b -> Sparse a -> f a
d' :: (Traversable f, Num a) => f a -> Sparse a -> (a, f a)
ds :: (Traversable f, Num a) => f b -> Sparse a -> Cofree f a
skeleton :: Traversable f => f a -> f Int
spartial :: Num a => [Int] -> Sparse a -> Maybe a
partial :: Num a => [Int] -> Sparse a -> a
vgrad :: Grad i o o' a => i -> o
vgrad' :: Grad i o o' a => i -> o'
vgrads :: Grads i o a => i -> o
class Num a => Grad i o o' a | i -> a o o', o -> a i o', o' -> a i o
pack :: Grad i o o' a => i -> [Sparse a] -> Sparse a
unpack :: Grad i o o' a => ([a] -> [a]) -> o
unpack' :: Grad i o o' a => ([a] -> (a, [a])) -> o'
class Num a => Grads i o a | i -> a o, o -> a i
packs :: Grads i o a => i -> [Sparse a] -> Sparse a
unpacks :: Grads i o a => ([a] -> Cofree [] a) -> o

-- | The value of the derivative of (f*g) of order mi is
--   
--   <pre>
--   <a>sum</a> [a * <a>primal</a> (<a>partialS</a> (<a>indices</a> b) f) * <a>primal</a> (<a>partialS</a> (<a>indices</a> c) g) | (a,b,c) &lt;- <a>terms</a> mi ]
--   </pre>
--   
--   It is a bit more complicated in <a>mul</a> below, since we build the
--   whole tree of derivatives and want to prune the tree with <a>Zero</a>s
--   as much as possible. The sum for order of differentiation <tt>as</tt>
--   has <tt><a>sum</a> (<a>map</a> (+1) as)</tt> terms, so
--   this is *much* more efficient than the naive recursive differentiation
--   with <tt>2^<a>sum</a> as</tt> terms. The coefficients <tt>a</tt>,
--   which collect equivalent derivatives, are suitable products of
--   binomial coefficients.
terms :: Monomial -> [(Integer, Monomial, Monomial)]
primal :: Num a => Sparse a -> a
instance Data.Data.Data a => Data.Data.Data (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Show.Show a => GHC.Show.Show (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Num.Num a => Numeric.AD.Internal.Sparse.Grads (Numeric.AD.Internal.Sparse.Sparse a) (Control.Comonad.Cofree.Cofree [] a) a
instance Numeric.AD.Internal.Sparse.Grads i o a => Numeric.AD.Internal.Sparse.Grads (Numeric.AD.Internal.Sparse.Sparse a -> i) (a -> o) a
instance GHC.Num.Num a => Numeric.AD.Internal.Sparse.Grad (Numeric.AD.Internal.Sparse.Sparse a) [a] (a, [a]) a
instance Numeric.AD.Internal.Sparse.Grad i o o' a => Numeric.AD.Internal.Sparse.Grad (Numeric.AD.Internal.Sparse.Sparse a -> i) (a -> o) (a -> o') a
instance GHC.Num.Num a => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Num.Num a => Numeric.AD.Jacobian.Jacobian (Numeric.AD.Internal.Sparse.Sparse a)
instance (GHC.Num.Num a, GHC.Classes.Eq a) => GHC.Classes.Eq (Numeric.AD.Internal.Sparse.Sparse a)
instance (GHC.Num.Num a, GHC.Classes.Ord a) => GHC.Classes.Ord (Numeric.AD.Internal.Sparse.Sparse a)
instance (GHC.Num.Num a, GHC.Enum.Bounded a) => GHC.Enum.Bounded (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Num.Num a => GHC.Num.Num (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Real.Fractional a => GHC.Real.Fractional (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Float.Floating a => GHC.Float.Floating (Numeric.AD.Internal.Sparse.Sparse a)
instance (GHC.Num.Num a, GHC.Enum.Enum a) => GHC.Enum.Enum (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Real.Real a => GHC.Real.Real (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Float.RealFloat a => GHC.Float.RealFloat (Numeric.AD.Internal.Sparse.Sparse a)
instance GHC.Real.RealFrac a => GHC.Real.RealFrac (Numeric.AD.Internal.Sparse.Sparse a)
instance Data.Number.Erf.Erf a => Data.Number.Erf.Erf (Numeric.AD.Internal.Sparse.Sparse a)
instance Data.Number.Erf.InvErf a => Data.Number.Erf.InvErf (Numeric.AD.Internal.Sparse.Sparse a)


-- | Reverse-Mode Automatic Differentiation using a single Wengert list (or
--   "tape").
--   
--   This version uses <tt>Data.Reflection</tt> to find and update the
--   tape.
--   
--   This is asymptotically faster than using <tt>Kahn</tt>, which is
--   forced to reify and topologically sort the graph, but it requires a
--   fairly expensive rendezvous during construction when updated using
--   multiple threads.
module Numeric.AD.Internal.Reverse
data Reverse s a
[Zero] :: Reverse s a
[Lift] :: a -> Reverse s a
[Reverse] :: {-# UNPACK #-} !Int -> a -> Reverse s a
newtype Tape
Tape :: IORef Head -> Tape
[getTape] :: Tape -> IORef Head
data Head
Head :: {-# UNPACK #-} !Int -> Cells -> Head
data Cells
[Nil] :: Cells
[Unary] :: {-# UNPACK #-} !Int -> a -> Cells -> Cells
[Binary] :: {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> a -> a -> Cells -> Cells

-- | Construct a tape that starts with <tt>n</tt> variables.
reifyTape :: Int -> (forall s. Reifies s Tape => Proxy s -> r) -> r

-- | Extract the partials from the current chain for a given AD variable.
partials :: forall s a. (Reifies s Tape, Num a) => Reverse s a -> [a]

-- | Return an <a>Array</a> of <a>partials</a> given bounds for the
--   variable IDs.
partialArrayOf :: (Reifies s Tape, Num a) => Proxy s -> (Int, Int) -> Reverse s a -> Array Int a

-- | Return an <a>IntMap</a> of sparse partials
partialMapOf :: (Reifies s Tape, Num a) => Proxy s -> Reverse s a -> IntMap a

-- | Helper that extracts the derivative of a chain when the chain was
--   constructed with 1 variable.
derivativeOf :: (Reifies s Tape, Num a) => Proxy s -> Reverse s a -> a

-- | Helper that extracts both the primal and derivative of a chain when
--   the chain was constructed with 1 variable.
derivativeOf' :: (Reifies s Tape, Num a) => Proxy s -> Reverse s a -> (a, a)
bind :: Traversable f => f a -> (f (Reverse s a), (Int, Int))
unbind :: Functor f => f (Reverse s a) -> Array Int a -> f a
unbindMap :: (Functor f, Num a) => f (Reverse s a) -> IntMap a -> f a
unbindWith :: (Functor f, Num a) => (a -> b -> c) -> f (Reverse s a) -> Array Int b -> f c
unbindMapWithDefault :: (Functor f, Num a) => b -> (a -> b -> c) -> f (Reverse s a) -> IntMap b -> f c
var :: a -> Int -> Reverse s a
varId :: Reverse s a -> Int
primal :: Num a => Reverse s a -> a
instance GHC.Show.Show a => GHC.Show.Show (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Num.Num a) => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Num.Num a) => Numeric.AD.Jacobian.Jacobian (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Num.Num a, GHC.Classes.Eq a) => GHC.Classes.Eq (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Num.Num a, GHC.Classes.Ord a) => GHC.Classes.Ord (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Num.Num a, GHC.Enum.Bounded a) => GHC.Enum.Bounded (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Num.Num a) => GHC.Num.Num (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Real.Fractional a) => GHC.Real.Fractional (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Float.Floating a) => GHC.Float.Floating (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Num.Num a, GHC.Enum.Enum a) => GHC.Enum.Enum (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Real.Real a) => GHC.Real.Real (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Float.RealFloat a) => GHC.Float.RealFloat (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, GHC.Real.RealFrac a) => GHC.Real.RealFrac (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, Data.Number.Erf.Erf a) => Data.Number.Erf.Erf (Numeric.AD.Internal.Reverse.Reverse s a)
instance (Data.Reflection.Reifies s Numeric.AD.Internal.Reverse.Tape, Data.Number.Erf.InvErf a) => Data.Number.Erf.InvErf (Numeric.AD.Internal.Reverse.Reverse s a)


-- | This module provides reverse-mode Automatic Differentiation
--   implementation using linear time topological sorting after the fact.
--   
--   For this form of reverse-mode AD we use <a>StableName</a> to recover
--   sharing information from the tape to avoid combinatorial explosion,
--   and thus run asymptotically faster than it could without such sharing
--   information, but the use of side-effects contained herein is benign.
module Numeric.AD.Internal.Kahn

-- | <tt>Kahn</tt> is a <a>Mode</a> using reverse-mode automatic
--   differentiation that provides fast <tt>diffFU</tt>, <tt>diff2FU</tt>,
--   <tt>grad</tt>, <tt>grad2</tt> and a fast <tt>jacobian</tt> when you
--   have a significantly smaller number of outputs than inputs.
newtype Kahn a
Kahn :: (Tape a (Kahn a)) -> Kahn a

-- | A <tt>Tape</tt> records the information needed back propagate from the
--   output to each input during reverse <a>Mode</a> AD.
data Tape a t
Zero :: Tape a t
Lift :: !a -> Tape a t
Var :: !a -> {-# UNPACK #-} !Int -> Tape a t
Binary :: !a -> a -> a -> t -> t -> Tape a t
Unary :: !a -> a -> t -> Tape a t

-- | This returns a list of contributions to the partials. The variable ids
--   returned in the list are likely <i>not</i> unique!
partials :: forall a. Num a => Kahn a -> [(Int, a)]

-- | Return an <a>Array</a> of <a>partials</a> given bounds for the
--   variable IDs.
partialArray :: Num a => (Int, Int) -> Kahn a -> Array Int a

-- | Return an <a>IntMap</a> of sparse partials
partialMap :: Num a => Kahn a -> IntMap a
derivative :: Num a => Kahn a -> a
derivative' :: Num a => Kahn a -> (a, a)
vgrad :: Grad i o o' a => i -> o
vgrad' :: Grad i o o' a => i -> o'
class Num a => Grad i o o' a | i -> a o o', o -> a i o', o' -> a i o
pack :: Grad i o o' a => i -> [Kahn a] -> Kahn a
unpack :: Grad i o o' a => ([a] -> [a]) -> o
unpack' :: Grad i o o' a => ([a] -> (a, [a])) -> o'
bind :: Traversable f => f a -> (f (Kahn a), (Int, Int))
unbind :: Functor f => f (Kahn a) -> Array Int a -> f a
unbindMap :: (Functor f, Num a) => f (Kahn a) -> IntMap a -> f a
unbindWith :: (Functor f, Num a) => (a -> b -> c) -> f (Kahn a) -> Array Int b -> f c
unbindMapWithDefault :: (Functor f, Num a) => b -> (a -> b -> c) -> f (Kahn a) -> IntMap b -> f c
primal :: Num a => Kahn a -> a
var :: a -> Int -> Kahn a
varId :: Kahn a -> Int
instance GHC.Show.Show a => GHC.Show.Show (Numeric.AD.Internal.Kahn.Kahn a)
instance (Data.Data.Data t, Data.Data.Data a) => Data.Data.Data (Numeric.AD.Internal.Kahn.Tape a t)
instance (GHC.Show.Show t, GHC.Show.Show a) => GHC.Show.Show (Numeric.AD.Internal.Kahn.Tape a t)
instance GHC.Num.Num a => Numeric.AD.Internal.Kahn.Grad (Numeric.AD.Internal.Kahn.Kahn a) [a] (a, [a]) a
instance Numeric.AD.Internal.Kahn.Grad i o o' a => Numeric.AD.Internal.Kahn.Grad (Numeric.AD.Internal.Kahn.Kahn a -> i) (a -> o) (a -> o') a
instance Data.Reify.MuRef (Numeric.AD.Internal.Kahn.Kahn a)
instance GHC.Num.Num a => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Kahn.Kahn a)
instance GHC.Num.Num a => Numeric.AD.Jacobian.Jacobian (Numeric.AD.Internal.Kahn.Kahn a)
instance (GHC.Num.Num a, GHC.Classes.Eq a) => GHC.Classes.Eq (Numeric.AD.Internal.Kahn.Kahn a)
instance (GHC.Num.Num a, GHC.Classes.Ord a) => GHC.Classes.Ord (Numeric.AD.Internal.Kahn.Kahn a)
instance (GHC.Num.Num a, GHC.Enum.Bounded a) => GHC.Enum.Bounded (Numeric.AD.Internal.Kahn.Kahn a)
instance GHC.Num.Num a => GHC.Num.Num (Numeric.AD.Internal.Kahn.Kahn a)
instance GHC.Real.Fractional a => GHC.Real.Fractional (Numeric.AD.Internal.Kahn.Kahn a)
instance GHC.Float.Floating a => GHC.Float.Floating (Numeric.AD.Internal.Kahn.Kahn a)
instance (GHC.Num.Num a, GHC.Enum.Enum a) => GHC.Enum.Enum (Numeric.AD.Internal.Kahn.Kahn a)
instance GHC.Real.Real a => GHC.Real.Real (Numeric.AD.Internal.Kahn.Kahn a)
instance GHC.Float.RealFloat a => GHC.Float.RealFloat (Numeric.AD.Internal.Kahn.Kahn a)
instance GHC.Real.RealFrac a => GHC.Real.RealFrac (Numeric.AD.Internal.Kahn.Kahn a)
instance Data.Number.Erf.Erf a => Data.Number.Erf.Erf (Numeric.AD.Internal.Kahn.Kahn a)
instance Data.Number.Erf.InvErf a => Data.Number.Erf.InvErf (Numeric.AD.Internal.Kahn.Kahn a)

module Numeric.AD.Internal.Forward.Double
data ForwardDouble
ForwardDouble :: {-# UNPACK #-} !Double -> ForwardDouble
[primal, tangent] :: ForwardDouble -> {-# UNPACK #-} !Double
bundle :: Double -> Double -> ForwardDouble
unbundle :: ForwardDouble -> (Double, Double)
apply :: (ForwardDouble -> b) -> Double -> b
bind :: Traversable f => (f ForwardDouble -> b) -> f Double -> f b
bind' :: Traversable f => (f ForwardDouble -> b) -> f Double -> (b, f b)
bindWith :: Traversable f => (Double -> b -> c) -> (f ForwardDouble -> b) -> f Double -> f c
bindWith' :: Traversable f => (Double -> b -> c) -> (f ForwardDouble -> b) -> f Double -> (b, f c)
transposeWith :: (Functor f, Foldable f, Traversable g) => (b -> f a -> c) -> f (g a) -> g b -> g c
instance GHC.Show.Show Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Read.Read Numeric.AD.Internal.Forward.Double.ForwardDouble
instance Numeric.AD.Mode.Mode Numeric.AD.Internal.Forward.Double.ForwardDouble
instance Numeric.AD.Jacobian.Jacobian Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Classes.Eq Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Classes.Ord Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Num.Num Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Real.Fractional Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Float.Floating Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Enum.Enum Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Real.Real Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Float.RealFloat Numeric.AD.Internal.Forward.Double.ForwardDouble
instance GHC.Real.RealFrac Numeric.AD.Internal.Forward.Double.ForwardDouble
instance Data.Number.Erf.Erf Numeric.AD.Internal.Forward.Double.ForwardDouble
instance Data.Number.Erf.InvErf Numeric.AD.Internal.Forward.Double.ForwardDouble


-- | Unsafe and often partial combinators intended for internal usage.
--   
--   Handle with care.
module Numeric.AD.Internal.Forward

-- | <a>Forward</a> mode AD
data Forward a
Forward :: !a -> a -> Forward a
Lift :: !a -> Forward a
Zero :: Forward a
primal :: Num a => Forward a -> a

-- | Calculate the <a>tangent</a> using forward mode AD.
tangent :: Num a => Forward a -> a
bundle :: a -> a -> Forward a
unbundle :: Num a => Forward a -> (a, a)
apply :: Num a => (Forward a -> b) -> a -> b
bind :: (Traversable f, Num a) => (f (Forward a) -> b) -> f a -> f b
bind' :: (Traversable f, Num a) => (f (Forward a) -> b) -> f a -> (b, f b)
bindWith :: (Traversable f, Num a) => (a -> b -> c) -> (f (Forward a) -> b) -> f a -> f c
bindWith' :: (Traversable f, Num a) => (a -> b -> c) -> (f (Forward a) -> b) -> f a -> (b, f c)
transposeWith :: (Functor f, Foldable f, Traversable g) => (b -> f a -> c) -> f (g a) -> g b -> g c
instance Data.Data.Data a => Data.Data.Data (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Show.Show a => GHC.Show.Show (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Num.Num a => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Num.Num a => Numeric.AD.Jacobian.Jacobian (Numeric.AD.Internal.Forward.Forward a)
instance (GHC.Num.Num a, GHC.Classes.Eq a) => GHC.Classes.Eq (Numeric.AD.Internal.Forward.Forward a)
instance (GHC.Num.Num a, GHC.Classes.Ord a) => GHC.Classes.Ord (Numeric.AD.Internal.Forward.Forward a)
instance (GHC.Num.Num a, GHC.Enum.Bounded a) => GHC.Enum.Bounded (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Num.Num a => GHC.Num.Num (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Real.Fractional a => GHC.Real.Fractional (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Float.Floating a => GHC.Float.Floating (Numeric.AD.Internal.Forward.Forward a)
instance (GHC.Num.Num a, GHC.Enum.Enum a) => GHC.Enum.Enum (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Real.Real a => GHC.Real.Real (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Float.RealFloat a => GHC.Float.RealFloat (Numeric.AD.Internal.Forward.Forward a)
instance GHC.Real.RealFrac a => GHC.Real.RealFrac (Numeric.AD.Internal.Forward.Forward a)
instance Data.Number.Erf.Erf a => Data.Number.Erf.Erf (Numeric.AD.Internal.Forward.Forward a)
instance Data.Number.Erf.InvErf a => Data.Number.Erf.InvErf (Numeric.AD.Internal.Forward.Forward a)


-- | Dense Forward AD. Useful when the result involves the majority of the
--   input elements. Do not use for <a>hessian</a> and beyond, since they
--   only contain a small number of unique <tt>n</tt>th derivatives --
--   <tt>(n + k - 1) choose k</tt> for functions of <tt>k</tt>
--   inputs rather than the <tt>k^n</tt> that would be generated by using
--   <a>Dense</a>, not to mention the redundant intermediate derivatives
--   that would be calculated over and over during that process!
--   
--   Assumes all instances of <tt>f</tt> have the same number of elements.
--   
--   NB: We don't need the full power of <a>Traversable</a> here, we could
--   get by with a notion of zippable that can plug in 0's for the missing
--   entries. This might allow for gradients where <tt>f</tt> has
--   exponentials like <tt>((-&gt;) a)</tt>
module Numeric.AD.Internal.Dense
data Dense f a
Lift :: !a -> Dense f a
Dense :: !a -> (f a) -> Dense f a
Zero :: Dense f a
ds :: f a -> Dense f a -> f a
ds' :: Num a => f a -> Dense f a -> (a, f a)
vars :: (Traversable f, Num a) => f a -> f (Dense f a)
apply :: (Traversable f, Num a) => (f (Dense f a) -> b) -> f a -> b
instance GHC.Show.Show a => GHC.Show.Show (Numeric.AD.Internal.Dense.Dense f a)
instance (GHC.Num.Num a, Data.Traversable.Traversable f) => Numeric.AD.Mode.Mode (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Num.Num a) => Numeric.AD.Jacobian.Jacobian (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Num.Num a, GHC.Classes.Eq a) => GHC.Classes.Eq (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Num.Num a, GHC.Classes.Ord a) => GHC.Classes.Ord (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Num.Num a, GHC.Enum.Bounded a) => GHC.Enum.Bounded (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Num.Num a) => GHC.Num.Num (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Real.Fractional a) => GHC.Real.Fractional (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Float.Floating a) => GHC.Float.Floating (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Num.Num a, GHC.Enum.Enum a) => GHC.Enum.Enum (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Real.Real a) => GHC.Real.Real (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Float.RealFloat a) => GHC.Float.RealFloat (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, GHC.Real.RealFrac a) => GHC.Real.RealFrac (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, Data.Number.Erf.Erf a) => Data.Number.Erf.Erf (Numeric.AD.Internal.Dense.Dense f a)
instance (Data.Traversable.Traversable f, Data.Number.Erf.InvErf a) => Data.Number.Erf.InvErf (Numeric.AD.Internal.Dense.Dense f a)


-- | Reverse-mode automatic differentiation using Wengert lists and
--   Data.Reflection
module Numeric.AD.Mode.Reverse
data Reverse s a

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | The <a>grad</a> function calculates the gradient of a
--   non-scalar-to-scalar function with reverse-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y,z] -&gt; x*y+z) [1,2,3]
--   [2,1,1]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y] -&gt; x**y) [0,2]
--   [0.0,NaN]
--   </pre>
grad :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> f a

-- | The <a>grad'</a> function calculates the result and gradient of a
--   non-scalar-to-scalar function with reverse-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad' (\[x,y,z] -&gt; x*y+z) [1,2,3]
--   (5,[2,1,1])
--   </pre>
grad' :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> (a, f a)

-- | <tt><a>gradWith</a> g f</tt> calculates the gradient of a
--   non-scalar-to-scalar function <tt>f</tt> with reverse-mode AD in a
--   single pass. The gradient is combined element-wise with the argument
--   using the function <tt>g</tt>.
--   
--   <pre>
--   <a>grad</a> == <a>gradWith</a> (\_ dx -&gt; dx)
--   <a>id</a> == <a>gradWith</a> <a>const</a>
--   </pre>
gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> f b

-- | <tt><a>gradWith'</a> g f</tt> calculates the result and gradient of a
--   non-scalar-to-scalar function <tt>f</tt> with reverse-mode AD in a
--   single pass. The gradient is combined element-wise with the argument
--   using the function <tt>g</tt>.
--   
--   <pre>
--   <a>grad'</a> == <a>gradWith'</a> (\_ dx -&gt; dx)
--   </pre>
gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> (a, f b)

-- | The <a>jacobian</a> function calculates the jacobian of a
--   non-scalar-to-non-scalar function with reverse AD lazily in <tt>m</tt>
--   passes for <tt>m</tt> outputs.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [y,x,x*y]) [2,1]
--   [[0,1],[1,0],[1,2]]
--   </pre>
jacobian :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (f a)

-- | The <a>jacobian'</a> function calculates both the result and the
--   Jacobian of a nonscalar-to-nonscalar function, using <tt>m</tt>
--   invocations of reverse AD, where <tt>m</tt> is the output
--   dimensionality. Applying <tt>fmap snd</tt> to the result will recover
--   the result of <a>jacobian</a>.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian' (\[x,y] -&gt; [y,x,x*y]) [2,1]
--   [(1,[0,1]),(2,[1,0]),(2,[1,2])]
--   </pre>
jacobian' :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (a, f a)

-- | 'jacobianWith g f' calculates the Jacobian of a
--   non-scalar-to-non-scalar function <tt>f</tt> with reverse AD lazily in
--   <tt>m</tt> passes for <tt>m</tt> outputs.
--   
--   Instead of returning the Jacobian matrix, the elements of the matrix
--   are combined with the input using the <tt>g</tt>.
--   
--   <pre>
--   <a>jacobian</a> == <a>jacobianWith</a> (\_ dx -&gt; dx)
--   <a>jacobianWith</a> <a>const</a> == (\f x -&gt; <a>const</a> x <a>&lt;$&gt;</a> f x)
--   </pre>
jacobianWith :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (f b)

-- | <a>jacobianWith</a> g f' calculates both the result and the Jacobian
--   of a nonscalar-to-nonscalar function <tt>f</tt>, using <tt>m</tt>
--   invocations of reverse AD, where <tt>m</tt> is the output
--   dimensionality. Applying <tt>fmap snd</tt> to the result will recover
--   the result of <a>jacobianWith</a>
--   
--   Instead of returning the Jacobian matrix, the elements of the matrix
--   are combined with the input using the <tt>g</tt>.
--   
--   <pre>
--   <a>jacobian'</a> == <a>jacobianWith'</a> (\_ dx -&gt; dx)
--   </pre>
jacobianWith' :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (a, f b)

-- | Compute the hessian via the jacobian of the gradient. gradient is
--   computed in reverse mode and then the jacobian is computed in reverse
--   mode.
--   
--   However, since the <tt><a>grad</a> f :: f a -&gt; f a</tt> is square
--   this is not as fast as using the forward-mode Jacobian of a reverse
--   mode gradient provided by <a>hessian</a>.
--   
--   <pre>
--   &gt;&gt;&gt; hessian (\[x,y] -&gt; x*y) [1,2]
--   [[0,1],[1,0]]
--   </pre>
hessian :: (Traversable f, Num a) => (forall s s'. (Reifies s Tape, Reifies s' Tape) => f (On (Reverse s (Reverse s' a))) -> (On (Reverse s (Reverse s' a)))) -> f a -> f (f a)

-- | Compute the order 3 Hessian tensor on a non-scalar-to-non-scalar
--   function via the reverse-mode Jacobian of the reverse-mode Jacobian of
--   the function.
--   
--   Less efficient than <a>hessianF</a>.
--   
--   <pre>
--   &gt;&gt;&gt; hessianF (\[x,y] -&gt; [x*y,x+y,exp x*cos y]) [1,2]
--   [[[0.0,1.0],[1.0,0.0]],[[0.0,0.0],[0.0,0.0]],[[-1.1312043837568135,-2.4717266720048188],[-2.4717266720048188,1.1312043837568135]]]
--   </pre>
hessianF :: (Traversable f, Functor g, Num a) => (forall s s'. (Reifies s Tape, Reifies s' Tape) => f (On (Reverse s (Reverse s' a))) -> g (On (Reverse s (Reverse s' a)))) -> f a -> g (f (f a))

-- | Compute the derivative of a function.
--   
--   <pre>
--   &gt;&gt;&gt; diff sin 0
--   1.0
--   </pre>
diff :: Num a => (forall s. Reifies s Tape => Reverse s a -> Reverse s a) -> a -> a

-- | The <a>diff'</a> function calculates the result and derivative, as a
--   pair, of a scalar-to-scalar function.
--   
--   <pre>
--   &gt;&gt;&gt; diff' sin 0
--   (0.0,1.0)
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' exp 0
--   (1.0,1.0)
--   </pre>
diff' :: Num a => (forall s. Reifies s Tape => Reverse s a -> Reverse s a) -> a -> (a, a)

-- | Compute the derivatives of each result of a scalar-to-vector function
--   with regards to its input.
--   
--   <pre>
--   &gt;&gt;&gt; diffF (\a -&gt; [sin a, cos a]) 0
--   [1.0,0.0]
--   </pre>
diffF :: (Functor f, Num a) => (forall s. Reifies s Tape => Reverse s a -> f (Reverse s a)) -> a -> f a

-- | Compute the derivatives of each result of a scalar-to-vector function
--   with regards to its input along with the answer.
--   
--   <pre>
--   &gt;&gt;&gt; diffF' (\a -&gt; [sin a, cos a]) 0
--   [(0.0,1.0),(1.0,0.0)]
--   </pre>
diffF' :: (Functor f, Num a) => (forall s. Reifies s Tape => Reverse s a -> f (Reverse s a)) -> a -> f (a, a)


-- | Forward mode automatic differentiation
module Numeric.AD.Rank1.Forward

-- | <a>Forward</a> mode AD
data Forward a

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | Compute the gradient of a function using forward mode AD.
--   
--   Note, this performs <i>O(n)</i> worse than <a>grad</a> for <tt>n</tt>
--   inputs, in exchange for better space utilization.
grad :: (Traversable f, Num a) => (f (Forward a) -> Forward a) -> f a -> f a

-- | Compute the gradient and answer to a function using forward mode AD.
--   
--   Note, this performs <i>O(n)</i> worse than <a>grad'</a> for <tt>n</tt>
--   inputs, in exchange for better space utilization.
grad' :: (Traversable f, Num a) => (f (Forward a) -> Forward a) -> f a -> (a, f a)

-- | Compute the gradient of a function using forward mode AD and combine
--   the result with the input using a user-specified function.
--   
--   Note, this performs <i>O(n)</i> worse than <a>gradWith</a> for
--   <tt>n</tt> inputs, in exchange for better space utilization.
gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (f (Forward a) -> Forward a) -> f a -> f b

-- | Compute the gradient of a function using forward mode AD and the
--   answer, and combine the result with the input using a user-specified
--   function.
--   
--   Note, this performs <i>O(n)</i> worse than <a>gradWith'</a> for
--   <tt>n</tt> inputs, in exchange for better space utilization.
--   
--   <pre>
--   &gt;&gt;&gt; gradWith' (,) sum [0..4]
--   (10,[(0,1),(1,1),(2,1),(3,1),(4,1)])
--   </pre>
gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (f (Forward a) -> Forward a) -> f a -> (a, f b)

-- | Compute the Jacobian using <a>Forward</a> mode <tt>AD</tt>. This must
--   transpose the result, so <a>jacobianT</a> is faster and allows more
--   result types.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [y,x,x+y,x*y,exp x * sin y]) [pi,1]
--   [[0.0,1.0],[1.0,0.0],[1.0,1.0],[1.0,3.141592653589793],[19.472221418841606,12.502969588876512]]
--   </pre>
jacobian :: (Traversable f, Traversable g, Num a) => (f (Forward a) -> g (Forward a)) -> f a -> g (f a)

-- | Compute the Jacobian using <a>Forward</a> mode <tt>AD</tt> along with
--   the actual answer.
jacobian' :: (Traversable f, Traversable g, Num a) => (f (Forward a) -> g (Forward a)) -> f a -> g (a, f a)

-- | Compute the Jacobian using <a>Forward</a> mode <tt>AD</tt> and combine
--   the output with the input. This must transpose the result, so
--   <a>jacobianWithT</a> is faster, and allows more result types.
jacobianWith :: (Traversable f, Traversable g, Num a) => (a -> a -> b) -> (f (Forward a) -> g (Forward a)) -> f a -> g (f b)

-- | Compute the Jacobian using <a>Forward</a> mode <tt>AD</tt> combined
--   with the input using a user specified function, along with the actual
--   answer.
jacobianWith' :: (Traversable f, Traversable g, Num a) => (a -> a -> b) -> (f (Forward a) -> g (Forward a)) -> f a -> g (a, f b)

-- | A fast, simple, transposed Jacobian computed with forward-mode AD.
jacobianT :: (Traversable f, Functor g, Num a) => (f (Forward a) -> g (Forward a)) -> f a -> f (g a)

-- | A fast, simple, transposed Jacobian computed with <a>Forward</a> mode
--   <tt>AD</tt> that combines the output with the input.
jacobianWithT :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (f (Forward a) -> g (Forward a)) -> f a -> f (g b)

-- | Compute the product of a vector with the Hessian using
--   forward-on-forward-mode AD.
hessianProduct :: (Traversable f, Num a) => (f (On (Forward (Forward a))) -> On (Forward (Forward a))) -> f (a, a) -> f a

-- | Compute the gradient and hessian product using forward-on-forward-mode
--   AD.
hessianProduct' :: (Traversable f, Num a) => (f (On (Forward (Forward a))) -> On (Forward (Forward a))) -> f (a, a) -> f (a, a)

-- | The <a>diff</a> function calculates the first derivative of a
--   scalar-to-scalar function by forward-mode <tt>AD</tt>
--   
--   <pre>
--   &gt;&gt;&gt; diff sin 0
--   1.0
--   </pre>
diff :: Num a => (Forward a -> Forward a) -> a -> a

-- | The <a>diff'</a> function calculates the result and first derivative
--   of scalar-to-scalar function by <a>Forward</a> mode <tt>AD</tt>
--   
--   <pre>
--   <a>diff'</a> <a>sin</a> == <a>sin</a> <a>&amp;&amp;&amp;</a> <a>cos</a>
--   <a>diff'</a> f = f <a>&amp;&amp;&amp;</a> d f
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' sin 0
--   (0.0,1.0)
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' exp 0
--   (1.0,1.0)
--   </pre>
diff' :: Num a => (Forward a -> Forward a) -> a -> (a, a)

-- | The <a>diffF</a> function calculates the first derivatives of
--   scalar-to-nonscalar function by <a>Forward</a> mode <tt>AD</tt>
--   
--   <pre>
--   &gt;&gt;&gt; diffF (\a -&gt; [sin a, cos a]) 0
--   [1.0,-0.0]
--   </pre>
diffF :: (Functor f, Num a) => (Forward a -> f (Forward a)) -> a -> f a

-- | The <a>diffF'</a> function calculates the result and first derivatives
--   of a scalar-to-non-scalar function by <a>Forward</a> mode <tt>AD</tt>
--   
--   <pre>
--   &gt;&gt;&gt; diffF' (\a -&gt; [sin a, cos a]) 0
--   [(0.0,1.0),(1.0,-0.0)]
--   </pre>
diffF' :: (Functor f, Num a) => (Forward a -> f (Forward a)) -> a -> f (a, a)

-- | Compute the directional derivative of a function given a zipped up
--   <a>Functor</a> of the input values and their derivatives
du :: (Functor f, Num a) => (f (Forward a) -> Forward a) -> f (a, a) -> a

-- | Compute the answer and directional derivative of a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives
du' :: (Functor f, Num a) => (f (Forward a) -> Forward a) -> f (a, a) -> (a, a)

-- | Compute a vector of directional derivatives for a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives.
duF :: (Functor f, Functor g, Num a) => (f (Forward a) -> g (Forward a)) -> f (a, a) -> g a

-- | Compute a vector of answers and directional derivatives for a function
--   given a zipped up <a>Functor</a> of the input values and their
--   derivatives.
duF' :: (Functor f, Functor g, Num a) => (f (Forward a) -> g (Forward a)) -> f (a, a) -> g (a, a)


-- | Forward mode automatic differentiation
module Numeric.AD.Mode.Forward
data AD s a

-- | <a>Forward</a> mode AD
data Forward a

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | Compute the gradient of a function using forward mode AD.
--   
--   Note, this performs <i>O(n)</i> worse than <a>grad</a> for <tt>n</tt>
--   inputs, in exchange for better space utilization.
grad :: (Traversable f, Num a) => (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f a -> f a

-- | Compute the gradient and answer to a function using forward mode AD.
--   
--   Note, this performs <i>O(n)</i> worse than <a>grad'</a> for <tt>n</tt>
--   inputs, in exchange for better space utilization.
grad' :: (Traversable f, Num a) => (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f a -> (a, f a)

-- | Compute the gradient of a function using forward mode AD and combine
--   the result with the input using a user-specified function.
--   
--   Note, this performs <i>O(n)</i> worse than <a>gradWith</a> for
--   <tt>n</tt> inputs, in exchange for better space utilization.
gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f a -> f b

-- | Compute the gradient of a function using forward mode AD and the
--   answer, and combine the result with the input using a user-specified
--   function.
--   
--   Note, this performs <i>O(n)</i> worse than <a>gradWith'</a> for
--   <tt>n</tt> inputs, in exchange for better space utilization.
--   
--   <pre>
--   &gt;&gt;&gt; gradWith' (,) sum [0..4]
--   (10,[(0,1),(1,1),(2,1),(3,1),(4,1)])
--   </pre>
gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f a -> (a, f b)

-- | Compute the Jacobian using <a>Forward</a> mode <a>AD</a>. This must
--   transpose the result, so <a>jacobianT</a> is faster and allows more
--   result types.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [y,x,x+y,x*y,exp x * sin y]) [pi,1]
--   [[0.0,1.0],[1.0,0.0],[1.0,1.0],[1.0,3.141592653589793],[19.472221418841606,12.502969588876512]]
--   </pre>
jacobian :: (Traversable f, Traversable g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> g (f a)

-- | Compute the Jacobian using <a>Forward</a> mode <a>AD</a> along with
--   the actual answer.
jacobian' :: (Traversable f, Traversable g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> g (a, f a)

-- | Compute the Jacobian using <a>Forward</a> mode <a>AD</a> and combine
--   the output with the input. This must transpose the result, so
--   <a>jacobianWithT</a> is faster, and allows more result types.
jacobianWith :: (Traversable f, Traversable g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> g (f b)

-- | Compute the Jacobian using <a>Forward</a> mode <a>AD</a> combined with
--   the input using a user specified function, along with the actual
--   answer.
jacobianWith' :: (Traversable f, Traversable g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> g (a, f b)

-- | A fast, simple, transposed Jacobian computed with forward-mode AD.
jacobianT :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> f (g a)

-- | A fast, simple, transposed Jacobian computed with <a>Forward</a> mode
--   <a>AD</a> that combines the output with the input.
jacobianWithT :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> f (g b)

-- | Compute the product of a vector with the Hessian using
--   forward-on-forward-mode AD.
hessianProduct :: (Traversable f, Num a) => (forall s. f (AD s (On (Forward (Forward a)))) -> AD s (On (Forward (Forward a)))) -> f (a, a) -> f a

-- | Compute the gradient and hessian product using forward-on-forward-mode
--   AD.
hessianProduct' :: (Traversable f, Num a) => (forall s. f (AD s (On (Forward (Forward a)))) -> AD s (On (Forward (Forward a)))) -> f (a, a) -> f (a, a)

-- | The <a>diff</a> function calculates the first derivative of a
--   scalar-to-scalar function by forward-mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diff sin 0
--   1.0
--   </pre>
diff :: Num a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> a

-- | The <a>diff'</a> function calculates the result and first derivative
--   of scalar-to-scalar function by <a>Forward</a> mode <a>AD</a>
--   
--   <pre>
--   <a>diff'</a> <a>sin</a> == <a>sin</a> <a>&amp;&amp;&amp;</a> <a>cos</a>
--   <a>diff'</a> f = f <a>&amp;&amp;&amp;</a> d f
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' sin 0
--   (0.0,1.0)
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' exp 0
--   (1.0,1.0)
--   </pre>
diff' :: Num a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> (a, a)

-- | The <a>diffF</a> function calculates the first derivatives of
--   scalar-to-nonscalar function by <a>Forward</a> mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diffF (\a -&gt; [sin a, cos a]) 0
--   [1.0,-0.0]
--   </pre>
diffF :: (Functor f, Num a) => (forall s. AD s (Forward a) -> f (AD s (Forward a))) -> a -> f a

-- | The <a>diffF'</a> function calculates the result and first derivatives
--   of a scalar-to-non-scalar function by <a>Forward</a> mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diffF' (\a -&gt; [sin a, cos a]) 0
--   [(0.0,1.0),(1.0,-0.0)]
--   </pre>
diffF' :: (Functor f, Num a) => (forall s. AD s (Forward a) -> f (AD s (Forward a))) -> a -> f (a, a)

-- | Compute the directional derivative of a function given a zipped up
--   <a>Functor</a> of the input values and their derivatives
du :: (Functor f, Num a) => (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f (a, a) -> a

-- | Compute the answer and directional derivative of a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives
du' :: (Functor f, Num a) => (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f (a, a) -> (a, a)

-- | Compute a vector of directional derivatives for a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives.
duF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f (a, a) -> g a

-- | Compute a vector of answers and directional derivatives for a function
--   given a zipped up <a>Functor</a> of the input values and their
--   derivatives.
duF' :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f (a, a) -> g (a, a)

module Numeric.AD.Rank1.Forward.Double
data ForwardDouble

-- | Compute the gradient of a function using forward mode AD.
--   
--   Note, this performs <i>O(n)</i> worse than <a>grad</a> for <tt>n</tt>
--   inputs, in exchange for better space utilization.
grad :: Traversable f => (f ForwardDouble -> ForwardDouble) -> f Double -> f Double

-- | Compute the gradient and answer to a function using forward mode AD.
--   
--   Note, this performs <i>O(n)</i> worse than <a>grad'</a> for <tt>n</tt>
--   inputs, in exchange for better space utilization.
grad' :: Traversable f => (f ForwardDouble -> ForwardDouble) -> f Double -> (Double, f Double)

-- | Compute the gradient of a function using forward mode AD and combine
--   the result with the input using a user-specified function.
--   
--   Note, this performs <i>O(n)</i> worse than <a>gradWith</a> for
--   <tt>n</tt> inputs, in exchange for better space utilization.
gradWith :: Traversable f => (Double -> Double -> b) -> (f ForwardDouble -> ForwardDouble) -> f Double -> f b

-- | Compute the gradient of a function using forward mode AD and the
--   answer, and combine the result with the input using a user-specified
--   function.
--   
--   Note, this performs <i>O(n)</i> worse than <a>gradWith'</a> for
--   <tt>n</tt> inputs, in exchange for better space utilization.
--   
--   <pre>
--   &gt;&gt;&gt; gradWith' (,) sum [0..4]
--   (10.0,[(0.0,1.0),(1.0,1.0),(2.0,1.0),(3.0,1.0),(4.0,1.0)])
--   </pre>
gradWith' :: Traversable f => (Double -> Double -> b) -> (f ForwardDouble -> ForwardDouble) -> f Double -> (Double, f b)

-- | Compute the Jacobian using <tt>Forward</tt> mode <tt>AD</tt>. This
--   must transpose the result, so <a>jacobianT</a> is faster and allows
--   more result types.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [y,x,x+y,x*y,exp x * sin y]) [pi,1]
--   [[0.0,1.0],[1.0,0.0],[1.0,1.0],[1.0,3.141592653589793],[19.472221418841606,12.502969588876512]]
--   </pre>
jacobian :: (Traversable f, Traversable g) => (f ForwardDouble -> g ForwardDouble) -> f Double -> g (f Double)

-- | Compute the Jacobian using <tt>Forward</tt> mode <tt>AD</tt> along
--   with the actual answer.
jacobian' :: (Traversable f, Traversable g) => (f ForwardDouble -> g ForwardDouble) -> f Double -> g (Double, f Double)

-- | Compute the Jacobian using <tt>Forward</tt> mode <tt>AD</tt> and
--   combine the output with the input. This must transpose the result, so
--   <a>jacobianWithT</a> is faster, and allows more result types.
jacobianWith :: (Traversable f, Traversable g) => (Double -> Double -> b) -> (f ForwardDouble -> g ForwardDouble) -> f Double -> g (f b)

-- | Compute the Jacobian using <tt>Forward</tt> mode <tt>AD</tt> combined
--   with the input using a user specified function, along with the actual
--   answer.
jacobianWith' :: (Traversable f, Traversable g) => (Double -> Double -> b) -> (f ForwardDouble -> g ForwardDouble) -> f Double -> g (Double, f b)

-- | A fast, simple, transposed Jacobian computed with forward-mode AD.
jacobianT :: (Traversable f, Functor g) => (f ForwardDouble -> g ForwardDouble) -> f Double -> f (g Double)

-- | A fast, simple, transposed Jacobian computed with <tt>Forward</tt>
--   mode <tt>AD</tt> that combines the output with the input.
jacobianWithT :: (Traversable f, Functor g) => (Double -> Double -> b) -> (f ForwardDouble -> g ForwardDouble) -> f Double -> f (g b)

-- | The <a>diff</a> function calculates the first derivative of a
--   scalar-to-scalar function by forward-mode <tt>AD</tt>
--   
--   <pre>
--   &gt;&gt;&gt; diff sin 0
--   1.0
--   </pre>
diff :: (ForwardDouble -> ForwardDouble) -> Double -> Double

-- | The <a>diff'</a> function calculates the result and first derivative
--   of scalar-to-scalar function by <tt>Forward</tt> mode <tt>AD</tt>
--   
--   <pre>
--   <a>diff'</a> <a>sin</a> == <a>sin</a> <a>&amp;&amp;&amp;</a> <a>cos</a>
--   <a>diff'</a> f = f <a>&amp;&amp;&amp;</a> d f
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' sin 0
--   (0.0,1.0)
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' exp 0
--   (1.0,1.0)
--   </pre>
diff' :: (ForwardDouble -> ForwardDouble) -> Double -> (Double, Double)

-- | The <a>diffF</a> function calculates the first derivatives of
--   scalar-to-nonscalar function by <tt>Forward</tt> mode <tt>AD</tt>
--   
--   <pre>
--   &gt;&gt;&gt; diffF (\a -&gt; [sin a, cos a]) 0
--   [1.0,-0.0]
--   </pre>
diffF :: Functor f => (ForwardDouble -> f ForwardDouble) -> Double -> f Double

-- | The <a>diffF'</a> function calculates the result and first derivatives
--   of a scalar-to-non-scalar function by <tt>Forward</tt> mode
--   <tt>AD</tt>
--   
--   <pre>
--   &gt;&gt;&gt; diffF' (\a -&gt; [sin a, cos a]) 0
--   [(0.0,1.0),(1.0,-0.0)]
--   </pre>
diffF' :: Functor f => (ForwardDouble -> f ForwardDouble) -> Double -> f (Double, Double)

-- | Compute the directional derivative of a function given a zipped up
--   <a>Functor</a> of the input values and their derivatives
du :: Functor f => (f ForwardDouble -> ForwardDouble) -> f (Double, Double) -> Double

-- | Compute the answer and directional derivative of a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives
du' :: Functor f => (f ForwardDouble -> ForwardDouble) -> f (Double, Double) -> (Double, Double)

-- | Compute a vector of directional derivatives for a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives.
duF :: (Functor f, Functor g) => (f ForwardDouble -> g ForwardDouble) -> f (Double, Double) -> g Double

-- | Compute a vector of answers and directional derivatives for a function
--   given a zipped up <a>Functor</a> of the input values and their
--   derivatives.
duF' :: (Functor f, Functor g) => (f ForwardDouble -> g ForwardDouble) -> f (Double, Double) -> g (Double, Double)


-- | Forward Mode AD specialized to <a>Double</a>. This enables the entire
--   structure to be unboxed.
module Numeric.AD.Mode.Forward.Double
data AD s a
data ForwardDouble

-- | Compute the gradient of a function using forward mode AD.
--   
--   Note, this performs <i>O(n)</i> worse than <a>grad</a> for <tt>n</tt>
--   inputs, in exchange for better space utilization.
grad :: Traversable f => (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f Double -> f Double

-- | Compute the gradient and answer to a function using forward mode AD.
--   
--   Note, this performs <i>O(n)</i> worse than <a>grad'</a> for <tt>n</tt>
--   inputs, in exchange for better space utilization.
grad' :: Traversable f => (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f Double -> (Double, f Double)

-- | Compute the gradient of a function using forward mode AD and combine
--   the result with the input using a user-specified function.
--   
--   Note, this performs <i>O(n)</i> worse than <a>gradWith</a> for
--   <tt>n</tt> inputs, in exchange for better space utilization.
gradWith :: Traversable f => (Double -> Double -> b) -> (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f Double -> f b

-- | Compute the gradient of a function using forward mode AD and the
--   answer, and combine the result with the input using a user-specified
--   function.
--   
--   Note, this performs <i>O(n)</i> worse than <a>gradWith'</a> for
--   <tt>n</tt> inputs, in exchange for better space utilization.
--   
--   <pre>
--   &gt;&gt;&gt; gradWith' (,) sum [0..4]
--   (10.0,[(0.0,1.0),(1.0,1.0),(2.0,1.0),(3.0,1.0),(4.0,1.0)])
--   </pre>
gradWith' :: Traversable f => (Double -> Double -> b) -> (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f Double -> (Double, f b)

-- | Compute the Jacobian using <tt>Forward</tt> mode <a>AD</a>. This must
--   transpose the result, so <a>jacobianT</a> is faster and allows more
--   result types.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [y,x,x+y,x*y,exp x * sin y]) [pi,1]
--   [[0.0,1.0],[1.0,0.0],[1.0,1.0],[1.0,3.141592653589793],[19.472221418841606,12.502969588876512]]
--   </pre>
jacobian :: (Traversable f, Traversable g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> g (f Double)

-- | Compute the Jacobian using <tt>Forward</tt> mode <a>AD</a> along with
--   the actual answer.
jacobian' :: (Traversable f, Traversable g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> g (Double, f Double)

-- | Compute the Jacobian using <tt>Forward</tt> mode <a>AD</a> and combine
--   the output with the input. This must transpose the result, so
--   <a>jacobianWithT</a> is faster, and allows more result types.
jacobianWith :: (Traversable f, Traversable g) => (Double -> Double -> b) -> (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> g (f b)

-- | Compute the Jacobian using <tt>Forward</tt> mode <a>AD</a> combined
--   with the input using a user specified function, along with the actual
--   answer.
jacobianWith' :: (Traversable f, Traversable g) => (Double -> Double -> b) -> (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> g (Double, f b)

-- | A fast, simple, transposed Jacobian computed with forward-mode AD.
jacobianT :: (Traversable f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> f (g Double)

-- | A fast, simple, transposed Jacobian computed with <tt>Forward</tt>
--   mode <a>AD</a> that combines the output with the input.
jacobianWithT :: (Traversable f, Functor g) => (Double -> Double -> b) -> (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> f (g b)

-- | The <a>diff</a> function calculates the first derivative of a
--   scalar-to-scalar function by forward-mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diff sin 0
--   1.0
--   </pre>
diff :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> Double

-- | The <a>diff'</a> function calculates the result and first derivative
--   of scalar-to-scalar function by <tt>Forward</tt> mode <a>AD</a>
--   
--   <pre>
--   <a>diff'</a> <a>sin</a> == <a>sin</a> <a>&amp;&amp;&amp;</a> <a>cos</a>
--   <a>diff'</a> f = f <a>&amp;&amp;&amp;</a> d f
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' sin 0
--   (0.0,1.0)
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' exp 0
--   (1.0,1.0)
--   </pre>
diff' :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> (Double, Double)

-- | The <a>diffF</a> function calculates the first derivatives of
--   scalar-to-nonscalar function by <tt>Forward</tt> mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diffF (\a -&gt; [sin a, cos a]) 0
--   [1.0,-0.0]
--   </pre>
diffF :: Functor f => (forall s. AD s ForwardDouble -> f (AD s ForwardDouble)) -> Double -> f Double

-- | The <a>diffF'</a> function calculates the result and first derivatives
--   of a scalar-to-non-scalar function by <tt>Forward</tt> mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diffF' (\a -&gt; [sin a, cos a]) 0
--   [(0.0,1.0),(1.0,-0.0)]
--   </pre>
diffF' :: Functor f => (forall s. AD s ForwardDouble -> f (AD s ForwardDouble)) -> Double -> f (Double, Double)

-- | Compute the directional derivative of a function given a zipped up
--   <a>Functor</a> of the input values and their derivatives
du :: Functor f => (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f (Double, Double) -> Double

-- | Compute the answer and directional derivative of a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives
du' :: Functor f => (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f (Double, Double) -> (Double, Double)

-- | Compute a vector of directional derivatives for a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives.
duF :: (Functor f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f (Double, Double) -> g Double

-- | Compute a vector of answers and directional derivatives for a function
--   given a zipped up <a>Functor</a> of the input values and their
--   derivatives.
duF' :: (Functor f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f (Double, Double) -> g (Double, Double)


-- | This module provides reverse-mode Automatic Differentiation using
--   post-hoc linear time topological sorting.
--   
--   For reverse mode AD we use <a>StableName</a> to recover sharing
--   information from the tape to avoid combinatorial explosion, and thus
--   run asymptotically faster than it could without such sharing
--   information, but the use of side-effects contained herein is benign.
module Numeric.AD.Rank1.Kahn

-- | <tt>Kahn</tt> is a <a>Mode</a> using reverse-mode automatic
--   differentiation that provides fast <tt>diffFU</tt>, <tt>diff2FU</tt>,
--   <tt>grad</tt>, <tt>grad2</tt> and a fast <tt>jacobian</tt> when you
--   have a significantly smaller number of outputs than inputs.
data Kahn a

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | The <a>grad</a> function calculates the gradient of a
--   non-scalar-to-scalar function with kahn-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y,z] -&gt; x*y+z) [1,2,3]
--   [2,1,1]
--   </pre>
grad :: (Traversable f, Num a) => (f (Kahn a) -> Kahn a) -> f a -> f a

-- | The <a>grad'</a> function calculates the result and gradient of a
--   non-scalar-to-scalar function with kahn-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad' (\[x,y,z] -&gt; 4*x*exp y+cos z) [1,2,3]
--   (28.566231899122155,[29.5562243957226,29.5562243957226,-0.1411200080598672])
--   </pre>
grad' :: (Traversable f, Num a) => (f (Kahn a) -> Kahn a) -> f a -> (a, f a)

-- | <tt><a>gradWith</a> g f</tt> calculates the gradient of a
--   non-scalar-to-scalar function <tt>f</tt> with kahn-mode AD in a single
--   pass. The gradient is combined element-wise with the argument using
--   the function <tt>g</tt>.
--   
--   <pre>
--   <a>grad</a> = <a>gradWith</a> (\_ dx -&gt; dx)
--   <a>id</a> = <a>gradWith</a> const
--   </pre>
gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (f (Kahn a) -> Kahn a) -> f a -> f b

-- | <tt><a>gradWith'</a> g f</tt> calculates the result and gradient of a
--   non-scalar-to-scalar function <tt>f</tt> with kahn-mode AD in a single
--   pass. The gradient is combined element-wise with the argument using the
--   function <tt>g</tt>.
--   
--   <pre>
--   <a>grad'</a> == <a>gradWith'</a> (\_ dx -&gt; dx)
--   </pre>
gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (f (Kahn a) -> Kahn a) -> f a -> (a, f b)

-- | The <a>jacobian</a> function calculates the jacobian of a
--   non-scalar-to-non-scalar function with kahn AD lazily in <tt>m</tt>
--   passes for <tt>m</tt> outputs.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [y,x,x*y]) [2,1]
--   [[0,1],[1,0],[1,2]]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [exp y,cos x,x+y]) [1,2]
--   [[0.0,7.38905609893065],[-0.8414709848078965,0.0],[1.0,1.0]]
--   </pre>
jacobian :: (Traversable f, Functor g, Num a) => (f (Kahn a) -> g (Kahn a)) -> f a -> g (f a)

-- | The <a>jacobian'</a> function calculates both the result and the
--   Jacobian of a nonscalar-to-nonscalar function, using <tt>m</tt>
--   invocations of kahn AD, where <tt>m</tt> is the output dimensionality.
--   Applying <tt>fmap snd</tt> to the result will recover the result of
--   <a>jacobian</a>. (An alias for <tt>gradF'</tt>.)
--   
--   <pre>
--   &gt;&gt;&gt; jacobian' (\[x,y] -&gt; [y,x,x*y]) [2,1]
--   [(1,[0,1]),(2,[1,0]),(2,[1,2])]
--   </pre>
jacobian' :: (Traversable f, Functor g, Num a) => (f (Kahn a) -> g (Kahn a)) -> f a -> g (a, f a)

-- | 'jacobianWith g f' calculates the Jacobian of a
--   non-scalar-to-non-scalar function <tt>f</tt> with kahn AD lazily in
--   <tt>m</tt> passes for <tt>m</tt> outputs.
--   
--   Instead of returning the Jacobian matrix, the elements of the matrix
--   are combined with the input using the <tt>g</tt>.
--   
--   <pre>
--   <a>jacobian</a> = <a>jacobianWith</a> (\_ dx -&gt; dx)
--   <a>jacobianWith</a> <a>const</a> = (\f x -&gt; <a>const</a> x <a>&lt;$&gt;</a> f x)
--   </pre>
jacobianWith :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (f (Kahn a) -> g (Kahn a)) -> f a -> g (f b)

-- | <tt><a>jacobianWith'</a> g f</tt> calculates both the result and the Jacobian
--   of a nonscalar-to-nonscalar function <tt>f</tt>, using <tt>m</tt>
--   invocations of kahn AD, where <tt>m</tt> is the output dimensionality.
--   Applying <tt>fmap snd</tt> to the result will recover the result of
--   <a>jacobianWith</a>
--   
--   Instead of returning the Jacobian matrix, the elements of the matrix
--   are combined with the input using the <tt>g</tt>.
--   
--   <pre>
--   <a>jacobian'</a> == <a>jacobianWith'</a> (\_ dx -&gt; dx)
--   </pre>
jacobianWith' :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (f (Kahn a) -> g (Kahn a)) -> f a -> g (a, f b)

-- | Compute the <a>hessian</a> via the <a>jacobian</a> of the gradient.
--   gradient is computed in <a>Kahn</a> mode and then the <a>jacobian</a>
--   is computed in <a>Kahn</a> mode.
--   
--   However, since the <tt><a>grad</a> f :: f a -&gt; f a</tt> is square
--   this is not as fast as using the forward-mode <a>jacobian</a> of a
--   reverse mode gradient provided by <a>hessian</a>.
--   
--   <pre>
--   &gt;&gt;&gt; hessian (\[x,y] -&gt; x*y) [1,2]
--   [[0,1],[1,0]]
--   </pre>
hessian :: (Traversable f, Num a) => (f (On (Kahn (Kahn a))) -> (On (Kahn (Kahn a)))) -> f a -> f (f a)

-- | Compute the order 3 Hessian tensor on a non-scalar-to-non-scalar
--   function via the <a>Kahn</a>-mode Jacobian of the <a>Kahn</a>-mode
--   Jacobian of the function.
--   
--   Less efficient than <a>hessianF</a>.
--   
--   <pre>
--   &gt;&gt;&gt; hessianF (\[x,y] -&gt; [x*y,x+y,exp x*cos y]) [1,2]
--   [[[0.0,1.0],[1.0,0.0]],[[0.0,0.0],[0.0,0.0]],[[-1.1312043837568135,-2.4717266720048188],[-2.4717266720048188,1.1312043837568135]]]
--   </pre>
hessianF :: (Traversable f, Functor g, Num a) => (f (On (Kahn (Kahn a))) -> g (On (Kahn (Kahn a)))) -> f a -> g (f (f a))

-- | Compute the derivative of a function.
--   
--   <pre>
--   &gt;&gt;&gt; diff sin 0
--   1.0
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; cos 0
--   1.0
--   </pre>
diff :: Num a => (Kahn a -> Kahn a) -> a -> a

-- | The <a>diff'</a> function calculates the value and derivative, as a
--   pair, of a scalar-to-scalar function.
--   
--   <pre>
--   &gt;&gt;&gt; diff' sin 0
--   (0.0,1.0)
--   </pre>
diff' :: Num a => (Kahn a -> Kahn a) -> a -> (a, a)

-- | Compute the derivatives of a function that returns a vector with
--   regards to its single input.
--   
--   <pre>
--   &gt;&gt;&gt; diffF (\a -&gt; [sin a, cos a]) 0
--   [1.0,0.0]
--   </pre>
diffF :: (Functor f, Num a) => (Kahn a -> f (Kahn a)) -> a -> f a

-- | Compute the derivatives of a function that returns a vector with
--   regards to its single input as well as the primal answer.
--   
--   <pre>
--   &gt;&gt;&gt; diffF' (\a -&gt; [sin a, cos a]) 0
--   [(0.0,1.0),(1.0,0.0)]
--   </pre>
diffF' :: (Functor f, Num a) => (Kahn a -> f (Kahn a)) -> a -> f (a, a)
vgrad :: Grad i o o' a => i -> o
vgrad' :: Grad i o o' a => i -> o'
class Num a => Grad i o o' a | i -> a o o', o -> a i o', o' -> a i o


-- | This module provides reverse-mode Automatic Differentiation using
--   post-hoc linear time topological sorting.
--   
--   For reverse mode AD we use <a>StableName</a> to recover sharing
--   information from the tape to avoid combinatorial explosion, and thus
--   run asymptotically faster than it could without such sharing
--   information, but the use of side-effects contained herein is benign.
module Numeric.AD.Mode.Kahn
data AD s a

-- | <tt>Kahn</tt> is a <a>Mode</a> using reverse-mode automatic
--   differentiation that provides fast <tt>diffFU</tt>, <tt>diff2FU</tt>,
--   <tt>grad</tt>, <tt>grad2</tt> and a fast <tt>jacobian</tt> when you
--   have a significantly smaller number of outputs than inputs.
data Kahn a

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | The <a>grad</a> function calculates the gradient of a
--   non-scalar-to-scalar function with kahn-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y,z] -&gt; x*y+z) [1,2,3]
--   [2,1,1]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y] -&gt; x**y) [0,2]
--   [0.0,NaN]
--   </pre>
grad :: (Traversable f, Num a) => (forall s. f (AD s (Kahn a)) -> AD s (Kahn a)) -> f a -> f a

-- | The <a>grad'</a> function calculates the result and gradient of a
--   non-scalar-to-scalar function with kahn-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad' (\[x,y,z] -&gt; 4*x*exp y+cos z) [1,2,3]
--   (28.566231899122155,[29.5562243957226,29.5562243957226,-0.1411200080598672])
--   </pre>
grad' :: (Traversable f, Num a) => (forall s. f (AD s (Kahn a)) -> AD s (Kahn a)) -> f a -> (a, f a)

-- | <tt><a>gradWith</a> g f</tt> calculates the gradient of a
--   non-scalar-to-scalar function <tt>f</tt> with kahn-mode AD in a single
--   pass. The gradient is combined element-wise with the argument using
--   the function <tt>g</tt>.
--   
--   <pre>
--   <a>grad</a> = <a>gradWith</a> (\_ dx -&gt; dx)
--   <a>id</a> = <a>gradWith</a> const
--   </pre>
gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. f (AD s (Kahn a)) -> AD s (Kahn a)) -> f a -> f b

-- | <tt><a>gradWith'</a> g f</tt> calculates the result and gradient of a
--   non-scalar-to-scalar function <tt>f</tt> with kahn-mode AD in a single
--   pass. The gradient is combined element-wise with the argument using the
--   function <tt>g</tt>.
--   
--   <pre>
--   <a>grad'</a> == <a>gradWith'</a> (\_ dx -&gt; dx)
--   </pre>
gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. f (AD s (Kahn a)) -> AD s (Kahn a)) -> f a -> (a, f b)

-- | The <a>jacobian</a> function calculates the jacobian of a
--   non-scalar-to-non-scalar function with kahn AD lazily in <tt>m</tt>
--   passes for <tt>m</tt> outputs.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [y,x,x*y]) [2,1]
--   [[0,1],[1,0],[1,2]]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [exp y,cos x,x+y]) [1,2]
--   [[0.0,7.38905609893065],[-0.8414709848078965,0.0],[1.0,1.0]]
--   </pre>
jacobian :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Kahn a)) -> g (AD s (Kahn a))) -> f a -> g (f a)

-- | The <a>jacobian'</a> function calculates both the result and the
--   Jacobian of a nonscalar-to-nonscalar function, using <tt>m</tt>
--   invocations of kahn AD, where <tt>m</tt> is the output dimensionality.
--   Applying <tt>fmap snd</tt> to the result will recover the result of
--   <a>jacobian</a>. (An alias for <tt>gradF'</tt>.)
--   
--   <pre>
--   &gt;&gt;&gt; jacobian' (\[x,y] -&gt; [y,x,x*y]) [2,1]
--   [(1,[0,1]),(2,[1,0]),(2,[1,2])]
--   </pre>
jacobian' :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Kahn a)) -> g (AD s (Kahn a))) -> f a -> g (a, f a)

-- | 'jacobianWith g f' calculates the Jacobian of a
--   non-scalar-to-non-scalar function <tt>f</tt> with kahn AD lazily in
--   <tt>m</tt> passes for <tt>m</tt> outputs.
--   
--   Instead of returning the Jacobian matrix, the elements of the matrix
--   are combined with the input using the <tt>g</tt>.
--   
--   <pre>
--   <a>jacobian</a> = <a>jacobianWith</a> (\_ dx -&gt; dx)
--   <a>jacobianWith</a> <a>const</a> = (\f x -&gt; <a>const</a> x <a>&lt;$&gt;</a> f x)
--   </pre>
jacobianWith :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Kahn a)) -> g (AD s (Kahn a))) -> f a -> g (f b)

-- | <tt><a>jacobianWith'</a> g f</tt> calculates both the result and the Jacobian
--   of a nonscalar-to-nonscalar function <tt>f</tt>, using <tt>m</tt>
--   invocations of kahn AD, where <tt>m</tt> is the output dimensionality.
--   Applying <tt>fmap snd</tt> to the result will recover the result of
--   <a>jacobianWith</a>
--   
--   Instead of returning the Jacobian matrix, the elements of the matrix
--   are combined with the input using the <tt>g</tt>.
--   
--   <pre>
--   <a>jacobian'</a> == <a>jacobianWith'</a> (\_ dx -&gt; dx)
--   </pre>
jacobianWith' :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Kahn a)) -> g (AD s (Kahn a))) -> f a -> g (a, f b)

-- | Compute the <a>hessian</a> via the <a>jacobian</a> of the gradient.
--   gradient is computed in <a>Kahn</a> mode and then the <a>jacobian</a>
--   is computed in <a>Kahn</a> mode.
--   
--   However, since the <tt><a>grad</a> f :: f a -&gt; f a</tt> is square
--   this is not as fast as using the forward-mode <a>jacobian</a> of a
--   reverse mode gradient provided by <a>hessian</a>.
--   
--   <pre>
--   &gt;&gt;&gt; hessian (\[x,y] -&gt; x*y) [1,2]
--   [[0,1],[1,0]]
--   </pre>
hessian :: (Traversable f, Num a) => (forall s. f (AD s (On (Kahn (Kahn a)))) -> AD s (On (Kahn (Kahn a)))) -> f a -> f (f a)

-- | Compute the order 3 Hessian tensor on a non-scalar-to-non-scalar
--   function via the <a>Kahn</a>-mode Jacobian of the <a>Kahn</a>-mode
--   Jacobian of the function.
--   
--   Less efficient than <a>hessianF</a>.
--   
--   <pre>
--   &gt;&gt;&gt; hessianF (\[x,y] -&gt; [x*y,x+y,exp x*cos y]) [1,2]
--   [[[0.0,1.0],[1.0,0.0]],[[0.0,0.0],[0.0,0.0]],[[-1.1312043837568135,-2.4717266720048188],[-2.4717266720048188,1.1312043837568135]]]
--   </pre>
hessianF :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (On (Kahn (Kahn a)))) -> g (AD s (On (Kahn (Kahn a))))) -> f a -> g (f (f a))

-- | Compute the derivative of a function.
--   
--   <pre>
--   &gt;&gt;&gt; diff sin 0
--   1.0
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; cos 0
--   1.0
--   </pre>
diff :: Num a => (forall s. AD s (Kahn a) -> AD s (Kahn a)) -> a -> a

-- | The <a>diff'</a> function calculates the value and derivative, as a
--   pair, of a scalar-to-scalar function.
--   
--   <pre>
--   &gt;&gt;&gt; diff' sin 0
--   (0.0,1.0)
--   </pre>
diff' :: Num a => (forall s. AD s (Kahn a) -> AD s (Kahn a)) -> a -> (a, a)

-- | Compute the derivatives of a function that returns a vector with
--   regards to its single input.
--   
--   <pre>
--   &gt;&gt;&gt; diffF (\a -&gt; [sin a, cos a]) 0
--   [1.0,0.0]
--   </pre>
diffF :: (Functor f, Num a) => (forall s. AD s (Kahn a) -> f (AD s (Kahn a))) -> a -> f a

-- | Compute the derivatives of a function that returns a vector with
--   regards to its single input as well as the primal answer.
--   
--   <pre>
--   &gt;&gt;&gt; diffF' (\a -&gt; [sin a, cos a]) 0
--   [(0.0,1.0),(1.0,0.0)]
--   </pre>
diffF' :: (Functor f, Num a) => (forall s. AD s (Kahn a) -> f (AD s (Kahn a))) -> a -> f (a, a)


module Numeric.AD.Rank1.Newton

-- | The <a>findZero</a> function finds a zero of a scalar function using
--   Newton's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   Examples:
--   
--   <pre>
--   &gt;&gt;&gt; take 10 $ findZero (\x-&gt;x^2-4) 1
--   [1.0,2.5,2.05,2.000609756097561,2.0000000929222947,2.000000000000002,2.0]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ findZero ((+1).(^2)) (1 :+ 1)
--   0.0 :+ 1.0
--   </pre>
findZero :: (Fractional a, Eq a) => (Forward a -> Forward a) -> a -> [a]

-- | The <a>findZeroNoEq</a> function behaves the same as <a>findZero</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
findZeroNoEq :: Fractional a => (Forward a -> Forward a) -> a -> [a]

-- | The <a>inverse</a> function inverts a scalar function using Newton's
--   method; its output is a stream of increasingly accurate results.
--   (Modulo the usual caveats.) If the stream becomes constant ("it
--   converges"), no further elements are returned.
--   
--   Example:
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ inverse sqrt 1 (sqrt 10)
--   10.0
--   </pre>
inverse :: (Fractional a, Eq a) => (Forward a -> Forward a) -> a -> a -> [a]

-- | The <a>inverseNoEq</a> function behaves the same as <a>inverse</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
inverseNoEq :: Fractional a => (Forward a -> Forward a) -> a -> a -> [a]

-- | The <a>fixedPoint</a> function finds a fixed point of a scalar function
--   using Newton's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.)
--   
--   If the stream becomes constant ("it converges"), no further elements
--   are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ fixedPoint cos 1
--   0.7390851332151607
--   </pre>
fixedPoint :: (Fractional a, Eq a) => (Forward a -> Forward a) -> a -> [a]

-- | The <a>fixedPointNoEq</a> function behaves the same as
--   <a>fixedPoint</a> except that it doesn't truncate the list once the
--   results become constant. This means it can be used with types without
--   an <a>Eq</a> instance.
fixedPointNoEq :: Fractional a => (Forward a -> Forward a) -> a -> [a]

-- | The <a>extremum</a> function finds an extremum of a scalar function
--   using Newton's method; produces a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ extremum cos 1
--   0.0
--   </pre>
extremum :: (Fractional a, Eq a) => (On (Forward (Forward a)) -> On (Forward (Forward a))) -> a -> [a]

-- | The <a>extremumNoEq</a> function behaves the same as <a>extremum</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
extremumNoEq :: Fractional a => (On (Forward (Forward a)) -> On (Forward (Forward a))) -> a -> [a]

-- | The <a>gradientDescent</a> function performs a multivariate
--   optimization, based on the naive-gradient-descent in the file
--   <tt>stalingrad/examples/flow-tests/pre-saddle-1a.vlad</tt> from the
--   VLAD compiler Stalingrad sources. Its output is a stream of
--   increasingly accurate results. (Modulo the usual caveats.)
--   
--   It uses reverse mode automatic differentiation to compute the
--   gradient.
gradientDescent :: (Traversable f, Fractional a, Ord a) => (f (Kahn a) -> Kahn a) -> f a -> [f a]

-- | Perform a gradient descent using reverse mode automatic
--   differentiation to compute the gradient.
gradientAscent :: (Traversable f, Fractional a, Ord a) => (f (Kahn a) -> Kahn a) -> f a -> [f a]


module Numeric.AD.Newton

-- | The <a>findZero</a> function finds a zero of a scalar function using
--   Newton's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   Examples:
--   
--   <pre>
--   &gt;&gt;&gt; take 10 $ findZero (\x-&gt;x^2-4) 1
--   [1.0,2.5,2.05,2.000609756097561,2.0000000929222947,2.000000000000002,2.0]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ findZero ((+1).(^2)) (1 :+ 1)
--   0.0 :+ 1.0
--   </pre>
findZero :: (Fractional a, Eq a) => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> [a]

-- | The <a>findZeroNoEq</a> function behaves the same as <a>findZero</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
findZeroNoEq :: Fractional a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> [a]

-- | The <a>inverse</a> function inverts a scalar function using Newton's
--   method; its output is a stream of increasingly accurate results.
--   (Modulo the usual caveats.) If the stream becomes constant ("it
--   converges"), no further elements are returned.
--   
--   Example:
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ inverse sqrt 1 (sqrt 10)
--   10.0
--   </pre>
inverse :: (Fractional a, Eq a) => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> a -> [a]

-- | The <a>inverseNoEq</a> function behaves the same as <a>inverse</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
inverseNoEq :: Fractional a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> a -> [a]

-- | The <a>fixedPoint</a> function finds a fixed point of a scalar function
--   using Newton's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.)
--   
--   If the stream becomes constant ("it converges"), no further elements
--   are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ fixedPoint cos 1
--   0.7390851332151607
--   </pre>
fixedPoint :: (Fractional a, Eq a) => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> [a]

-- | The <a>fixedPointNoEq</a> function behaves the same as
--   <a>fixedPoint</a> except that it doesn't truncate the list once the
--   results become constant. This means it can be used with types without
--   an <a>Eq</a> instance.
fixedPointNoEq :: Fractional a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> [a]

-- | The <a>extremum</a> function finds an extremum of a scalar function
--   using Newton's method; produces a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ extremum cos 1
--   0.0
--   </pre>
extremum :: (Fractional a, Eq a) => (forall s. AD s (On (Forward (Forward a))) -> AD s (On (Forward (Forward a)))) -> a -> [a]

-- | The <a>extremumNoEq</a> function behaves the same as <a>extremum</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
extremumNoEq :: Fractional a => (forall s. AD s (On (Forward (Forward a))) -> AD s (On (Forward (Forward a)))) -> a -> [a]

-- | The <a>gradientDescent</a> function performs a multivariate
--   optimization, based on the naive-gradient-descent in the file
--   <tt>stalingrad/examples/flow-tests/pre-saddle-1a.vlad</tt> from the
--   VLAD compiler Stalingrad sources. Its output is a stream of
--   increasingly accurate results. (Modulo the usual caveats.)
--   
--   It uses reverse mode automatic differentiation to compute the
--   gradient.
gradientDescent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> [f a]

-- | <tt>constrainedDescent obj fs env</tt> optimizes the convex function
--   <tt>obj</tt> subject to the convex constraints <tt>f &lt;= 0</tt>
--   where <tt>f <a>elem</a> fs</tt>. This is done using a log barrier to
--   model constraints (i.e. Boyd, Chapter 11.3). The returned optimal
--   point for the objective function must satisfy <tt>fs</tt>, but the
--   initial environment, <tt>env</tt>, needn't be feasible.
constrainedDescent :: forall f a. (Traversable f, RealFloat a, Floating a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> [CC f a] -> f a -> [(a, f a)]

-- | Convex constraint, CC, is a GADT wrapper that hides the existential
--   (<tt>s</tt>) which is so prevalent in the rest of the API. This is an
--   engineering convenience for managing the skolems.
data CC f a
[CC] :: forall f a. (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> CC f a
eval :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> a

-- | Perform a gradient descent using reverse mode automatic
--   differentiation to compute the gradient.
gradientAscent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> [f a]

-- | Perform a conjugate gradient descent using reverse mode automatic
--   differentiation to compute the gradient, and using forward-on-forward
--   mode for computing extrema.
--   
--   <pre>
--   &gt;&gt;&gt; let sq x = x * x
--   
--   &gt;&gt;&gt; let rosenbrock [x,y] = sq (1 - x) + 100 * sq (y - sq x)
--   
--   &gt;&gt;&gt; rosenbrock [0,0]
--   1
--   
--   &gt;&gt;&gt; rosenbrock (conjugateGradientDescent rosenbrock [0, 0] !! 5) &lt; 0.1
--   True
--   </pre>
conjugateGradientDescent :: (Traversable f, Ord a, Fractional a) => (forall s. Chosen s => f (Or s (On (Forward (Forward a))) (Kahn a)) -> Or s (On (Forward (Forward a))) (Kahn a)) -> f a -> [f a]

-- | Perform a conjugate gradient ascent using reverse mode automatic
--   differentiation to compute the gradient.
conjugateGradientAscent :: (Traversable f, Ord a, Fractional a) => (forall s. Chosen s => f (Or s (On (Forward (Forward a))) (Kahn a)) -> Or s (On (Forward (Forward a))) (Kahn a)) -> f a -> [f a]

-- | The <a>stochasticGradientDescent</a> function approximates the true
--   gradient of the cost function by a gradient at a single example. As
--   the algorithm sweeps through the training set, it performs the update
--   for each training example.
--   
--   It uses reverse mode automatic differentiation to compute the gradient.
--   The learning rate is constant throughout, and is set to 0.001
stochasticGradientDescent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Scalar a) -> f (Reverse s a) -> Reverse s a) -> [f (Scalar a)] -> f a -> [f a]
instance Data.Traversable.Traversable f => Data.Traversable.Traversable (Numeric.AD.Newton.SEnv f)
instance Data.Foldable.Foldable f => Data.Foldable.Foldable (Numeric.AD.Newton.SEnv f)
instance GHC.Base.Functor f => GHC.Base.Functor (Numeric.AD.Newton.SEnv f)


module Numeric.AD.Rank1.Newton.Double

-- | The <a>findZero</a> function finds a zero of a scalar function using
--   Newton's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   Examples:
--   
--   <pre>
--   &gt;&gt;&gt; take 10 $ findZero (\x-&gt;x^2-4) 1
--   [1.0,2.5,2.05,2.000609756097561,2.0000000929222947,2.000000000000002,2.0]
--   </pre>
findZero :: (ForwardDouble -> ForwardDouble) -> Double -> [Double]

-- | The <a>findZeroNoEq</a> function behaves the same as <a>findZero</a>
--   except that it doesn't truncate the list once the results become
--   constant.
findZeroNoEq :: (ForwardDouble -> ForwardDouble) -> Double -> [Double]

-- | The <a>inverse</a> function inverts a scalar function using Newton's
--   method; its output is a stream of increasingly accurate results.
--   (Modulo the usual caveats.) If the stream becomes constant ("it
--   converges"), no further elements are returned.
--   
--   Example:
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ inverse sqrt 1 (sqrt 10)
--   10.0
--   </pre>
inverse :: (ForwardDouble -> ForwardDouble) -> Double -> Double -> [Double]

-- | The <a>inverseNoEq</a> function behaves the same as <a>inverse</a>
--   except that it doesn't truncate the list once the results become
--   constant.
inverseNoEq :: (ForwardDouble -> ForwardDouble) -> Double -> Double -> [Double]

-- | The <a>fixedPoint</a> function finds a fixed point of a scalar function
--   using Newton's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.)
--   
--   If the stream becomes constant ("it converges"), no further elements
--   are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ fixedPoint cos 1
--   0.7390851332151607
--   </pre>
fixedPoint :: (ForwardDouble -> ForwardDouble) -> Double -> [Double]

-- | The <a>fixedPointNoEq</a> function behaves the same as
--   <a>fixedPoint</a> except that it doesn't truncate the list once the
--   results become constant.
fixedPointNoEq :: (ForwardDouble -> ForwardDouble) -> Double -> [Double]

-- | The <a>extremum</a> function finds an extremum of a scalar function
--   using Newton's method; produces a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ extremum cos 1
--   0.0
--   </pre>
extremum :: (On (Forward ForwardDouble) -> On (Forward ForwardDouble)) -> Double -> [Double]

-- | The <a>extremumNoEq</a> function behaves the same as <a>extremum</a>
--   except that it doesn't truncate the list once the results become
--   constant.
extremumNoEq :: (On (Forward ForwardDouble) -> On (Forward ForwardDouble)) -> Double -> [Double]


module Numeric.AD.Newton.Double

-- | The <a>findZero</a> function finds a zero of a scalar function using
--   Newton's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   Examples:
--   
--   <pre>
--   &gt;&gt;&gt; take 10 $ findZero (\x-&gt;x^2-4) 1
--   [1.0,2.5,2.05,2.000609756097561,2.0000000929222947,2.000000000000002,2.0]
--   </pre>
findZero :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> [Double]

-- | The <a>findZeroNoEq</a> function behaves the same as <a>findZero</a>
--   except that it doesn't truncate the list once the results become
--   constant.
findZeroNoEq :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> [Double]

-- | The <a>inverse</a> function inverts a scalar function using Newton's
--   method; its output is a stream of increasingly accurate results.
--   (Modulo the usual caveats.) If the stream becomes constant ("it
--   converges"), no further elements are returned.
--   
--   Example:
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ inverse sqrt 1 (sqrt 10)
--   10.0
--   </pre>
inverse :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> Double -> [Double]

-- | The <a>inverseNoEq</a> function behaves the same as <a>inverse</a>
--   except that it doesn't truncate the list once the results become
--   constant.
inverseNoEq :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> Double -> [Double]

-- | The <a>fixedPoint</a> function finds a fixed point of a scalar function
--   using Newton's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.)
--   
--   If the stream becomes constant ("it converges"), no further elements
--   are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ fixedPoint cos 1
--   0.7390851332151607
--   </pre>
fixedPoint :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> [Double]

-- | The <a>fixedPointNoEq</a> function behaves the same as
--   <a>fixedPoint</a> except that it doesn't truncate the list once the
--   results become constant.
fixedPointNoEq :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> [Double]

-- | The <a>extremum</a> function finds an extremum of a scalar function
--   using Newton's method; produces a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ extremum cos 1
--   0.0
--   </pre>
extremum :: (forall s. AD s (On (Forward ForwardDouble)) -> AD s (On (Forward ForwardDouble))) -> Double -> [Double]

-- | The <a>extremumNoEq</a> function behaves the same as <a>extremum</a>
--   except that it doesn't truncate the list once the results become
--   constant.
extremumNoEq :: (forall s. AD s (On (Forward ForwardDouble)) -> AD s (On (Forward ForwardDouble))) -> Double -> [Double]

-- | Perform a conjugate gradient descent using reverse mode automatic
--   differentiation to compute the gradient, and using forward-on-forward
--   mode for computing extrema.
--   
--   <pre>
--   &gt;&gt;&gt; let sq x = x * x
--   
--   &gt;&gt;&gt; let rosenbrock [x,y] = sq (1 - x) + 100 * sq (y - sq x)
--   
--   &gt;&gt;&gt; rosenbrock [0,0]
--   1
--   
--   &gt;&gt;&gt; rosenbrock (conjugateGradientDescent rosenbrock [0, 0] !! 5) &lt; 0.1
--   True
--   </pre>
conjugateGradientDescent :: Traversable f => (forall s. Chosen s => f (Or s (On (Forward ForwardDouble)) (Kahn Double)) -> Or s (On (Forward ForwardDouble)) (Kahn Double)) -> f Double -> [f Double]

-- | Perform a conjugate gradient ascent using reverse mode automatic
--   differentiation to compute the gradient.
conjugateGradientAscent :: Traversable f => (forall s. Chosen s => f (Or s (On (Forward ForwardDouble)) (Kahn Double)) -> Or s (On (Forward ForwardDouble)) (Kahn Double)) -> f Double -> [f Double]


-- | Higher order derivatives via a "dual number tower".
module Numeric.AD.Rank1.Sparse

-- | We only store partials in sorted order, so the map contained in a
--   partial will only contain partials with equal or greater keys to that
--   of the map in which it was found. This should be key for efficiently
--   computing sparse Hessians. There are only (n + k - 1) choose (k - 1)
--   distinct nth partial derivatives of a function with k inputs.
data Sparse a

-- | Embed a constant
auto :: Mode t => Scalar t -> t
grad :: (Traversable f, Num a) => (f (Sparse a) -> Sparse a) -> f a -> f a
grad' :: (Traversable f, Num a) => (f (Sparse a) -> Sparse a) -> f a -> (a, f a)
gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (f (Sparse a) -> Sparse a) -> f a -> f b
gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (f (Sparse a) -> Sparse a) -> f a -> (a, f b)
class Num a => Grad i o o' a | i -> a o o', o -> a i o', o' -> a i o
vgrad :: Grad i o o' a => i -> o
grads :: (Traversable f, Num a) => (f (Sparse a) -> Sparse a) -> f a -> Cofree f a
class Num a => Grads i o a | i -> a o, o -> a i
vgrads :: Grads i o a => i -> o
jacobian :: (Traversable f, Functor g, Num a) => (f (Sparse a) -> g (Sparse a)) -> f a -> g (f a)
jacobian' :: (Traversable f, Functor g, Num a) => (f (Sparse a) -> g (Sparse a)) -> f a -> g (a, f a)
jacobianWith :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (f (Sparse a) -> g (Sparse a)) -> f a -> g (f b)
jacobianWith' :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (f (Sparse a) -> g (Sparse a)) -> f a -> g (a, f b)
jacobians :: (Traversable f, Functor g, Num a) => (f (Sparse a) -> g (Sparse a)) -> f a -> g (Cofree f a)
hessian :: (Traversable f, Num a) => (f (Sparse a) -> Sparse a) -> f a -> f (f a)
hessian' :: (Traversable f, Num a) => (f (Sparse a) -> Sparse a) -> f a -> (a, f (a, f a))
hessianF :: (Traversable f, Functor g, Num a) => (f (Sparse a) -> g (Sparse a)) -> f a -> g (f (f a))
hessianF' :: (Traversable f, Functor g, Num a) => (f (Sparse a) -> g (Sparse a)) -> f a -> g (a, f (a, f a))


-- | Higher order derivatives via a "dual number tower".
module Numeric.AD.Mode.Sparse
data AD s a

-- | We only store partials in sorted order, so the map contained in a
--   partial will only contain partials with equal or greater keys to that
--   of the map in which it was found. This should be key for efficiently
--   computing sparse Hessians. There are only (n + k - 1) choose (k - 1)
--   distinct nth partial derivatives of a function with k inputs.
data Sparse a

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | The <a>grad</a> function calculates the gradient of a
--   non-scalar-to-scalar function with sparse-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y,z] -&gt; x*y+z) [1,2,3]
--   [2,1,1]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y] -&gt; x**y) [0,2]
--   [0.0,NaN]
--   </pre>
grad :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> f a
grad' :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> (a, f a)
grads :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> Cofree f a
gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> f b
gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> (a, f b)
jacobian :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (f a)
jacobian' :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (a, f a)
jacobianWith :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (f b)
jacobianWith' :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (a, f b)
jacobians :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (Cofree f a)
hessian :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> f (f a)
hessian' :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> (a, f (a, f a))
hessianF :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (f (f a))
hessianF' :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (a, f (a, f a))


-- | Higher order derivatives via a "dual number tower".
module Numeric.AD.Rank1.Tower

-- | <tt>Tower</tt> is an AD <a>Mode</a> that calculates a tangent tower by
--   forward AD, and provides fast <tt>diffsUU</tt>, <tt>diffsUF</tt>
data Tower a

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | <tt>taylor f x</tt> computes the Taylor series of <tt>f</tt> around
--   <tt>x</tt>.
taylor :: Fractional a => (Tower a -> Tower a) -> a -> a -> [a]

-- | <tt>taylor0 f x</tt> computes the Taylor series of <tt>f</tt> around
--   <tt>x</tt>, zero-padded.
taylor0 :: Fractional a => (Tower a -> Tower a) -> a -> a -> [a]

-- | <tt>maclaurin f</tt> computes the Maclaurin series of <tt>f</tt>
maclaurin :: Fractional a => (Tower a -> Tower a) -> a -> [a]

-- | <tt>maclaurin0 f</tt> computes the Maclaurin series of <tt>f</tt>,
--   zero-padded
maclaurin0 :: Fractional a => (Tower a -> Tower a) -> a -> [a]

-- | Compute the first derivative of a function <tt>(a -&gt; a)</tt>
diff :: Num a => (Tower a -> Tower a) -> a -> a

-- | Compute the answer and first derivative of a function <tt>(a -&gt;
--   a)</tt>
diff' :: Num a => (Tower a -> Tower a) -> a -> (a, a)

-- | Compute the answer and all derivatives of a function <tt>(a -&gt;
--   a)</tt>
diffs :: Num a => (Tower a -> Tower a) -> a -> [a]

-- | Compute the zero-padded derivatives of a function <tt>(a -&gt; a)</tt>
diffs0 :: Num a => (Tower a -> Tower a) -> a -> [a]

-- | Compute the answer and all derivatives of a function <tt>(a -&gt; f
--   a)</tt>
diffsF :: (Functor f, Num a) => (Tower a -> f (Tower a)) -> a -> f [a]

-- | Compute the zero-padded derivatives of a function <tt>(a -&gt; f
--   a)</tt>
diffs0F :: (Functor f, Num a) => (Tower a -> f (Tower a)) -> a -> f [a]

-- | Compute a directional derivative of a function <tt>(f a -&gt; a)</tt>
du :: (Functor f, Num a) => (f (Tower a) -> Tower a) -> f (a, a) -> a

-- | Compute the answer and a directional derivative of a function <tt>(f a
--   -&gt; a)</tt>
du' :: (Functor f, Num a) => (f (Tower a) -> Tower a) -> f (a, a) -> (a, a)

-- | Given a function <tt>(f a -&gt; a)</tt>, and a tower of derivatives,
--   compute the corresponding directional derivatives.
dus :: (Functor f, Num a) => (f (Tower a) -> Tower a) -> f [a] -> [a]

-- | Given a function <tt>(f a -&gt; a)</tt>, and a tower of derivatives,
--   compute the corresponding directional derivatives, zero-padded
dus0 :: (Functor f, Num a) => (f (Tower a) -> Tower a) -> f [a] -> [a]

-- | Compute a directional derivative of a function <tt>(f a -&gt; g
--   a)</tt>
duF :: (Functor f, Functor g, Num a) => (f (Tower a) -> g (Tower a)) -> f (a, a) -> g a

-- | Compute the answer and a directional derivative of a function <tt>(f a
--   -&gt; g a)</tt>
duF' :: (Functor f, Functor g, Num a) => (f (Tower a) -> g (Tower a)) -> f (a, a) -> g (a, a)

-- | Given a function <tt>(f a -&gt; g a)</tt>, and a tower of derivatives,
--   compute the corresponding directional derivatives
dusF :: (Functor f, Functor g, Num a) => (f (Tower a) -> g (Tower a)) -> f [a] -> g [a]

-- | Given a function <tt>(f a -&gt; g a)</tt>, and a tower of derivatives,
--   compute the corresponding directional derivatives, zero-padded
dus0F :: (Functor f, Functor g, Num a) => (f (Tower a) -> g (Tower a)) -> f [a] -> g [a]


-- | Root finding using Halley's rational method (the second in the class
--   of Householder methods). Assumes the function is three times
--   continuously differentiable and converges cubically when progress can
--   be made.
module Numeric.AD.Rank1.Halley

-- | The <a>findZero</a> function finds a zero of a scalar function using
--   Halley's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   Examples:
--   
--   <pre>
--   &gt;&gt;&gt; take 10 $ findZero (\x-&gt;x^2-4) 1
--   [1.0,1.8571428571428572,1.9997967892704736,1.9999999999994755,2.0]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ findZero ((+1).(^2)) (1 :+ 1)
--   0.0 :+ 1.0
--   </pre>
findZero :: (Fractional a, Eq a) => (Tower a -> Tower a) -> a -> [a]

-- | The <a>findZeroNoEq</a> function behaves the same as <a>findZero</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
findZeroNoEq :: Fractional a => (Tower a -> Tower a) -> a -> [a]

-- | The <a>inverse</a> function inverts a scalar function using Halley's
--   method; its output is a stream of increasingly accurate results.
--   (Modulo the usual caveats.) If the stream becomes constant ("it
--   converges"), no further elements are returned.
--   
--   Note: the <tt>take 10 $ inverse sqrt 1 (sqrt 10)</tt> example that
--   works for Newton's method fails with Halley's method because the
--   preconditions do not hold!
inverse :: (Fractional a, Eq a) => (Tower a -> Tower a) -> a -> a -> [a]

-- | The <a>inverseNoEq</a> function behaves the same as <a>inverse</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
inverseNoEq :: Fractional a => (Tower a -> Tower a) -> a -> a -> [a]

-- | The <a>fixedPoint</a> function finds a fixed point of a scalar function
--   using Halley's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.)
--   
--   If the stream becomes constant ("it converges"), no further elements
--   are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ fixedPoint cos 1
--   0.7390851332151607
--   </pre>
fixedPoint :: (Fractional a, Eq a) => (Tower a -> Tower a) -> a -> [a]

-- | The <a>fixedPointNoEq</a> function behaves the same as
--   <a>fixedPoint</a> except that it doesn't truncate the list once the
--   results become constant. This means it can be used with types without
--   an <a>Eq</a> instance.
fixedPointNoEq :: Fractional a => (Tower a -> Tower a) -> a -> [a]

-- | The <a>extremum</a> function finds an extremum of a scalar function
--   using Halley's method; produces a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   <pre>
--   &gt;&gt;&gt; take 10 $ extremum cos 1
--   [1.0,0.29616942658570555,4.59979519460002e-3,1.6220740159042513e-8,0.0]
--   </pre>
extremum :: (Fractional a, Eq a) => (On (Forward (Tower a)) -> On (Forward (Tower a))) -> a -> [a]

-- | The <a>extremumNoEq</a> function behaves the same as <a>extremum</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
extremumNoEq :: Fractional a => (On (Forward (Tower a)) -> On (Forward (Tower a))) -> a -> [a]


-- | Root finding using Halley's rational method (the second in the class
--   of Householder methods). Assumes the function is three times
--   continuously differentiable and converges cubically when progress can
--   be made.
module Numeric.AD.Halley

-- | The <a>findZero</a> function finds a zero of a scalar function using
--   Halley's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   Examples:
--   
--   <pre>
--   &gt;&gt;&gt; take 10 $ findZero (\x-&gt;x^2-4) 1
--   [1.0,1.8571428571428572,1.9997967892704736,1.9999999999994755,2.0]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ findZero ((+1).(^2)) (1 :+ 1)
--   0.0 :+ 1.0
--   </pre>
findZero :: (Fractional a, Eq a) => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]

-- | The <a>findZeroNoEq</a> function behaves the same as <a>findZero</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
findZeroNoEq :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]

-- | The <a>inverse</a> function inverts a scalar function using Halley's
--   method; its output is a stream of increasingly accurate results.
--   (Modulo the usual caveats.) If the stream becomes constant ("it
--   converges"), no further elements are returned.
--   
--   Note: the <tt>take 10 $ inverse sqrt 1 (sqrt 10)</tt> example that
--   works for Newton's method fails with Halley's method because the
--   preconditions do not hold!
inverse :: (Fractional a, Eq a) => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a -> [a]

-- | The <a>inverseNoEq</a> function behaves the same as <a>inverse</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
inverseNoEq :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a -> [a]

-- | The <a>fixedPoint</a> function finds a fixed point of a scalar function
--   using Halley's method; its output is a stream of increasingly accurate
--   results. (Modulo the usual caveats.)
--   
--   If the stream becomes constant ("it converges"), no further elements
--   are returned.
--   
--   <pre>
--   &gt;&gt;&gt; last $ take 10 $ fixedPoint cos 1
--   0.7390851332151607
--   </pre>
fixedPoint :: (Fractional a, Eq a) => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]

-- | The <a>fixedPointNoEq</a> function behaves the same as
--   <a>fixedPoint</a> except that it doesn't truncate the list once the
--   results become constant. This means it can be used with types without
--   an <a>Eq</a> instance.
fixedPointNoEq :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]

-- | The <a>extremum</a> function finds an extremum of a scalar function
--   using Halley's method; produces a stream of increasingly accurate
--   results. (Modulo the usual caveats.) If the stream becomes constant
--   ("it converges"), no further elements are returned.
--   
--   <pre>
--   &gt;&gt;&gt; take 10 $ extremum cos 1
--   [1.0,0.29616942658570555,4.59979519460002e-3,1.6220740159042513e-8,0.0]
--   </pre>
extremum :: (Fractional a, Eq a) => (forall s. AD s (On (Forward (Tower a))) -> AD s (On (Forward (Tower a)))) -> a -> [a]

-- | The <a>extremumNoEq</a> function behaves the same as <a>extremum</a>
--   except that it doesn't truncate the list once the results become
--   constant. This means it can be used with types without an <a>Eq</a>
--   instance.
extremumNoEq :: Fractional a => (forall s. AD s (On (Forward (Tower a))) -> AD s (On (Forward (Tower a)))) -> a -> [a]


-- | Higher order derivatives via a "dual number tower".
module Numeric.AD.Mode.Tower
data AD s a

-- | <tt>Tower</tt> is an AD <a>Mode</a> that calculates a tangent tower by
--   forward AD, and provides fast <tt>diffsUU</tt>, <tt>diffsUF</tt>
data Tower a

-- | Embed a constant
auto :: Mode t => Scalar t -> t
taylor :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a -> [a]
taylor0 :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a -> [a]
maclaurin :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
maclaurin0 :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
diff :: Num a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a
diff' :: Num a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> (a, a)
diffs :: Num a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
diffs0 :: Num a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
diffsF :: (Functor f, Num a) => (forall s. AD s (Tower a) -> f (AD s (Tower a))) -> a -> f [a]
diffs0F :: (Functor f, Num a) => (forall s. AD s (Tower a) -> f (AD s (Tower a))) -> a -> f [a]
du :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) -> AD s (Tower a)) -> f (a, a) -> a
du' :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) -> AD s (Tower a)) -> f (a, a) -> (a, a)
dus :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) -> AD s (Tower a)) -> f [a] -> [a]
dus0 :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) -> AD s (Tower a)) -> f [a] -> [a]
duF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) -> g (AD s (Tower a))) -> f (a, a) -> g a
duF' :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) -> g (AD s (Tower a))) -> f (a, a) -> g (a, a)
dusF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) -> g (AD s (Tower a))) -> f [a] -> g [a]
dus0F :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) -> g (AD s (Tower a))) -> f [a] -> g [a]


-- | Mixed-Mode Automatic Differentiation.
--   
--   Each combinator exported from this module chooses an appropriate AD
--   mode. The following basic operations are supported, modified as
--   appropriate by the suffixes below:
--   
--   <ul>
--   <li><a>grad</a> computes the gradient (partial derivatives) of a
--   function at a point</li>
--   <li><a>jacobian</a> computes the Jacobian matrix of a function at a
--   point</li>
--   <li><a>diff</a> computes the derivative of a function at a point</li>
--   <li><a>du</a> computes a directional derivative of a function at a
--   point</li>
--   <li><a>hessian</a> compute the Hessian matrix (matrix of second
--   partial derivatives) of a function at a point</li>
--   </ul>
--   
--   The suffixes have the following meanings:
--   
--   <ul>
--   <li><tt>'</tt> -- also return the answer</li>
--   <li><tt>With</tt> lets the user supply a function to blend the input
--   with the output</li>
--   <li><tt>F</tt> is a version of the base function lifted to return a
--   <a>Traversable</a> (or <a>Functor</a>) result</li>
--   <li><tt>s</tt> means the function returns all higher derivatives in a
--   list or f-branching <tt>Stream</tt></li>
--   <li><tt>T</tt> means the result is transposed with respect to the
--   traditional formulation.</li>
--   <li><tt>0</tt> means that the resulting derivative list is padded with
--   0s at the end.</li>
--   </ul>
module Numeric.AD
data AD s a
class (Num t, Num (Scalar t)) => Mode t where {
    type family Scalar t;
}

-- | Embed a constant
auto :: Mode t => Scalar t -> t

-- | The <a>grad</a> function calculates the gradient of a
--   non-scalar-to-scalar function with reverse-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y,z] -&gt; x*y+z) [1,2,3]
--   [2,1,1]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; grad (\[x,y] -&gt; x**y) [0,2]
--   [0.0,NaN]
--   </pre>
grad :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> f a

-- | The <a>grad'</a> function calculates the result and gradient of a
--   non-scalar-to-scalar function with reverse-mode AD in a single pass.
--   
--   <pre>
--   &gt;&gt;&gt; grad' (\[x,y,z] -&gt; x*y+z) [1,2,3]
--   (5,[2,1,1])
--   </pre>
grad' :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> (a, f a)

-- | <tt><a>gradWith</a> g f</tt> calculates the gradient of a
--   non-scalar-to-scalar function <tt>f</tt> with reverse-mode AD in a
--   single pass. The gradient is combined element-wise with the argument
--   using the function <tt>g</tt>.
--   
--   <pre>
--   <a>grad</a> == <a>gradWith</a> (\_ dx -&gt; dx)
--   <a>id</a> == <a>gradWith</a> <a>const</a>
--   </pre>
gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> f b

-- | <tt><a>gradWith'</a> g f</tt> calculates the result and gradient of a
--   non-scalar-to-scalar function <tt>f</tt> with reverse-mode AD in a
--   single pass. The gradient is combined element-wise with the argument
--   using the function <tt>g</tt>.
--   
--   <pre>
--   <a>grad'</a> == <a>gradWith'</a> (\_ dx -&gt; dx)
--   </pre>
gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> (a, f b)
grads :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> Cofree f a
class Num a => Grad i o o' a | i -> a o o', o -> a i o', o' -> a i o
vgrad :: Grad i o o' a => i -> o
vgrad' :: Grad i o o' a => i -> o'
class Num a => Grads i o a | i -> a o, o -> a i
vgrads :: Grads i o a => i -> o

-- | The <a>jacobian</a> function calculates the jacobian of a
--   non-scalar-to-non-scalar function with reverse AD lazily in <tt>m</tt>
--   passes for <tt>m</tt> outputs.
--   
--   <pre>
--   &gt;&gt;&gt; jacobian (\[x,y] -&gt; [y,x,x*y]) [2,1]
--   [[0,1],[1,0],[1,2]]
--   </pre>
jacobian :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (f a)

-- | The <a>jacobian'</a> function calculates both the result and the
--   Jacobian of a nonscalar-to-nonscalar function, using <tt>m</tt>
--   invocations of reverse AD, where <tt>m</tt> is the output
--   dimensionality. Applying <tt>fmap snd</tt> to the result will recover
--   the result of <a>jacobian</a>. (An alias for <tt>gradF'</tt>.)
--   
--   <pre>
--   &gt;&gt;&gt; jacobian' (\[x,y] -&gt; [y,x,x*y]) [2,1]
--   [(1,[0,1]),(2,[1,0]),(2,[1,2])]
--   </pre>
jacobian' :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (a, f a)

-- | <tt>jacobianWith g f</tt> calculates the Jacobian of a
--   non-scalar-to-non-scalar function <tt>f</tt> with reverse AD lazily in
--   <tt>m</tt> passes for <tt>m</tt> outputs.
--   
--   Instead of returning the Jacobian matrix, the elements of the matrix
--   are combined with the input using the <tt>g</tt>.
--   
--   <pre>
--   <a>jacobian</a> == <a>jacobianWith</a> (\_ dx -&gt; dx)
--   <a>jacobianWith</a> <a>const</a> == (\f x -&gt; <a>const</a> x <a>&lt;$&gt;</a> f x)
--   </pre>
jacobianWith :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (f b)

-- | <tt><a>jacobianWith'</a> g f</tt> calculates both the result and the Jacobian
--   of a nonscalar-to-nonscalar function <tt>f</tt>, using <tt>m</tt>
--   invocations of reverse AD, where <tt>m</tt> is the output
--   dimensionality. Applying <tt>fmap snd</tt> to the result will recover
--   the result of <a>jacobianWith</a>
--   
--   Instead of returning the Jacobian matrix, the elements of the matrix
--   are combined with the input using the <tt>g</tt>.
--   
--   <pre>
--   <a>jacobian'</a> == <a>jacobianWith'</a> (\_ dx -&gt; dx)
--   </pre>
jacobianWith' :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (a, f b)
jacobians :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (Cofree f a)

-- | A fast, simple, transposed Jacobian computed with forward-mode AD.
jacobianT :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> f (g a)

-- | A fast, simple, transposed Jacobian computed with <a>Forward</a> mode
--   <a>AD</a> that combines the output with the input.
jacobianWithT :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> f (g b)

-- | Compute the Hessian via the Jacobian of the gradient. gradient is
--   computed in reverse mode and then the Jacobian is computed in sparse
--   (forward) mode.
--   
--   <pre>
--   &gt;&gt;&gt; hessian (\[x,y] -&gt; x*y) [1,2]
--   [[0,1],[1,0]]
--   </pre>
hessian :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Sparse a))) -> On (Reverse s (Sparse a))) -> f a -> f (f a)
hessian' :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> (a, f (a, f a))

-- | Compute the order 3 Hessian tensor on a non-scalar-to-non-scalar
--   function using 'Sparse'-on-'Reverse'
--   
--   <pre>
--   &gt;&gt;&gt; hessianF (\[x,y] -&gt; [x*y,x+y,exp x*cos y]) [1,2]
--   [[[0.0,1.0],[1.0,0.0]],[[0.0,0.0],[0.0,0.0]],[[-1.1312043837568135,-2.4717266720048188],[-2.4717266720048188,1.1312043837568135]]]
--   </pre>
hessianF :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Sparse a))) -> g (On (Reverse s (Sparse a)))) -> f a -> g (f (f a))
hessianF' :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (a, f (a, f a))

-- | <tt><a>hessianProduct</a> f wv</tt> computes the product of the
--   hessian <tt>H</tt> of a non-scalar-to-scalar function <tt>f</tt> at
--   <tt>w = <a>fst</a> <a>$</a> wv</tt> with a vector <tt>v = snd <a>$</a>
--   wv</tt> using "Pearlmutter's method" from
--   <a>http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.29.6143</a>,
--   which states:
--   
--   <pre>
--   H v = (d/dr) grad_w (w + r v) | r = 0
--   </pre>
--   
--   Or in other words, we take the directional derivative of the gradient.
--   The gradient is calculated in reverse mode, then the directional
--   derivative is calculated in forward mode.
hessianProduct :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Forward a))) -> On (Reverse s (Forward a))) -> f (a, a) -> f a

-- | <tt><a>hessianProduct'</a> f wv</tt> computes both the gradient of a
--   non-scalar-to-scalar <tt>f</tt> at <tt>w = <a>fst</a> <a>$</a> wv</tt>
--   and the product of the hessian <tt>H</tt> at <tt>w</tt> with a vector
--   <tt>v = snd <a>$</a> wv</tt> using "Pearlmutter's method". The outputs
--   are returned wrapped in the same functor.
--   
--   <pre>
--   H v = (d/dr) grad_w (w + r v) | r = 0
--   </pre>
--   
--   Or in other words, we return the gradient and the directional
--   derivative of the gradient. The gradient is calculated in reverse
--   mode, then the directional derivative is calculated in forward mode.
hessianProduct' :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Forward a))) -> On (Reverse s (Forward a))) -> f (a, a) -> f (a, a)

-- | The <a>diff</a> function calculates the first derivative of a
--   scalar-to-scalar function by forward-mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diff sin 0
--   1.0
--   </pre>
diff :: Num a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> a

-- | The <a>diffF</a> function calculates the first derivatives of
--   scalar-to-nonscalar function by <a>Forward</a> mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diffF (\a -&gt; [sin a, cos a]) 0
--   [1.0,-0.0]
--   </pre>
diffF :: (Functor f, Num a) => (forall s. AD s (Forward a) -> f (AD s (Forward a))) -> a -> f a

-- | The <a>diff'</a> function calculates the result and first derivative
--   of scalar-to-scalar function by <a>Forward</a> mode <a>AD</a>
--   
--   <pre>
--   <a>diff'</a> <a>sin</a> == <a>sin</a> <a>&amp;&amp;&amp;</a> <a>cos</a>
--   <a>diff'</a> f = f <a>&amp;&amp;&amp;</a> d f
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' sin 0
--   (0.0,1.0)
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; diff' exp 0
--   (1.0,1.0)
--   </pre>
diff' :: Num a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> (a, a)

-- | The <a>diffF'</a> function calculates the result and first derivatives
--   of a scalar-to-non-scalar function by <a>Forward</a> mode <a>AD</a>
--   
--   <pre>
--   &gt;&gt;&gt; diffF' (\a -&gt; [sin a, cos a]) 0
--   [(0.0,1.0),(1.0,-0.0)]
--   </pre>
diffF' :: (Functor f, Num a) => (forall s. AD s (Forward a) -> f (AD s (Forward a))) -> a -> f (a, a)
diffs :: Num a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
diffsF :: (Functor f, Num a) => (forall s. AD s (Tower a) -> f (AD s (Tower a))) -> a -> f [a]
diffs0 :: Num a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
diffs0F :: (Functor f, Num a) => (forall s. AD s (Tower a) -> f (AD s (Tower a))) -> a -> f [a]

-- | Compute the directional derivative of a function given a zipped up
--   <a>Functor</a> of the input values and their derivatives
du :: (Functor f, Num a) => (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f (a, a) -> a

-- | Compute the answer and directional derivative of a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives
du' :: (Functor f, Num a) => (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f (a, a) -> (a, a)

-- | Compute a vector of directional derivatives for a function given a
--   zipped up <a>Functor</a> of the input values and their derivatives.
duF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f (a, a) -> g a

-- | Compute a vector of answers and directional derivatives for a function
--   given a zipped up <a>Functor</a> of the input values and their
--   derivatives.
duF' :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f (a, a) -> g (a, a)
dus :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) -> AD s (Tower a)) -> f [a] -> [a]
dus0 :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) -> AD s (Tower a)) -> f [a] -> [a]
dusF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) -> g (AD s (Tower a))) -> f [a] -> g [a]
dus0F :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) -> g (AD s (Tower a))) -> f [a] -> g [a]
taylor :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a -> [a]
taylor0 :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a -> [a]
maclaurin :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
maclaurin0 :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]

-- | The <a>gradientDescent</a> function performs a multivariate
--   optimization, based on the naive-gradient-descent in the file
--   <tt>stalingrad/examples/flow-tests/pre-saddle-1a.vlad</tt> from the
--   VLAD compiler Stalingrad sources. Its output is a stream of
--   increasingly accurate results. (Modulo the usual caveats.)
--   
--   It uses reverse mode automatic differentiation to compute the
--   gradient.
gradientDescent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> [f a]

-- | Perform a gradient descent using reverse mode automatic
--   differentiation to compute the gradient.
gradientAscent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> [f a]

-- | Perform a conjugate gradient descent using reverse mode automatic
--   differentiation to compute the gradient, and using forward-on-forward
--   mode for computing extrema.
--   
--   <pre>
--   &gt;&gt;&gt; let sq x = x * x
--   
--   &gt;&gt;&gt; let rosenbrock [x,y] = sq (1 - x) + 100 * sq (y - sq x)
--   
--   &gt;&gt;&gt; rosenbrock [0,0]
--   1
--   
--   &gt;&gt;&gt; rosenbrock (conjugateGradientDescent rosenbrock [0, 0] !! 5) &lt; 0.1
--   True
--   </pre>
conjugateGradientDescent :: (Traversable f, Ord a, Fractional a) => (forall s. Chosen s => f (Or s (On (Forward (Forward a))) (Kahn a)) -> Or s (On (Forward (Forward a))) (Kahn a)) -> f a -> [f a]

-- | Perform a conjugate gradient ascent using reverse mode automatic
--   differentiation to compute the gradient.
conjugateGradientAscent :: (Traversable f, Ord a, Fractional a) => (forall s. Chosen s => f (Or s (On (Forward (Forward a))) (Kahn a)) -> Or s (On (Forward (Forward a))) (Kahn a)) -> f a -> [f a]

-- | The <a>stochasticGradientDescent</a> function approximates the true
--   gradient of the cost function by a gradient at a single example. As
--   the algorithm sweeps through the training set, it performs the update
--   for each training example.
--   
--   It uses reverse mode automatic differentiation to compute the gradient.
--   The learning rate is constant throughout, and is set to 0.001.
stochasticGradientDescent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Scalar a) -> f (Reverse s a) -> Reverse s a) -> [f (Scalar a)] -> f a -> [f a]
