From be6be52822a916e03e2e8580ed80a033b9f41a50 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Sat, 6 Apr 2024 20:55:48 -0400 Subject: [PATCH] add higher order, update other pieces --- src/SUMMARY.md | 3 +-- src/limitations/comptime.md | 4 ++-- src/limitations/higher.md | 9 --------- src/limitations/runtime.md | 8 -------- src/usage/higher.md | 39 +++++++++++++++++++++++++++++++++++++ 5 files changed, 42 insertions(+), 21 deletions(-) delete mode 100644 src/limitations/higher.md delete mode 100644 src/limitations/runtime.md create mode 100644 src/usage/higher.md diff --git a/src/SUMMARY.md b/src/SUMMARY.md index f11020f..a731668 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -7,11 +7,10 @@ - [Usage](./usage/usage.md) - [Forward Mode](./usage/fwd.md) - [Reverse Mode](./usage/rev.md) + - [Higher Order Derivatives](./usage/higher.md) - [Current Limitations](./limitations.md) - [Safety](./limitations/safety.md) - - [Runtime Performance](./limitations/runtime.md) - [Compile Times](./limitations/comptime.md) - - [Higher Order Derivatives](./limitations/higher.md) - [How to Debug](./Debugging.md) # Reference Guide - [Other Enzyme frontends](./other_Frontends.md) diff --git a/src/limitations/comptime.md b/src/limitations/comptime.md index 127cec6..1afffda 100644 --- a/src/limitations/comptime.md +++ b/src/limitations/comptime.md @@ -11,13 +11,13 @@ Enzyme's TA will create TypeTrees based on usage patterns in the code. Due to a suboptimal datastructure this process scales very poorly. Transfer the code (~1200 Lines of C++) to a better suited trie should remove most of this overhead, please reach out if you can help. For the meantime, we do initialize TypeTrees for outermost function (those to which you apply '#[autodiff(...)]` based on the Rust types. +We also annotate a very small set of recognized LLVM functions with TypeInformation, but this work should be expanded. 
In some real-worl applications (50k LoC), this improved the compile times by over 1000x - reducing them from hours to single minutes. ## Duplicated Optimizations The key reason for Enzyme offering often excellent performance is that Enzyme does differentiate already optimized LLVM-IR. However, we also (have to) run LLVM's optimization pipeline after differentiating, to make sure that the code which Enzyme generates is optimized properly. -This is currently done approximately, but in certain cases some code will be optimized too often, while other code is not getting optimized enough. Tuning this could allow both compile time and runtime improvements. - +As a result you should have excellent runtime performance (please file an issue if not), but at a compile time cost. ## FAT-LTO The usage of '#[autodiff(...)]' currently requires compiling your project with fat-lto. diff --git a/src/limitations/higher.md b/src/limitations/higher.md deleted file mode 100644 index 09cf1c5..0000000 --- a/src/limitations/higher.md +++ /dev/null @@ -1,9 +0,0 @@ -# Higher Order Derivatives - -Computing higher order derivatives like hessians can be done with Enzyme by differentiating functions that compute lower order derivatives. -[This example](https://github.com/EnzymeAD/rust/blob/master/library/autodiff/examples/hessian_sin.rs) requires that rustc first uses Enzyme to fill the implementation of the `jac` function, before it uses Enzyme to fill the implementation of `hess`, by differentiatng `jac`. -This is currently not guaranteed and only works by coincidence in some cases. -This should be easy to fix, so please reach out if you would like to contribute and need some help to get started! - -Enzyme also considers adding helper function to directly compute common higher order derivatives in the future. 
- diff --git a/src/limitations/runtime.md b/src/limitations/runtime.md deleted file mode 100644 index 6f4e835..0000000 --- a/src/limitations/runtime.md +++ /dev/null @@ -1,8 +0,0 @@ -# Runtime Performance - -While Enzymes performance should already be good in most cases, there are some optimizations left to apply. One is mentioned in the following compile time section. -The other optimization left to apply is re-enabling Rust's ABI optimizations. -The Rust compiler might change how Rust types are represented on a lower level, to allow faster function calls. These optimizations are mainly relevant when you call a small functions many times. -We don't expect this to be the main application of autodiff, where we assume that you will often differentiate math-heavy code that for example calls faer, ndarray, or nalgebra matrix operations. -We therefore disabled this optimization for the outermost function (the one to which one applies '#[autodiff(...)]`, to enable compile time improvements. -However, it would be nice to teach Enzyme about these Rust ABI optimizations so we can have the best of both worlds. diff --git a/src/usage/higher.md b/src/usage/higher.md new file mode 100644 index 0000000..bf8cfbf --- /dev/null +++ b/src/usage/higher.md @@ -0,0 +1,39 @@ +# Higher Order Derivatives + +Computing higher order derivatives like hessians can be done with Enzyme by differentiating functions that compute lower order derivatives. +This requires that functions are differentiated in the right order, which we currently don't handle. +As a workaround, we introduce two new AD modes `ForwardFirst` and `ReverseFirst` that will be differentiated (and optimized) +before we differentiate the default `Forward` and `Reverse` mode invocations. An example is given below. 
+ + +```rust +// A direct translation of +// https://enzyme.mit.edu/index.fcgi/julia/stable/generated/autodiff/#Forward-over-reverse + +#[autodiff(ddf, Forward, Dual, Dual, Dual, Dual)] +fn df2(x: &[f32;2], dx: &mut [f32;2], out: &mut [f32;1], dout: &mut [f32;1]) { + df(x, dx, out, dout); +} + +#[autodiff(df, ReverseFirst, Duplicated, Duplicated)] +fn f(x: &[f32;2], y: &mut [f32;1]) { + y[0] = x[0] * x[0] + x[1] * x[0] +} + +#[test] +fn main() { + let mut y = [0.0]; + let x = [2.0, 2.0]; + + let mut dy = [0.0]; + let mut dx = [1.0, 0.0]; + + let mut bx = [0.0, 0.0]; + let mut by = [1.0]; + let mut dbx = [0.0, 0.0]; + let mut dby = [0.0]; + + ddf(&x, &mut bx, &mut dx, &mut dbx, + &mut y, &mut by, &mut dy, &mut dby); +} +```