From d7bc661db02b2323aeb5cdf435a2138b04484dd1 Mon Sep 17 00:00:00 2001
From: Stefan Radomski <github@mintwerk.de>
Date: Wed, 5 Jul 2017 14:14:38 +0200
Subject: Added benchmarks to README

---
 README.md                 | 33 +++++++++++++++++++++++++++++++++
 contrib/benchmarks/run.sh |  4 +---
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f76c544..a668abc 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@
 #### Quick Links
 
 - [Building from source](http://tklab-tud.github.io/uscxml/building.html)
+- [Changes](docs/CHANGES.md)
 - [Tests passed](test/w3c/TESTS.md)
 - [Publications](docs/PUBLICATIONS.md)
 
@@ -79,4 +80,36 @@ For more detailled information, refer to the [documentation](http://tklab-tud.gi
 * [test-gen-c.cpp](https://github.com/tklab-tud/uscxml/blob/master/test/src/test-gen-c.cpp) (**C++**)
 * [WaterPump.cxx](https://github.com/tklab-tud/uscxml/blob/master/apps/arduino/WaterPump.cxx) (**C++ on Arduino**)
 
+## Benchmarks
+
+We conceived a [series of benchmarks](https://github.com/tklab-tud/uscxml/tree/master/test/benchmarks) in the form of SCXML documents to evaluate the performance of various SCXML implementations. The state-charts in the benchmarks are completely artificial and bear no resemblance to real-world state-charts. However, they may provide general guidance and give an impression of the performance of the different implementations.
+
+The benchmarks each stress a specific feature of any SCXML `microstep(T)` implementation. Each contains a state `mark` that is continuously entered and exited as part of a sequence of spontaneous microsteps, and the number of entries per second is measured. For every implementation, the [benchmark is run](https://github.com/tklab-tud/uscxml/blob/master/contrib/benchmarks/run.sh) for a number of seconds and the iterations per second are averaged. The benchmarks exist in increasing complexity, from very simple with, e.g., 4 states nested in a depth of 4 compounds, up to 512 for state-charts with > 250.000 states.
+
+**Note:** If you are the author / maintainer of one of the SCXML implementations being benchmarked below and feel that I misrepresent your implementation's performance, post an issue and I will set things straight.
+
+**Note:** There are two `microstep(T)` implementations in uSCXML, namely `fast` and `large` with the former being the default for transpilation and the latter for interpretation. Both are being employed on an interpreted state-chart here. For the `fast` microstep implementation we measured the case with pre-calculated predicates.
+
+**Note:** The numbers for scxmlcc are necessarily for the compiled case and N/A if we could not compile the state-chart within the time limit.
+
+### Transitions
+
+The Transitions benchmark measures transition selection with many conflicting transitions enabled as part of a microstep.
+
+![Transition Benchmark](https://user-images.githubusercontent.com/980655/27858834-004c9c78-6177-11e7-8519-2f73f0ff9fb4.png)
+
+### LCCA
+When exiting a state via a transition, the least-common compound ancestor (LCCA) of the transition's targets and source state has to be identified. This is a common operation and its runtime is proportional to the nesting depth when implemented appropriately.
+
+![LCCA Benchmark](https://user-images.githubusercontent.com/980655/27858835-00527ecc-6177-11e7-85d2-46c83ad5ed71.png)
+
+### Conclusion
+
+uSCXML with either microstep implementation is consistently the fastest with the exception of the Transitions benchmark, where the performance of the compiled `scxmlcc` degrades more slowly for more complex state-charts. This may be due to compiler optimizations (or an incomplete implementation) and it would be interesting to compare `scxmlcc` against the transpiled ANSI-C code from `uscxml-transform`. However, the limiting factor here becomes the time required to transpile the state-chart or to compile the generated source file into an executable binary. With regard to huge state-charts, the `large` microstep implementation of `uSCXML` performs best and retains acceptable performance throughout the range of benchmarks, only surpassed by the `fast` implementation for smaller complexities.
+
+# Changes {#changes}
+
+ * **[bfefa5fd44b9ed1491612f26b099db8ad624247b](https://github.com/tklab-tud/uscxml/pull/155/commits/bfefa5fd44b9ed1491612f26b099db8ad624247b):**
+
+    We **broke the InterpreterMonitor** API by substituting the Interpreter instance in the first formal parameter by its sessionId throughout all callbacks. Retrieving the actual Interpreter involved locking a weak_ptr into a shared_ptr which proved to be a performance bottleneck. You can retrieve the Interpreter from its sessionId via the new static method `Interpreter::fromSessionId` if you actually need it.
 
diff --git a/contrib/benchmarks/run.sh b/contrib/benchmarks/run.sh
index ac13779..388c601 100755
--- a/contrib/benchmarks/run.sh
+++ b/contrib/benchmarks/run.sh
@@ -111,9 +111,7 @@ function run-uscxml {
 	SC_NAME=$2
 
 	cd uscxml
-	timeout 600s ./statesPerSecond ${BENCHMARK} fast
 	timeout ${TIMEOUT} ./statesPerSecond ${BENCHMARK} fast |tee ../logs/${SC_NAME}-uscxml-fast.log
-	USCXML_NOCACHE_FILES=YES \
 	timeout ${TIMEOUT} ./statesPerSecond ${BENCHMARK} large |tee ../logs/${SC_NAME}-uscxml-large.log
 	cd ..
 }
@@ -207,7 +205,7 @@ function run-scxmlcc {
 
 	cd scxmlcc
 	rm test
-	timeout 600s ./scxmlcc/src/scxmlcc -i ${BENCHMARK} -o ./test.h
+	timeout ${TIMEOUT} ./scxmlcc/src/scxmlcc -i ${BENCHMARK} -o ./test.h
 	timeout ${TIMEOUT} g++ -DMACHINE_NAME=sc_benchmark ./statesPerSecond.cpp -o test
 	timeout ${TIMEOUT} ./test |tee ../logs/${SC_NAME}-scxmlcc.log
 
-- 
cgit v0.12