diff options
author | Stefan Radomski <github@mintwerk.de> | 2016-05-12 13:12:33 (GMT) |
---|---|---|
committer | Stefan Radomski <github@mintwerk.de> | 2016-05-12 13:12:33 (GMT) |
commit | b62e7979600feee23dc7cdb61042a8fc7673122b (patch) | |
tree | f7351372f37979dd2d048e0b68a16a4cd3b2aadb /src/uscxml/util | |
parent | 1b11b310be61e51b3ac5ebb83f7c8a33aef3d6e8 (diff) | |
download | uscxml-b62e7979600feee23dc7cdb61042a8fc7673122b.zip uscxml-b62e7979600feee23dc7cdb61042a8fc7673122b.tar.gz uscxml-b62e7979600feee23dc7cdb61042a8fc7673122b.tar.bz2 |
Major Refactoring v2.0
Diffstat (limited to 'src/uscxml/util')
-rw-r--r-- | src/uscxml/util/Convenience.cpp | 177 | ||||
-rw-r--r-- | src/uscxml/util/Convenience.h | 83 | ||||
-rw-r--r-- | src/uscxml/util/DOM.cpp | 402 | ||||
-rw-r--r-- | src/uscxml/util/DOM.h | 224 | ||||
-rw-r--r-- | src/uscxml/util/Predicates.cpp | 468 | ||||
-rw-r--r-- | src/uscxml/util/Predicates.h | 61 | ||||
-rw-r--r-- | src/uscxml/util/Trie.cpp | 173 | ||||
-rw-r--r-- | src/uscxml/util/Trie.h | 64 | ||||
-rw-r--r-- | src/uscxml/util/URL.cpp | 773 | ||||
-rw-r--r-- | src/uscxml/util/URL.h | 333 | ||||
-rw-r--r-- | src/uscxml/util/UUID.cpp | 70 | ||||
-rw-r--r-- | src/uscxml/util/UUID.h | 39 |
12 files changed, 2630 insertions, 237 deletions
namespace uscxml {

/**
 * Portable NaN test.
 *
 * @param x Value to inspect.
 * @return true iff x is NaN (NaN is the only value that compares unequal to itself).
 */
bool isnan(double x) {
	return x != x;
}

/**
 * Check that a string consists solely of characters that may occur in a
 * (possibly fractional, possibly negative) number of the given base.
 *
 * This is a character-class check, not a parser: "1-2..3" passes, and so does
 * the empty string. Only upper-case hex digits are accepted.
 *
 * @param pszInput    Zero-terminated string to check; NULL is never numeric.
 * @param nNumberBase Number of digits in the alphabet (e.g. 10 or 16).
 * @return true iff every character is '.', '-' or one of the first nNumberBase digits.
 */
bool isNumeric(const char* pszInput, int nNumberBase) {
	if (pszInput == NULL)
		return false; // std::string(NULL) would be undefined behaviour

	// the two leading characters are the decimal point and the sign
	const std::string alphabet = ".-0123456789ABCDEF";
	const std::string input = pszInput;
	return (input.find_first_not_of(alphabet.substr(0, nNumberBase + 2)) == std::string::npos);
}

/**
 * Same as isNumeric() but without accepting a decimal point.
 *
 * @param pszInput    Zero-terminated string to check; NULL is never an integer.
 * @param nNumberBase Number of digits in the alphabet (e.g. 10 or 16).
 * @return true iff every character is '-' or one of the first nNumberBase digits.
 */
bool isInteger(const char* pszInput, int nNumberBase) {
	if (pszInput == NULL)
		return false; // std::string(NULL) would be undefined behaviour

	// the leading character is the sign
	const std::string alphabet = "-0123456789ABCDEF";
	const std::string input = pszInput;
	return (input.find_first_not_of(alphabet.substr(0, nNumberBase + 1)) == std::string::npos);
}

/**
 * Case-insensitive string comparison.
 *
 * Hand-rolled because it measured faster than boost::iequals for
 * test-performance.scxml; the case folding itself is delegated to tolower().
 *
 * @return true iff both strings have the same length and are equal after lower-casing.
 */
bool iequals(const std::string& a, const std::string& b) {
	const std::string::size_type size = a.size();
	if (b.size() != size)
		return false;

	for (std::string::size_type i = 0; i < size; ++i) {
		// tolower() is only defined for values representable as unsigned char (or EOF),
		// so plain (possibly negative) chars have to be converted first
		if (tolower(static_cast<unsigned char>(a[i])) != tolower(static_cast<unsigned char>(b[i])))
			return false;
	}
	return true;
}

/**
 * Case-sensitive string comparison.
 *
 * @return true iff both strings have the same length and content.
 */
bool equals(const std::string& a, const std::string& b) {
	return a == b;
}

/**
 * Interpret a string as a boolean.
 *
 * @return true iff value is "on", "true", "1" or "yes" (ignoring case).
 */
bool stringIsTrue(const std::string& value) {
	return (iequals(value, "on") ||
	        iequals(value, "true") ||
	        iequals(value, "1") ||
	        iequals(value, "yes"));
}

/**
 * Interpret an environment variable as a boolean.
 *
 * @param name Name of the environment variable.
 * @return false if the variable is unset, stringIsTrue() of its value otherwise.
 */
bool envVarIsTrue(const char* name) {
	const char* value = getenv(name);
	if (value == NULL)
		return false;
	return stringIsTrue(value);
}

/**
 * Compare an environment variable against a value, ignoring case.
 *
 * @param name  Name of the environment variable.
 * @param value Value to compare against.
 * @return false if the variable is unset, the result of iequals() otherwise.
 */
bool envVarIEquals(const char* name, const char* value) {
	const char* envVarValue = getenv(name);
	if (envVarValue == NULL)
		return false;
	return iequals(envVarValue, value);
}

/**
 * Replace special characters by their C-style escape sequence.
 *
 * Handles backslash, NUL, double quote and \a \b \f \n \r \t \v; everything
 * else is copied verbatim.
 * See http://en.cppreference.com/w/cpp/language/escape
 *
 * @param a String to escape.
 * @return The escaped string.
 */
std::string escape(const std::string& a) {
	std::stringstream b;

	std::string::const_iterator it = a.begin();
	while (it != a.end()) {
		char c = *it++;
		switch (c) {
		case '\\':
			b << '\\' << '\\';
			break;
		case '\0':
			b << '\\' << '0';
			break;
		case '"':
			b << '\\' << '"';
			break;
		case '\a':
			b << '\\' << 'a';
			break;
		case '\b':
			b << '\\' << 'b';
			break;
		case '\f':
			b << '\\' << 'f';
			break;
		case '\n':
			b << '\\' << 'n';
			break;
		case '\r':
			b << '\\' << 'r';
			break;
		case '\t':
			b << '\\' << 't';
			break;
		case '\v':
			b << '\\' << 'v';
			break;
		default:
			b << c;
		}
	}

	return b.str();
}

/**
 * Inverse of escape(): replace C-style escape sequences by the character they denote.
 *
 * Sequences that escape() never produces (e.g. "\x") and a trailing lone
 * backslash are copied verbatim instead of swallowing the character that
 * follows the backslash.
 * See http://en.cppreference.com/w/cpp/language/escape
 *
 * @param a String to unescape.
 * @return The unescaped string.
 */
std::string unescape(const std::string& a) {
	std::stringstream b;

	std::string::const_iterator it = a.begin();
	while (it != a.end()) {
		char c = *it++;
		if (c == '\\' && it != a.end()) {
			const char escaped = *it++;
			switch (escaped) {
			case '\\':
				c = '\\';
				break;
			case '0':
				c = '\0';
				break;
			case '"':
				c = '"';
				break;
			case 'a':
				c = '\a';
				break;
			case 'b':
				c = '\b';
				break;
			case 'f':
				c = '\f';
				break;
			case 'n':
				c = '\n';
				break;
			case 'r':
				c = '\r';
				break;
			case 't':
				c = '\t';
				break;
			case 'v':
				c = '\v';
				break;
			default:
				// not one of ours: emit the backslash now and the escaped character below
				b << c;
				c = escaped;
				break;
			}
		}
		b << c;
	}

	return b.str();
}

}
100644 index 0000000..532bcc0 --- /dev/null +++ b/src/uscxml/util/Convenience.h @@ -0,0 +1,83 @@ +/** + * @file + * @author 2012-2013 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. + * @endcond + */ + +#ifndef CONVENIENCE_H_LU7GZ6CB +#define CONVENIENCE_H_LU7GZ6CB + +#include "uscxml/Common.h" +#include <string> +#include <limits> +#include <sstream> + +namespace uscxml { +inline bool isnan(double x); + +// see http://stackoverflow.com/questions/228005/alternative-to-itoa-for-converting-integer-to-string-c +template <typename T> std::string toStr(T tmp) { + std::ostringstream outSS; + outSS.precision(std::numeric_limits<double>::digits10 + 1); + outSS << tmp; + return outSS.str(); +} + +template <typename T> T strTo(std::string tmp) { + T output; + std::istringstream in(tmp); + in >> output; + return output; +} + +class USCXML_API NumAttr { +public: + NumAttr(const std::string& str) { + size_t valueStart = str.find_first_of("0123456789."); + if (valueStart != std::string::npos) { + size_t valueEnd = str.find_last_of("0123456789."); + if (valueEnd != std::string::npos) { + value = str.substr(valueStart, (valueEnd - valueStart) + 1); + size_t unitStart = str.find_first_not_of(" \t", valueEnd + 1); + if (unitStart != std::string::npos) { + size_t unitEnd = str.find_last_of(" \t"); + if (unitEnd != std::string::npos && unitEnd > unitStart) { + unit = str.substr(unitStart, unitEnd - unitStart); + } else { + unit = 
str.substr(unitStart, str.length() - unitStart); + } + } + } + } + } + + std::string value; + std::string unit; +}; + +bool isNumeric(const char* pszInput, int nNumberBase); +bool isInteger( const char* pszInput, int nNumberBase); +bool iequals(const std::string& a, const std::string& b); +bool equals(const std::string& a, const std::string& b); +bool stringIsTrue(const std::string& value); +bool envVarIsTrue(const char* name); +bool envVarIEquals(const char* name, const char* value); + +std::string escape(const std::string& a); +std::string unescape(const std::string& a); + +} +#endif /* end of include guard: CONVENIENCE_H_LU7GZ6CB */ diff --git a/src/uscxml/util/DOM.cpp b/src/uscxml/util/DOM.cpp new file mode 100644 index 0000000..c7ed1e9 --- /dev/null +++ b/src/uscxml/util/DOM.cpp @@ -0,0 +1,402 @@ +/** + * @file + * @author 2012-2013 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. 
+ * @endcond + */ + +#include <algorithm> + +#include "uscxml/Common.h" +#include "uscxml/util/Convenience.h" +//#include "uscxml/util/UUID.h" +#include "uscxml/util/DOM.h" +//#include "uscxml/util/Convenience.h" + +#include <xercesc/util/PlatformUtils.hpp> +#include <xercesc/dom/DOM.hpp> +#include <xercesc/framework/StdOutFormatTarget.hpp> + +#include "easylogging++.h" + +//#include <glog/logging.h> +//#include <boost/algorithm/string.hpp> + +namespace uscxml { + +using namespace xercesc; + +std::ostream& operator<< (std::ostream& os, const DOMNode& node) { + + DOMImplementation *implementation = DOMImplementationRegistry::getDOMImplementation(X("LS")); + DOMLSSerializer *serializer = ((DOMImplementationLS*)implementation)->createLSSerializer(); + if (serializer->getDomConfig()->canSetParameter(XMLUni::fgDOMWRTFormatPrettyPrint, true)) + serializer->getDomConfig()->setParameter(XMLUni::fgDOMWRTFormatPrettyPrint, true); + serializer->setNewLine(XMLString::transcode("\r\n")); + + X output = serializer->writeToString(&node); + os << output; + return os; +} + +std::ostream& operator<< (std::ostream& os, const X& xmlString) { + os << xmlString._localForm; + return os; +} + +std::string DOMUtils::idForNode(const DOMNode* node) { + std::string nodeId; + std::string seperator; + const DOMNode* curr = node; + while(curr) { + switch (curr->getNodeType()) { + case DOMNode::ELEMENT_NODE: { + const DOMElement* elem = dynamic_cast<const DOMElement*>(curr); + if (HAS_ATTR(elem, "id")) { + std::string elementId = ATTR(elem, "id"); + std::replace( elementId.begin(), elementId.end(), '.', '_'); + std::replace( elementId.begin(), elementId.end(), ',', '_'); + + nodeId.insert(0, elementId + seperator); + seperator = "_"; + return nodeId; + } else { + DOMNode* sibling = curr->getPreviousSibling(); + int index = 0; + while(sibling) { + if (sibling->getNodeType() == DOMNode::ELEMENT_NODE) { + if (iequals(TAGNAME_CAST(sibling), TAGNAME(elem))) { + index++; + } + } + sibling = 
sibling->getPreviousSibling(); + } + nodeId.insert(0, TAGNAME(elem) + toStr(index) + seperator); + seperator = "_"; + } + break; + } + case DOMNode::DOCUMENT_NODE: + return nodeId; + default: + break; + } + + curr = curr->getParentNode(); + } + return nodeId; +} + +std::string DOMUtils::xPathForNode(const DOMNode* node, const std::string& ns) { + std::string xPath; + std::string nsPrefix; + + if (ns.size() > 0) { + nsPrefix = ns + ":"; + } + + if (!node || node->getNodeType() != DOMNode::ELEMENT_NODE) + return xPath; + + const DOMNode* curr = node; + while(curr) { + switch (curr->getNodeType()) { + case DOMNode::ELEMENT_NODE: { + const DOMElement* elem = dynamic_cast<const DOMElement*>(curr); + if (HAS_ATTR(elem, "id")) { + // we assume ids to be unique and return immediately + if (ns == "*") { + xPath.insert(0, "//*[local-name() = \"" + TAGNAME(elem) + "\"][@id=\"" + ATTR(elem, "id") + "\"]"); + } else { + xPath.insert(0, "//" + nsPrefix + TAGNAME(elem) + "[@id=\"" + ATTR(elem, "id") + "\"]"); + } + return xPath; + } else { + // check previous siblings to count our index + DOMNode* sibling = curr->getPreviousSibling(); + int index = 1; // xpath indices start at 1 + while(sibling) { + if (sibling->getNodeType() == DOMNode::ELEMENT_NODE) { + if (iequals(TAGNAME_CAST(sibling), TAGNAME(elem))) { + index++; + } + } + sibling = sibling->getPreviousSibling(); + } + if (ns == "*") { + xPath.insert(0, "/*[local-name() = \"" + TAGNAME(elem) + "\"][" + toStr(index) + "]"); + } else { + xPath.insert(0, "/" + nsPrefix + TAGNAME(elem) + "[" + toStr(index) + "]"); + } + } + break; + } + case DOMNode::DOCUMENT_NODE: + return xPath; + default: + LOG(ERROR) << "Only nodes of type element supported for now"; + return ""; + break; + } + curr = curr->getParentNode(); + } + return xPath; +} + +bool DOMUtils::hasIntersection(const std::list<DOMElement*>& l1, const std::list<DOMElement*>& l2) { + for (auto i = l1.begin(); i != l1.end(); i++) { + for (auto j = l2.begin(); j != l2.end(); 
j++) { + if (*i == *j) + return true; + } + } + return false; +} + +bool DOMUtils::isMember(const DOMNode* node, + const DOMNodeList* list) { + for (size_t i = 0; i < list->getLength(); i++) { + if (list->item(i) == node) + return true; + } + return false; +} + +bool DOMUtils::isMember(const DOMNode* node, + const std::list<DOMNode*>& list) { + + for (auto listIter = list.begin(); listIter != list.end(); listIter++) { + if ((*listIter) == node) + return true; + } + return false; +} + +bool DOMUtils::isMember(const DOMElement* node, + const std::list<DOMElement*>& list) { + + for (auto listIter = list.begin(); listIter != list.end(); listIter++) { + if ((*listIter) == node) + return true; + } + return false; +} + +const DOMNode* DOMUtils::getNearestAncestor(const DOMNode* node, const std::string tagName) { + const DOMNode* parent = node->getParentNode(); + while(parent) { + if (parent->getNodeType() == DOMNode::ELEMENT_NODE && + iequals(TAGNAME_CAST(parent), tagName)) { + return parent; + } + parent = parent->getParentNode(); + } + return NULL; +} + +bool DOMUtils::isDescendant(const DOMNode* s1, + const DOMNode* s2) { + if (!s1 || !s2) + return false; + + const DOMNode* parent = s1->getParentNode(); + while(parent) { + if (s2 == parent) + return true; + parent = parent->getParentNode(); + } + return false; +} + +std::list<DOMElement*> DOMUtils::inPostFixOrder(const std::set<std::string>& elements, + const DOMElement* root, + const bool includeEmbeddedDoc) { + std::list<DOMElement*> nodes; + inPostFixOrder(elements, root, includeEmbeddedDoc, nodes); + return nodes; +} + +void DOMUtils::inPostFixOrder(const std::set<std::string>& elements, + const DOMElement* root, + const bool includeEmbeddedDoc, + std::list<DOMElement*>& nodes) { + + DOMNodeList* children = root->getChildNodes(); + for (size_t i = 0; i < children->getLength(); i++) { + if (children->item(i)->getNodeType() != DOMNode::ELEMENT_NODE) + continue; + const DOMElement* childElem = dynamic_cast<const 
DOMElement*>(children->item(i)); + if (!includeEmbeddedDoc && LOCALNAME(childElem) == "scxml") + continue; + inPostFixOrder(elements, childElem, includeEmbeddedDoc, nodes); + + } + for (size_t i = 0; i < children->getLength(); i++) { + if (children->item(i)->getNodeType() != DOMNode::ELEMENT_NODE) + continue; + const DOMElement* childElem = dynamic_cast<const DOMElement*>(children->item(i)); + if (!includeEmbeddedDoc && TAGNAME(childElem) == XML_PREFIX(root).str() + "scxml") + continue; + + if (elements.find(TAGNAME(childElem)) != elements.end()) { + nodes.push_back((DOMElement*)childElem); + } + } +} + +std::list<DOMElement*> DOMUtils::inDocumentOrder(const std::set<std::string>& elements, + const DOMElement* root, + const bool includeEmbeddedDoc) { + std::list<DOMElement*> nodes; + inDocumentOrder(elements, root, includeEmbeddedDoc, nodes); + return nodes; +} + +void DOMUtils::inDocumentOrder(const std::set<std::string>& elements, + const DOMElement* root, + const bool includeEmbeddedDoc, + std::list<DOMElement*>& nodes) { + if (elements.find(TAGNAME(root)) != elements.end()) { + nodes.push_back((DOMElement*)root); + } + + DOMNodeList* children = root->getChildNodes(); + for (size_t i = 0; i < children->getLength(); i++) { + if (children->item(i)->getNodeType() != DOMNode::ELEMENT_NODE) + continue; + const DOMElement* childElem = dynamic_cast<const DOMElement*>(children->item(i)); + if (!includeEmbeddedDoc && TAGNAME(childElem) == XML_PREFIX(root).str() + "scxml") + continue; + inDocumentOrder(elements, childElem, includeEmbeddedDoc, nodes); + } +} + +std::list<DOMNode*> DOMUtils::getElementsByType(const DOMNode* root, + DOMNode::NodeType type) { + std::list<DOMNode*> result; + std::list<DOMNode*> stack; + std::list<DOMNode*>::iterator stackIter; + + if (!root) + return result; + + stack.push_back((DOMNode*)root); + while(stack.size() > 0) { +// for(stackIter = stack.begin(); stackIter != stack.end(); stackIter++) { +// std::cout << stackIter->getNodeType() << " 
" << stackIter->getLocalName() << " " << stackIter->getNodeValue() << std::endl; +// } +// std::cout << std::endl; + + DOMNode* currNode = stack.back(); + if (currNode->hasChildNodes()) { + stack.push_back(currNode->getFirstChild()); + continue; + } + + // roll back stack and pop everyone without next sibling + do { + currNode = stack.back(); + if (currNode->getNodeType() == type) + result.push_back(currNode); + stack.pop_back(); + if (currNode->getNextSibling()) { + stack.push_back(currNode->getNextSibling()); + break; + } + } while(stack.size() > 0); + } + return result; +} + + +std::list<DOMElement*> DOMUtils::filterChildElements(const std::string& tagName, + const std::list<DOMElement*>& nodeSet, + bool recurse) { + + std::list<DOMElement*> filteredChildElems; + std::list<DOMElement*>::const_iterator nodeIter = nodeSet.begin(); + while(nodeIter != nodeSet.end()) { + std::list<DOMElement*> filtered = filterChildElements(tagName, *nodeIter, recurse); + filteredChildElems.merge(filtered); // TODO: guess we want insert? 
+ nodeIter++; + } + return filteredChildElems; +} + +std::list<DOMElement*> DOMUtils::filterChildElements(const std::string& tagName, + const DOMElement* node, + bool recurse) { + + std::list<DOMElement*> filteredChildElems; + + if (!node) + return filteredChildElems; + + DOMNodeList* children = node->getChildNodes(); + for (unsigned int i = 0; i < children->getLength(); i++) { + if (children->item(i)->getNodeType() != DOMNode::ELEMENT_NODE) + continue; + const DOMElement* childElem = dynamic_cast<const DOMElement*>(children->item(i)); + + // std::cerr << TAGNAME(childs.item(i)) << std::endl; + if(iequals(TAGNAME(childElem), tagName)) { + filteredChildElems.push_back((DOMElement*)childElem); + } + if (recurse) { + std::list<DOMElement*> nested = filterChildElements(tagName, childElem, recurse); + filteredChildElems.merge(nested); + } + } + return filteredChildElems; +} + + +std::list<DOMNode*> DOMUtils::filterChildType(const DOMNode::NodeType type, + const std::list<DOMNode*>& nodeSet, + bool recurse) { + std::list<DOMNode*> filteredChildType; + std::list<DOMNode*>::const_iterator nodeIter = nodeSet.begin(); + while(nodeIter != nodeSet.end()) { + std::list<DOMNode*> filtered = filterChildType(type, *nodeIter, recurse); + filteredChildType.merge(filtered); + nodeIter++; + } + return filteredChildType; +} + +std::list<DOMNode*> DOMUtils::filterChildType(const DOMNode::NodeType type, + const DOMNode* node, + bool recurse) { + + std::list<DOMNode*> filteredChildTypes; + + if (!node) + return filteredChildTypes; + + DOMNodeList* children = node->getChildNodes(); + for (unsigned int i = 0; i < children->getLength(); i++) { + if (children->item(i)->getNodeType() == type) + filteredChildTypes.push_back(children->item(i)); + if (recurse) { + std::list<DOMNode*> nested = filterChildType(type, children->item(i), recurse); + filteredChildTypes.merge(nested); + + } + } + return filteredChildTypes; +} + + +}
\ No newline at end of file diff --git a/src/uscxml/util/DOM.h b/src/uscxml/util/DOM.h new file mode 100644 index 0000000..4eb0e36 --- /dev/null +++ b/src/uscxml/util/DOM.h @@ -0,0 +1,224 @@ +/** + * @file + * @author 2012-2013 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. + * @endcond + */ + +#ifndef DOMUTILS_H_WK0WAEA7 +#define DOMUTILS_H_WK0WAEA7 + +#include <set> +#include <list> +#include <iostream> + +#include "uscxml/Common.h" +#include <xercesc/util/XMLString.hpp> +#include <xercesc/dom/DOM.hpp> + + +/* +#define TAGNAME_CAST(elem) ((Arabica::DOM::Element<std::string>)elem).getTagName() +#define LOCALNAME_CAST(elem) ((Arabica::DOM::Element<std::string>)elem).getLocalName() +#define ATTR_CAST(elem, attr) ((Arabica::DOM::Element<std::string>)elem).getAttribute(attr) +#define ATTR_NODE_CAST(elem, attr) ((Arabica::DOM::Element<std::string>)elem).getAttributeNode(attr) +#define HAS_ATTR_CAST(elem, attr) ((Arabica::DOM::Element<std::string>)elem).hasAttribute(attr) + +#define TAGNAME(elem) elem.getTagName() +#define LOCALNAME(elem) elem.getLocalName() +#define ATTR(elem, attr) elem.getAttribute(attr) +#define ATTR_NODE(elem, attr) elem.getAttributeNode(attr) +*/ + +#define HAS_ATTR(elem, attr) (elem)->hasAttribute(X(attr)) +#define HAS_ATTR_CAST(elem, attr) HAS_ATTR(static_cast<const DOMElement*>(elem), attr) +#define ATTR(elem, attr) std::string(X((elem)->getAttribute(X(attr)))) +#define ATTR_CAST(elem, attr) 
ATTR(static_cast<const DOMElement*>(elem), attr) +#define TAGNAME(elem) std::string(X((elem)->getTagName())) +#define TAGNAME_CAST(elem) TAGNAME(static_cast<const DOMElement*>(elem)) +#define LOCALNAME(elem) std::string(X((elem)->getLocalName())) +#define LOCALNAME_CAST(elem) LOCALNAME(static_cast<const DOMElement*>(elem)) + + + +namespace uscxml { + +class USCXML_API DOMUtils { +public: + + static const xercesc::DOMNode* getNearestAncestor(const xercesc::DOMNode* node, const std::string tagName); + static bool isDescendant(const xercesc::DOMNode* s1, const xercesc::DOMNode* s2); + + + static bool hasIntersection(const std::list<xercesc::DOMElement*>& l1, + const std::list<xercesc::DOMElement*>& l2); + static bool isMember(const xercesc::DOMElement* node, const std::list<xercesc::DOMElement*>& list); + static bool isMember(const xercesc::DOMNode* node, const std::list<xercesc::DOMNode*>& list); + static bool isMember(const xercesc::DOMNode* node, const xercesc::DOMNodeList* list); + + static std::string xPathForNode(const xercesc::DOMNode* node, + const std::string& ns = ""); + static std::string idForNode(const xercesc::DOMNode* node); + + static std::list<xercesc::DOMNode*> getElementsByType(const xercesc::DOMNode* root, + xercesc::DOMNode::NodeType type); + + static std::list<xercesc::DOMElement*> inPostFixOrder(const std::string& element, + const xercesc::DOMElement* root, + const bool includeEmbeddedDoc = false) { + std::set<std::string> elements; + elements.insert(element); + return inPostFixOrder(elements, root, includeEmbeddedDoc); + } + + static std::list<xercesc::DOMElement*> inPostFixOrder(const std::set<std::string>& elements, + const xercesc::DOMElement* root, + const bool includeEmbeddedDoc = false); + + + static std::list<xercesc::DOMElement*> inDocumentOrder(const std::string& element, + const xercesc::DOMElement* root, + const bool includeEmbeddedDoc = false) { + std::set<std::string> elements; + elements.insert(element); + return 
inDocumentOrder(elements, root, includeEmbeddedDoc); + } + + static std::list<xercesc::DOMElement*> inDocumentOrder(const std::set<std::string>& elements, + const xercesc::DOMElement* root, + const bool includeEmbeddedDoc = false); + + static std::list<xercesc::DOMElement*> filterChildElements(const std::string& tagName, + const xercesc::DOMElement* node, + bool recurse = false); + + static std::list<xercesc::DOMElement*> filterChildElements(const std::string& tagName, + const std::list<xercesc::DOMElement*>& nodeSet, + bool recurse = false); + + static std::list<xercesc::DOMNode*> filterChildType(const xercesc::DOMNode::NodeType type, + const xercesc::DOMNode* node, + bool recurse = false); + + static std::list<xercesc::DOMNode*> filterChildType(const xercesc::DOMNode::NodeType type, + const std::list<xercesc::DOMNode*>& nodeSet, + bool recurse = false); + +protected: + static void inPostFixOrder(const std::set<std::string>& elements, + const xercesc::DOMElement* root, + const bool includeEmbeddedDoc, + std::list<xercesc::DOMElement*>& nodes); + + static void inDocumentOrder(const std::set<std::string>& elements, + const xercesc::DOMElement* root, + const bool includeEmbeddedDoc, + std::list<xercesc::DOMElement*>& nodes); + + +}; + +// create a prefix from a given element - useful for copying namespace information +#define XML_PREFIX(element) X(element->getPrefix() ? X(element->getPrefix()).str() + ":" : "") + +class USCXML_API X { +public : + + X(X const &other) { + _localForm = other._localForm; + _otherForm = xercesc::XMLString::replicate(other._otherForm); + _deallocOther = true; + } + void operator=(X const &other) { // did we maybe leak before? 
+ _localForm = other._localForm; + _otherForm = xercesc::XMLString::replicate(other._otherForm); + _deallocOther = true; + } + + X(const XMLCh* const toTranscode) { + if (toTranscode != NULL) { + // Call the private transcoding method + char* tmp = xercesc::XMLString::transcode(toTranscode); + _localForm = std::string(tmp); + xercesc::XMLString::release(&tmp); + } + _otherForm = NULL; + _deallocOther = false; + } + + X(const std::string& fromTranscode) { + // Call the private transcoding method + _localForm = fromTranscode; + _otherForm = xercesc::XMLString::transcode(fromTranscode.c_str()); + _deallocOther = true; + } + + X(const char* const fromTranscode) { + // Call the private transcoding method + _localForm = fromTranscode; + _otherForm = xercesc::XMLString::transcode(fromTranscode); + _deallocOther = true; + } + + X(char* fromTranscode) { + // Call the private transcoding method + _localForm = fromTranscode; + _otherForm = xercesc::XMLString::transcode(fromTranscode); + _deallocOther = true; + } + + X() { + _otherForm = NULL; + _deallocOther = false; + } + + ~X() { + if (_deallocOther) + xercesc::XMLString::release(&_otherForm); + } + + const std::string& str() const { + return _localForm; + } + + operator const XMLCh* () { + assert(_otherForm != NULL); // constructor with XMLCh + return _otherForm; + } + + operator bool () { + return _localForm.size() > 0; + } + + operator std::string () { + return _localForm; + } + +protected: + friend USCXML_API std::ostream& operator<< (std::ostream& os, const X& data); + +private: + bool _deallocOther; + std::string _localForm; + XMLCh* _otherForm; +}; + +USCXML_API std::ostream& operator<< (std::ostream& os, const X& xmlString); +USCXML_API std::ostream& operator<< (std::ostream& os, const xercesc::DOMNode& node); + +} + + +#endif /* end of include guard: DOMUTILS_H_WK0WAEA7 */ diff --git a/src/uscxml/util/Predicates.cpp b/src/uscxml/util/Predicates.cpp new file mode 100644 index 0000000..6ac092f --- /dev/null +++ 
b/src/uscxml/util/Predicates.cpp @@ -0,0 +1,468 @@ +/** + * @file + * @author 2016 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. + * @endcond + */ + +#include "Predicates.h" +#include "uscxml/util/String.h" + +namespace uscxml { + +using namespace xercesc; + +std::list<DOMElement*> getChildStates(const DOMElement* state, bool properOnly) { + std::list<DOMElement*> children; + + DOMNodeList* childElems = state->getChildNodes(); + for (size_t i = 0; i < childElems->getLength(); i++) { + if (childElems->item(i)->getNodeType() != DOMNode::ELEMENT_NODE) + continue; + DOMElement* childElem = dynamic_cast<DOMElement*>(childElems->item(i)); + if (isState(childElem, properOnly)) { + children.push_back(childElem); + } + } + return children; +} + +std::list<xercesc::DOMElement*> getChildStates(const std::list<xercesc::DOMElement*>& states, bool properOnly) { + std::list<xercesc::DOMElement*> children; + for (auto stateIter = states.begin(); stateIter != states.end(); stateIter++) { + std::list<DOMElement*> tmp = getChildStates(*stateIter, properOnly); + children.merge(tmp); + } + return children; +} + + +DOMElement* getParentState(const DOMElement* element) { + DOMNode* parent = element->getParentNode(); + while(parent && !isState(dynamic_cast<DOMElement*>(parent))) { + parent = parent->getParentNode(); + } + return dynamic_cast<DOMElement*>(parent); +} + +DOMElement* getSourceState(const DOMElement* transition) { + if 
(iequals(TAGNAME_CAST(transition->getParentNode()), XML_PREFIX(transition).str() + "initial")) + return dynamic_cast<DOMElement*>(transition->getParentNode()->getParentNode()); + return dynamic_cast<DOMElement*>(transition->getParentNode()); +} + + +/** + See: http://www.w3.org/TR/scxml/#LCCA + The Least Common Compound Ancestor is the <state> or <scxml> element s such that s is a proper ancestor + of all states on stateList and no descendant of s has this property. Note that there is guaranteed to be + such an element since the <scxml> wrapper element is a common ancestor of all states. Note also that since + we are speaking of proper ancestor (parent or parent of a parent, etc.) the LCCA is never a member of stateList. + */ + +#define VERBOSE_FIND_LCCA 0 +DOMElement* findLCCA(const std::list<DOMElement*>& states) { + + std::list<DOMElement*> ancestors = getProperAncestors(states.front(), NULL); + DOMElement* ancestor = NULL; + + for (auto ancIter = ancestors.begin(); ancIter != ancestors.end(); ancIter++) { + if (!isCompound(dynamic_cast<DOMElement*>(*ancIter))) + continue; + for (auto stateIter = states.begin(); stateIter != states.end(); stateIter++) { + +#if VERBOSE_FIND_LCCA + std::cerr << "Checking " << ATTR_CAST(states[j], "id") << " and " << ATTR_CAST(ancestors[i], "id") << std::endl; +#endif + + if (!DOMUtils::isDescendant(*stateIter, *ancIter)) + goto NEXT_ANCESTOR; + } + ancestor = *ancIter; + break; +NEXT_ANCESTOR: + ; + } + + // take uppermost root as ancestor + if (!ancestor) + ancestor = ancestors.back(); + +#if VERBOSE_FIND_LCCA + std::cerr << " -> " << ATTR_CAST(ancestor, "id") << " " << ancestor.getLocalName() << std::endl; +#endif + return ancestor; +} + +/* + * If state2 is null, returns the set of all ancestors of state1 in ancestry order + * (state1's parent followed by the parent's parent, etc. up to an including the <scxml> + * element). 
If state2 is non-null, returns in ancestry order the set of all ancestors + * of state1, up to but not including state2. (A "proper ancestor" of a state is its + * parent, or the parent's parent, or the parent's parent's parent, etc.))If state2 is + * state1's parent, or equal to state1, or a descendant of state1, this returns the empty set. + */ + +std::list<DOMElement*> getProperAncestors(const DOMElement* s1, const DOMElement* s2) { + + std::list<DOMElement*> ancestors; + if (isState(s1)) { + DOMNode* node = (DOMNode*)s1; + while((node = node->getParentNode())) { + if (node->getNodeType() != DOMNode::ELEMENT_NODE) + break; + + const DOMElement* nodeElem = dynamic_cast<const DOMElement*>(node); + if (!isState(nodeElem)) + break; + if (!iequals(LOCALNAME(nodeElem), "parallel") && + !iequals(LOCALNAME(nodeElem), "state") && + !iequals(LOCALNAME(nodeElem), "scxml")) + break; + if (node == s2) + break; + ancestors.push_back(dynamic_cast<DOMElement*>(node)); + } + } + return ancestors; +} + +std::list<DOMElement*> getExitSet(const DOMElement* transition, const DOMElement* root) { + std::list<DOMElement*> statesToExit; + if (HAS_ATTR(transition, "target")) { + DOMElement* domain = getTransitionDomain(transition, root); + if (!domain) + return statesToExit; + + // std::cout << DOMUtils::xPathForNode(domain) << std::endl; + + std::set<std::string> elements; + elements.insert(XML_PREFIX(transition).str() + "parallel"); + elements.insert(XML_PREFIX(transition).str() + "state"); + elements.insert(XML_PREFIX(transition).str() + "final"); + statesToExit = DOMUtils::inDocumentOrder(elements, domain); + + if (statesToExit.front() == domain) { + statesToExit.pop_front(); // do not include domain itself + } + } + + return statesToExit; +} + +bool conflicts(const DOMElement* t1, const DOMElement* t2, const DOMElement* root) { + return (DOMUtils::hasIntersection(getExitSet(t1, root), getExitSet(t2, root)) || + (getSourceState(t1) == getSourceState(t2)) || + 
(DOMUtils::isDescendant(getSourceState(t1), getSourceState(t2))) || + (DOMUtils::isDescendant(getSourceState(t2), getSourceState(t1)))); +} + +bool isState(const DOMElement* state, bool properOnly) { + if (!state) + return false; + + std::string localName = LOCALNAME(state); + if (iequals("state", localName)) + return true; + if (iequals("scxml", localName)) + return true; + if (iequals("parallel", localName)) + return true; + if (iequals("final", localName)) + return true; + if (properOnly) + return false; + + if (iequals("history", localName)) + return true; + if (iequals("initial", localName)) + return true; + + return false; +} + +bool isFinal(const DOMElement* state) { + std::string localName = LOCALNAME(state); + if (iequals("final", localName)) + return true; + if (HAS_ATTR(state, "final") && iequals("true", ATTR(state, "final"))) + return true; + return false; +} + +bool isAtomic(const DOMElement* state) { + if (!isState(state)) + return false; + + if (iequals("final", LOCALNAME(state))) + return true; + + if (iequals("parallel", LOCALNAME(state))) + return false; + + if (getChildStates(state).size() > 0) + return false; + + return true; +} + +bool isHistory(const DOMElement* state) { + if (iequals("history", LOCALNAME(state))) + return true; + return false; +} + +bool isParallel(const DOMElement* state) { + if (!isState(state)) + return false; + if (iequals("parallel", LOCALNAME(state))) + return true; + return false; +} + + +bool isCompound(const DOMElement* state) { + if (!isState(state)) + return false; + + if (iequals(LOCALNAME(state), "parallel")) // parallel is no compound state + return false; + + if (getChildStates(state).size() > 0) + return true; + + return false; +} + +std::list<DOMElement*> getTargetStates(const DOMElement* transition, const DOMElement* root) { + std::list<DOMElement*> targetStates; + + std::string targetId = ATTR(transition, "target"); + std::list<std::string> targetIds = tokenize(ATTR(transition, "target")); + + for (auto 
targetIter = targetIds.begin(); targetIter != targetIds.end(); targetIter++) { + DOMElement* state = getState(*targetIter, root); + if (state) { + targetStates.push_back(state); + } + } + return targetStates; +} + + +DOMElement* getTransitionDomain(const DOMElement* transition, const DOMElement* root) { + std::list<DOMElement*> tStates = getTargetStates(transition, root); + if (tStates.size() == 0) { + return NULL; + } + std::string transitionType = (HAS_ATTR(transition, "type") ? ATTR(transition, "type") : "external"); + DOMElement* source = getSourceState(transition); + + if (iequals(transitionType, "internal") && isCompound(source)) { + for (auto tIter = tStates.begin(); tIter != tStates.end(); tIter++) { + if (!DOMUtils::isDescendant(*tIter, source)) + goto BREAK_LOOP; + } + return source; + } + +BREAK_LOOP: + tStates.push_front(source); + return findLCCA(tStates); +} + +std::list<DOMElement*> getStates(const std::list<std::string>& stateIds, const DOMElement* root) { + std::list<DOMElement*> states; + std::list<std::string>::const_iterator tokenIter = stateIds.begin(); + while(tokenIter != stateIds.end()) { + states.push_back(getState(*tokenIter, root)); + tokenIter++; + } + return states; +} + +DOMElement* getState(const std::string& stateId, const DOMElement* root) { + + std::list<const DOMElement*> stateStack; + stateStack.push_back(root); + + while(stateStack.size() > 0) { + const DOMElement* curr = stateStack.front(); + stateStack.pop_front(); + + if (!isState(curr, false)) + assert(false); + +// std::cout << *curr; + + if (HAS_ATTR(curr, "id") && ATTR(curr, "id") == stateId) + return (DOMElement*)curr; + + std::list<DOMElement*> children = getChildStates(curr, false); + stateStack.insert(stateStack.end(), children.begin(), children.end()); + } + + return NULL; +} + +/** + * In a conformant SCXML document, a compound state may specify either an "initial" + * attribute or an <initial> element, but not both. 
See 3.6 <initial> for a + * discussion of the difference between the two notations. If neither the "initial" + * attribute nor an <initial> element is specified, the SCXML Processor must use + * the first child state in document order as the default initial state. + */ +std::list<DOMElement*> getInitialStates(const DOMElement* state, const DOMElement* root) { + if (!state) { + state = root; + } + +#if VERBOSE + std::cerr << "Getting initial state of " << TAGNAME(state) << " " << ATTR(state, "id") << std::endl; +#endif + + if (isAtomic(state)) { + return std::list<DOMElement*>(); + } + + if (isParallel(state)) { + return getChildStates(state); + } + + if (isCompound(state)) { + // initial attribute at element + if (HAS_ATTR(state, "initial")) { + return getStates(tokenize(ATTR(state, "initial")), root); + } + + // initial element as child + std::list<DOMElement*> initElems = DOMUtils::filterChildElements(XML_PREFIX(state).str() + "initial", state); + if(initElems.size() > 0 ) { + std::list<DOMElement*> initTrans = DOMUtils::filterChildElements(XML_PREFIX(initElems.front()).str() + "transition", initElems.front()); + if (initTrans.size() > 0 && HAS_ATTR(initTrans.front(),"target")) { + return getTargetStates(initTrans.front(), root); + } + return std::list<DOMElement*>(); + } + + // first child state + std::list<DOMElement*> initStates; + DOMNodeList* children = state->getChildNodes(); + for (size_t i = 0; i < children->getLength(); i++) { + if (children->item(i)->getNodeType() != DOMNode::ELEMENT_NODE) + continue; + DOMElement* childElem = dynamic_cast<DOMElement*>(children->item(i)); + if (isState(childElem)) { + initStates.push_back(childElem); + return initStates; + } + } + } + + // nothing found + return std::list<DOMElement*>(); +} + +std::list<DOMElement*> getReachableStates(const DOMElement* root) { + /** Check which states are reachable */ + + std::list<DOMElement*> reachable; // total transitive hull + std::list<DOMElement*> additions; // nodes added in 
last iteration + std::list<DOMElement*> current; // new nodes caused by nodes added + additions.push_back((DOMElement*)root); + + while (additions.size() > 0) { + +#if 0 + for (auto stateIter = additions.begin(); stateIter != additions.end(); stateIter++) { + DOMElement* state = *stateIter; + std::cout << (HAS_ATTR(state, "id") ? ATTR(state, "id") : (std::string)X(state->getLocalName())) << ", " << std::endl; + } +#endif + // reachable per initial attribute or document order - size will increase as we append new states + for (auto stateIter = additions.begin(); stateIter != additions.end(); stateIter++) { + // get the state's initial states + DOMElement* state = *stateIter; + std::list<DOMElement*> initials = getInitialStates(state, root); + for (auto initIter = initials.begin(); initIter != initials.end(); initIter++) { + DOMElement* initial = *initIter; + if (!DOMUtils::isMember(initial, additions) && !DOMUtils::isMember(initial, reachable)) { + current.push_back(initial); + } + } + } + + // reachable per target attribute in transitions + for (auto stateIter = additions.begin(); stateIter != additions.end(); stateIter++) { + DOMElement* state = *stateIter; + std::list<DOMElement*> transitions = DOMUtils::filterChildElements(XML_PREFIX(state).str() + "transition", state, false); + for (auto transIter = transitions.begin(); transIter != transitions.end(); transIter++) { + DOMElement* transition = *transIter; + std::list<DOMElement*> targets = getTargetStates(transition, root); + for (auto targetIter = targets.begin(); targetIter != targets.end(); targetIter++) { + DOMElement* target = *targetIter; + if (!DOMUtils::isMember(target, additions) && !DOMUtils::isMember(target, reachable)) { + current.push_back(target); + } + } + } + } + + // reachable via a reachable child state + for (auto stateIter = additions.begin(); stateIter != additions.end(); stateIter++) { + DOMElement* state = *stateIter; + if (isAtomic(state)) { + // iterate the states parents + DOMNode* 
parent = state->getParentNode(); + while(parent && parent->getNodeType() == DOMNode::ELEMENT_NODE) { + DOMElement* parentElem = static_cast<DOMElement*>(parent); + if (!isState(parentElem)) { + break; + } + if (!DOMUtils::isMember(parentElem, additions) && !DOMUtils::isMember(parentElem, reachable)) { + current.push_back(parentElem); + } + parent = parent->getParentNode(); + } + } + } + + // add all additions from last iterations to reachable set + reachable.insert(reachable.end(), additions.begin(), additions.end()); + + // set current additions as new additions + additions = current; + + // clear current set for next iteration + current.clear(); + } + + return reachable; +} + + +bool isInEmbeddedDocument(const DOMNode* node) { + // a node is in an embedded document if there is a content element in its parents + const DOMNode* parent = node; + while(parent) { + if(iequals(LOCALNAME(parent), "content")) { + return true; + } + parent = parent->getParentNode(); + } + return false; +} + +}
\ No newline at end of file diff --git a/src/uscxml/util/Predicates.h b/src/uscxml/util/Predicates.h new file mode 100644 index 0000000..50c69b6 --- /dev/null +++ b/src/uscxml/util/Predicates.h @@ -0,0 +1,61 @@ +/** + * @file + * @author 2016 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. + * @endcond + */ + +#ifndef PREDICATES_H_D3A20484 +#define PREDICATES_H_D3A20484 + +#include <string> +#include <list> +#include <xercesc/dom/DOM.hpp> +#include "uscxml/util/DOM.h" +#include "uscxml/util/Convenience.h" + +namespace uscxml { + +std::list<xercesc::DOMElement*> getChildStates(const xercesc::DOMElement* state, bool properOnly = true); +std::list<xercesc::DOMElement*> getChildStates(const std::list<xercesc::DOMElement*>& states, bool properOnly = true); +xercesc::DOMElement* getParentState(const xercesc::DOMElement* element); +xercesc::DOMElement* getSourceState(const xercesc::DOMElement* transition); +xercesc::DOMElement* findLCCA(const std::list<xercesc::DOMElement*>& states); +std::list<xercesc::DOMElement*> getProperAncestors(const xercesc::DOMElement* s1, const xercesc::DOMElement* s2); + +std::list<xercesc::DOMElement*> getTargetStates(const xercesc::DOMElement* transition, const xercesc::DOMElement* root); +std::list<xercesc::DOMElement*> getEffectiveTargetStates(const xercesc::DOMElement* transition); +xercesc::DOMElement* getTransitionDomain(const xercesc::DOMElement* transition, const xercesc::DOMElement* 
root); + +bool isInEmbeddedDocument(const xercesc::DOMNode* node); + +std::list<xercesc::DOMElement*> getStates(const std::list<std::string>& stateIds, const xercesc::DOMElement* root); +xercesc::DOMElement* getState(const std::string& stateId, const xercesc::DOMElement* root); +std::list<xercesc::DOMElement*> getInitialStates(const xercesc::DOMElement* state, const xercesc::DOMElement* root); +std::list<xercesc::DOMElement*> getReachableStates(const xercesc::DOMElement* root); +std::list<xercesc::DOMElement*> getExitSet(const xercesc::DOMElement* transition, const xercesc::DOMElement* root); +bool conflicts(const xercesc::DOMElement* transition1, const xercesc::DOMElement* transition2, const xercesc::DOMElement* root); + +bool isState(const xercesc::DOMElement* state, bool properOnly = true); +bool isCompound(const xercesc::DOMElement* state); +bool isAtomic(const xercesc::DOMElement* state); +bool isParallel(const xercesc::DOMElement* state); +bool isFinal(const xercesc::DOMElement* state); +bool isHistory(const xercesc::DOMElement* state); + + +} + +#endif /* end of include guard: PREDICATES_H_D3A20484 */ diff --git a/src/uscxml/util/Trie.cpp b/src/uscxml/util/Trie.cpp deleted file mode 100644 index 8e3aff3..0000000 --- a/src/uscxml/util/Trie.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/** - * @file - * @author 2012-2014 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) - * @copyright Simplified BSD - * - * @cond - * This program is free software: you can redistribute it and/or modify - * it under the terms of the FreeBSD license as published by the FreeBSD - * project. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the FreeBSD license along with this - * program. If not, see <http://www.opensource.org/licenses/bsd-license>. 
- * @endcond - */ - -#include "Trie.h" -#include <iostream> -#include <boost/algorithm/string.hpp> - -namespace uscxml { - -Trie::Trie() { - root = new TrieNode(); - lastIndex = 0; -} - -Trie::Trie(const std::string& seperator) : seperator(seperator) { - root = new TrieNode(); - lastIndex = 0; -} - -Trie::~Trie() { - delete root; -} - -TrieNode::TrieNode() : hasWord(false) {} - -TrieNode::~TrieNode() { - std::map<std::string, TrieNode*>::iterator childIter = childs.begin(); - while(childIter != childs.end()) { - delete childIter->second; - childIter++; - } -} - -size_t Trie::getNextToken(const std::string& word, size_t offset, std::string& token) { - if (offset == std::string::npos || offset >= word.length()) { - token = ""; - return std::string::npos; - } - if (seperator.size() > 0) { - size_t sepPos = word.find(seperator, offset); - if (sepPos == offset) // starts with a seperator - return getNextToken(word, offset + seperator.length(), token); - if (sepPos == std::string::npos) { - token = word.substr(offset, word.length() - offset); - } else { - token = word.substr(offset, sepPos - offset); - sepPos += seperator.length(); - } - return sepPos; - } - token = word[offset]; - return offset + 1; -} - -std::string Trie::escapeWord(const std::string& word) { - std::string identifier = word; - boost::replace_all(identifier, ".", "_"); - return identifier; -} - -void Trie::addWord(const std::string& word) { - TrieNode* currNode = root; - - std::string prefix; - size_t offset = 0; - - for(;;) { - offset = getNextToken(word, offset, prefix); - - if (prefix.size() > 0) { - if (currNode->childs.find(prefix) == currNode->childs.end()) - currNode->childs[prefix] = new TrieNode(); - currNode = currNode->childs[prefix]; - } - - if (offset == std::string::npos) - break; - } - if (!currNode->hasWord) { - currNode->index = lastIndex++; - currNode->value = word; - currNode->identifier = escapeWord(word); - currNode->hasWord = true; - } -} - -TrieNode* Trie::getNodeWithPrefix(const 
std::string& prefix) { - std::string token; - size_t offset = 0; - - TrieNode* currNode = root; - - for(;;) { - offset = getNextToken(prefix, offset, token); - if (currNode->childs.find(token) == currNode->childs.end()) { - if (token.size() > 0) - currNode = NULL; - break; - } else { - currNode = currNode->childs[token]; - } - } - return currNode; -} - -std::list<TrieNode*> Trie::getWordsWithPrefix(const std::string& prefix) { - std::list<TrieNode*> nodes; - TrieNode* prefixNode = getNodeWithPrefix(prefix); - - if (prefixNode) { - nodes = getChildsWithWords(prefixNode); - } - - return nodes; -} - -std::list<TrieNode*> Trie::getChildsWithWords(TrieNode* node) { - std::list<TrieNode*> nodes; - if (node->hasWord) { - nodes.push_back(node); - } - - std::map<std::string, TrieNode*>::iterator childIter = node->childs.begin(); - while(childIter != node->childs.end()) { - std::list<TrieNode*> otherChilds = getChildsWithWords(childIter->second); - nodes.merge(otherChilds); - childIter++; - } - - return nodes; -} - -void TrieNode::dump(int indent) { - std::string padding; - for (size_t i = 0; i < indent; i++) { - padding += " "; - } - - std::map<std::string, TrieNode*>::iterator childIter = childs.begin(); - while(childIter != childs.end()) { - std::cout << padding << childIter->first; - if (childIter->second->hasWord) { - std::cout << " (word)"; - } - std::cout << std::endl; - childIter->second->dump(indent + 1); - childIter++; - } -} - -void Trie::dump() { - if (root->hasWord) - std::cout << "(word)" << std::endl; - root->dump(); -} - -}
\ No newline at end of file diff --git a/src/uscxml/util/Trie.h b/src/uscxml/util/Trie.h deleted file mode 100644 index 73d75e7..0000000 --- a/src/uscxml/util/Trie.h +++ /dev/null @@ -1,64 +0,0 @@ -/** - * @file - * @author 2012-2014 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) - * @copyright Simplified BSD - * - * @cond - * This program is free software: you can redistribute it and/or modify - * it under the terms of the FreeBSD license as published by the FreeBSD - * project. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the FreeBSD license along with this - * program. If not, see <http://www.opensource.org/licenses/bsd-license>. - * @endcond - */ - -#ifndef TRIE_H_UZMQRBO5 -#define TRIE_H_UZMQRBO5 - -#include "uscxml/Common.h" -#include <string> -#include <map> -#include <list> - -namespace uscxml { - -struct USCXML_API TrieNode { - TrieNode(); - virtual ~TrieNode(); - - bool hasWord; - int index; - std::string identifier; - std::string value; - std::map<std::string, TrieNode*> childs; - void dump(int indent = 0); -}; - -struct USCXML_API Trie { - Trie(); - Trie(const std::string& seperator); - virtual ~Trie(); - - void addWord(const std::string& word); - size_t getNextToken(const std::string& word, size_t offset, std::string& token); - std::string escapeWord(const std::string& word); - - TrieNode* getNodeWithPrefix(const std::string& prefix); - std::list<TrieNode*> getWordsWithPrefix(const std::string& prefix); - std::list<TrieNode*> getChildsWithWords(TrieNode* node); - void dump(); - - TrieNode* root; - std::string seperator; - int lastIndex; -}; - -} - - -#endif /* end of include guard: TRIE_H_UZMQRBO5 */ diff --git a/src/uscxml/util/URL.cpp b/src/uscxml/util/URL.cpp new file mode 100644 index 0000000..ad271d5 --- /dev/null +++ b/src/uscxml/util/URL.cpp @@ -0,0 
+1,773 @@ +/** + * @file + * @author 2012-2013 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. + * @endcond + */ + +#include "URL.h" +#include "uscxml/messages/Event.h" + +#include <string> +#include <cassert> + +#include <easylogging++.h> +#include "uscxml/config.h" + + +#ifdef _WIN32 +#define getcwd _getcwd +#else +#include <unistd.h> // getcwd +//#include <pwd.h> +#endif + +namespace uscxml { + +void URLImpl::prepareException(ErrorEvent& exception, int errorCode, const std::string& origUri, UriParserStateA* parser) { + exception.data.compound["uri"].atom = origUri; + + if (parser != NULL && parser->errorPos != NULL) { + const char* startPtr = origUri.c_str(); + while(startPtr != parser->errorPos && *startPtr != '\0') { + exception.data.compound["urk"].atom += " "; + startPtr++; + } + exception.data.compound["urk"].atom += "^"; + } + + switch (errorCode) { + case URI_ERROR_SYNTAX: + exception.data.compound["cause"].atom = "Parsed text violates expected format"; + break; + case URI_ERROR_NULL: + exception.data.compound["cause"].atom = "One of the params passed was NULL although it mustn't be"; + break; + case URI_ERROR_MALLOC: + exception.data.compound["cause"].atom = "Requested memory could not be allocated"; + break; + case URI_ERROR_OUTPUT_TOO_LARGE: + exception.data.compound["cause"].atom = "Some output is to large for the receiving buffer"; + break; + case URI_ERROR_NOT_IMPLEMENTED: + 
exception.data.compound["cause"].atom = "The called function is not implemented yet"; + break; + case URI_ERROR_RANGE_INVALID: + exception.data.compound["cause"].atom = "The parameters passed contained invalid ranges"; + break; + case URI_ERROR_ADDBASE_REL_BASE: + exception.data.compound["cause"].atom = "Given base is not absolute"; + break; + case URI_ERROR_REMOVEBASE_REL_SOURCE: + exception.data.compound["cause"].atom = "Given base is not absolute"; + break; + + default: + break; + } +} + +URLImpl::URLImpl() : _handle(NULL), _isDownloaded(false), _hasFailed(false) { +} + +URLImpl::URLImpl(const std::string& url) : _orig(url), _handle(NULL), _isDownloaded(false), _hasFailed(false) { + UriParserStateA state; + state.uri = &_uri; + + int err = uriParseUriA(&state, _orig.c_str()); + if (err != URI_SUCCESS) { + UriParserStateA state2; + state2.uri = &_uri; + + char* tmp = (char*)malloc(8 + 3 * _orig.size() + 1); + uriWindowsFilenameToUriStringA(_orig.c_str(), tmp); + _orig = std::string(tmp); + err = uriParseUriA(&state2, _orig.c_str()); + free(tmp); + } + + if (err != URI_SUCCESS) { + UriParserStateA state2; + state2.uri = &_uri; + + char* tmp = (char*)malloc(7 + 3 * _orig.size() + 1 ); + uriUnixFilenameToUriStringA(_orig.c_str(), tmp); + _orig = std::string(tmp); + err = uriParseUriA(&state2, _orig.c_str()); + free(tmp); + } + + if (err != URI_SUCCESS) { + ErrorEvent exc; + prepareException(exc, err, _orig, &state); + throw exc; + } +} + +URLImpl::~URLImpl() { + uriFreeUriMembersA(&_uri); +} + +URL URLImpl::resolve(URLImpl* relative, URLImpl* absolute) { + std::shared_ptr<URLImpl> dest(new URLImpl()); + int err = uriAddBaseUriExA(&(dest->_uri), &(relative->_uri), &(absolute->_uri), URI_RESOLVE_IDENTICAL_SCHEME_COMPAT); + if (err != URI_SUCCESS) { + ErrorEvent exc("Cannot resolve " + (std::string)(*relative) + " with " + (std::string)(*absolute)); + prepareException(exc, err, "", NULL); + throw exc; + } + + // serialize as string and reparse to mantain string in 
_orig + return URL((std::string)(*dest.get())); +} + +URL URLImpl::resolveWithCWD(URLImpl* relative) { + char currPath[FILENAME_MAX]; + if (!getcwd(currPath, sizeof(currPath))) { + ERROR_PLATFORM_THROW("Cannot get current working directory"); + } + currPath[sizeof(currPath) - 1] = '\0'; /* not really required */ + + + std::shared_ptr<URLImpl> cwdURL(new URLImpl(std::string("file://") + currPath)); + + return resolve(relative, cwdURL.get()); +} + +URL URLImpl::refer(URLImpl* absoluteSource, URLImpl* absoluteBase) { + std::shared_ptr<URLImpl> dest(new URLImpl()); + int err = uriRemoveBaseUriA(&(dest->_uri), &(absoluteSource->_uri), &(absoluteBase->_uri), URI_FALSE); + if (err != URI_SUCCESS) { + ErrorEvent exc("Cannot make a relative reference for " + (std::string)(*absoluteSource) + " with " + (std::string)(*absoluteBase)); + prepareException(exc, err, "", NULL); + throw exc; + } + + // serialize as string and reparse to mantain string in _orig + return URL((std::string)(*dest.get())); +} + +void URLImpl::normalize() { + int err = uriNormalizeSyntaxA(&_uri); + if (err != URI_SUCCESS) { + ErrorEvent exc("Cannot normalize URL " + (std::string)*this); + prepareException(exc, err, _orig, NULL); + throw exc; + } +} + +std::string URLImpl::path() const { + UriPathSegmentA* firstSeg = _uri.pathHead; + UriPathSegmentA* lastSeg = firstSeg; + while(lastSeg->next) { + lastSeg = lastSeg->next; + } + + std::string path; + + // what a mess! 
+ if (_uri.absolutePath || + (_uri.pathHead != NULL && + (_uri.hostText.first != NULL || + _uri.hostData.ip4 != NULL || + _uri.hostData.ip6 != NULL || + _uri.hostData.ipFuture.first != NULL))) { + path += "/"; + } + path += std::string(firstSeg->text.first, lastSeg->text.afterLast - firstSeg->text.first); + return path; +} + +std::list<std::string> URLImpl::pathComponents() const { + std::list<std::string> pathList; + + UriPathSegmentA* currSeg = _uri.pathHead; + while(currSeg != NULL) { + pathList.push_back(USCXML_URI_STRING((*currSeg), text)); + currSeg = currSeg->next; + } + + return pathList; +} + +std::map<std::string, std::string> URLImpl::query() const { + UriQueryListA * queryList; + UriQueryListA * currList; + std::map<std::string, std::string> queryMap; + int itemCount; + + int err = uriDissectQueryMallocA(&queryList, &itemCount, _uri.query.first, _uri.query.afterLast); + if (err != URI_SUCCESS) { + ErrorEvent exc("Cannot get query from URL " + (std::string)*this); + prepareException(exc, err, _orig, NULL); + throw exc; + } + + currList = queryList; + while(currList != NULL) { + queryMap[currList->key] = currList->value != NULL ? 
currList->value : ""; + currList = currList->next; + } + + uriFreeQueryListA(queryList); + + return queryMap; +} + +CURL* URLImpl::getCurlHandle() { + if (_handle == NULL) { + _handle = curl_easy_init(); + if (_handle == NULL) + LOG(ERROR) << "curl_easy_init returned NULL, this is bad!"; + } + return _handle; +} + +size_t URLImpl::writeHandler(void *ptr, size_t size, size_t nmemb, void *userdata) { + URLImpl* url = (URLImpl*)userdata; + url->_rawInContent.write((char*)ptr, size * nmemb); + + std::set<URLMonitor*>::iterator monIter = url->_monitors.begin(); + while(monIter != url->_monitors.end()) { + (*monIter)->contentChunkReceived(URL(url->shared_from_this()), std::string((char*)ptr, size * nmemb)); + monIter++; + } + + return size * nmemb; +} + +size_t URLImpl::headerHandler(void *ptr, size_t size, size_t nmemb, void *userdata) { + URLImpl* url = (URLImpl*)userdata; + url->_rawInHeader.write((char*)ptr, size * nmemb); + + std::set<URLMonitor*>::iterator monIter = url->_monitors.begin(); + while(monIter != url->_monitors.end()) { + (*monIter)->headerChunkReceived(URL(url->shared_from_this()), std::string((char*)ptr, size * nmemb)); + monIter++; + } + + return size * nmemb; +} + +void URLImpl::downloadStarted() { + // LOG(INFO) << "Starting download of " << asString() << std::endl; + _rawInContent.str(""); + _rawInContent.clear(); + _rawInHeader.str(""); + _rawInHeader.clear(); + + _statusMsg = ""; + _statusCode = ""; + + std::set<URLMonitor*>::iterator monIter = _monitors.begin(); + while(monIter != _monitors.end()) { + (*monIter)->downloadStarted(URL(shared_from_this())); + monIter++; + } +} + +void URLImpl::downloadCompleted() { + std::lock_guard<std::recursive_mutex> lock(_mutex); + + if (iequals(scheme(), "http")) { + // process header fields + std::string line; + while (std::getline(_rawInHeader, line)) { + size_t colon = line.find_first_of(":"); + size_t newline = line.find_first_of("\r\n"); + if (newline == std::string::npos) + newline = line.size(); + + 
if (colon == std::string::npos) { + _statusMsg = line.substr(0, newline); + if (_statusMsg.length() >= 11) + _statusCode = _statusMsg.substr(9, 3); + } else { + std::string key = line.substr(0, colon); + size_t firstChar = line.find_first_not_of(": ", colon, 2); + if (firstChar == std::string::npos) { + // nothing but spaces? + _inHeaders[line.substr(0, newline)] = ""; + } else { + std::string value = line.substr(firstChar, newline - firstChar); + _inHeaders[key] = value; + } + } + } + } + + _hasFailed = false; + _isDownloaded = true; + _condVar.notify_all(); + + std::set<URLMonitor*>::iterator monIter = _monitors.begin(); + while(monIter != _monitors.end()) { + (*monIter)->downloadCompleted(URL(shared_from_this())); + monIter++; + } +} + +void URLImpl::downloadFailed(CURLcode errorCode) { + std::lock_guard<std::recursive_mutex> lock(_mutex); + + _error = curl_easy_strerror(errorCode); + _hasFailed = true; + _isDownloaded = false; + _condVar.notify_all(); + + std::set<URLMonitor*>::iterator monIter = _monitors.begin(); + while(monIter != _monitors.end()) { + (*monIter)->downloadFailed(URL(shared_from_this()), errorCode); + monIter++; + } + +} + +const void URLImpl::download(bool blocking) { + std::lock_guard<std::recursive_mutex> lock(_mutex); + + if (_isDownloaded) + return; + + URL url(shared_from_this()); + URLFetcher::fetchURL(url); + + if (blocking) { + while(!_isDownloaded && !_hasFailed) { + _condVar.wait(_mutex); // wait for notification + } + if (_hasFailed) { + ERROR_COMMUNICATION(exc, _error); +// exc.data = URL(shared_from_this()); + throw exc; + } + if (iequals(scheme(), "http")) { + if (_statusCode.size() > 0 && strTo<int>(_statusCode) > 400) { + ERROR_COMMUNICATION(exc, _error); +// exc.data = URL(shared_from_this()); + if (_error.length() > 0) + exc.data.compound["cause"] = Data(_error, Data::VERBATIM); + throw exc; + } + } + } +} + +URLImpl::operator Data() const { + Data data; + data.compound["url"] = Data(std::string(*this), Data::VERBATIM); + 
data.compound["host"] = Data(host(), Data::VERBATIM); + data.compound["scheme"] = Data(scheme(), Data::VERBATIM); + data.compound["path"] = Data(path(), Data::VERBATIM); + data.compound["port"] = Data(port(), Data::INTERPRETED); + data.compound["isAbsolute"] = Data(isAbsolute()); + if (_statusCode.length() > 0) + data.compound["statusCode"] = Data(_statusCode, Data::VERBATIM); + if (_statusMsg.length() > 0) + data.compound["statusMsg"] = Data(_statusMsg, Data::VERBATIM); + + std::list<std::string> pathComps = pathComponents(); + std::list<std::string>::const_iterator pathIter = pathComps.begin(); + while(pathIter != pathComps.end()) { + data.compound["pathComponent"].array.push_back(Data(*pathIter, Data::VERBATIM)); + pathIter++; + } + + return data; +} + + +URLImpl::operator std::string() const { + int charsRequired = 0; + if (uriToStringCharsRequiredA(&_uri, &charsRequired) != URI_SUCCESS) { + throw ErrorEvent("Cannot recompose URL"); + } + charsRequired++; + + char * uriString; + uriString = (char*)malloc(charsRequired * sizeof(char)); + if (uriString == NULL) { + throw ErrorEvent("Malloc failed"); + } + + if (uriToStringA(uriString, &_uri, charsRequired, NULL) != URI_SUCCESS) { + free(uriString); + throw ErrorEvent("Cannot recompose URL"); + } + + std::string recomposed(uriString); + free(uriString); + return recomposed; + +} + +URLFetcher::URLFetcher() { + _isStarted = false; + _envProxy = NULL; + _multiHandle = curl_multi_init(); + + // read proxy information from environment + // CURLOPT_PROXY; + // CURLOPT_PROXY_TRANSFER_MODE; + // CURLOPT_PROXYAUTH; + // CURLOPT_PROXYHEADER; + // CURLOPT_PROXYPASSWORD; + // CURLOPT_PROXYPORT; + // CURLOPT_PROXYTYPE; + // CURLOPT_PROXYUSERNAME; + // CURLOPT_PROXYUSERPWD; + + /* + see http://curl.haxx.se/libcurl/c/CURLOPT_PROXY.html + e.g. 
'socks5://bob:marley@localhost:12345' + */ + _envProxy = getenv("USCXML_PROXY"); + +#if 0 + bool unsupported = false; + CURLcode curlError; + + // exposed just in case + char* envProxyTransferMode = getenv("USCXML_PROXY_TRANSFER_MODE"); + char* envProxyAuth = getenv("USCXML_PROXYAUTH"); + // char* envProxyHeader = getenv("USCXML_PROXYHEADER"); // not available in older curl + char* envProxyPassword = getenv("USCXML_PROXYPASSWORD"); + char* envProxyPort = getenv("USCXML_PROXYPORT"); + // char* envProxyType = getenv("USCXML_PROXYTYPE"); // takes an int, have another look if needed + char* envProxyUsername = getenv("USCXML_PROXYUSERNAME"); + char* envProxyUserPwd = getenv("USCXML_PROXYUSERPWD"); + + /* Name of proxy to use. */ + if (envProxy) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXY, envProxy)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy: " << curl_easy_strerror(curlError); + + /* set transfer mode (;type=<a|i>) when doing FTP via an HTTP proxy */ + if (envProxyTransferMode) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXY_TRANSFER_MODE, envProxyTransferMode)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy transfer mode: " << curl_easy_strerror(curlError); + + /* Set this to a bitmask value to enable the particular authentications + methods you like. Use this in combination with CURLOPT_PROXYUSERPWD. + Note that setting multiple bits may cause extra network round-trips. 
*/ + if (envProxyAuth) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXYAUTH, envProxyAuth)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy authentication: " << curl_easy_strerror(curlError); + +#if 0 + /* This points to a linked list of headers used for proxy requests only, + struct curl_slist kind */ + if (envProxyHeader && unsupported) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXYHEADER, envProxyHeader)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy header: " << curl_easy_strerror(curlError); +#endif + + /* "name" and "pwd" to use with Proxy when fetching. */ + if (envProxyUsername) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXYUSERNAME, envProxyUsername)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy username: " << curl_easy_strerror(curlError); + if (envProxyPassword) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXYPASSWORD, envProxyPassword)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy password: " << curl_easy_strerror(curlError); + + /* Port of the proxy, can be set in the proxy string as well with: + "[host]:[port]" */ + if (envProxyPort) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXYPORT, envProxyPort)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy port: " << curl_easy_strerror(curlError); + +#if 0 + /* indicates type of proxy. accepted values are CURLPROXY_HTTP (default), + CURLPROXY_SOCKS4, CURLPROXY_SOCKS4A and CURLPROXY_SOCKS5. */ + if (envProxyType && unsupported) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXYTYPE, envProxyType)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy type: " << curl_easy_strerror(curlError); +#endif + + /* "user:password" to use with proxy. 
*/ + if (envProxyUserPwd) + (curlError = curl_easy_setopt(_multiHandle, CURLOPT_PROXYUSERPWD, envProxyUserPwd)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy user password: " << curl_easy_strerror(curlError); +#endif + + start(); +} + +URLFetcher::~URLFetcher() { + stop(); + curl_multi_cleanup(_multiHandle); +} + +void URLFetcher::fetchURL(URL& url) { + URLFetcher* instance = getInstance(); + std::lock_guard<std::recursive_mutex> lock(instance->_mutex); + + CURL* handle = url._impl->getCurlHandle(); + assert(handle != NULL); + if (handle == NULL) + return; + + if (instance->_handlesToURLs.find(handle) == instance->_handlesToURLs.end()) { + CURLcode curlError; + + std::string fromURL(url); + + (curlError = curl_easy_setopt(handle, CURLOPT_URL, fromURL.c_str())) == CURLE_OK || + LOG(ERROR) << "Cannot set url to " << std::string(url) << ": " << curl_easy_strerror(curlError); + + // (curlError = curl_easy_setopt(handle, CURLOPT_NOSIGNAL, 1)) == CURLE_OK || + // LOG(ERROR) << "Cannot set curl to ignore signals: " << curl_easy_strerror(curlError); + + // (curlError = curl_easy_setopt(handle, CURLOPT_FORBID_REUSE, 1)) == CURLE_OK || + // LOG(ERROR) << "Cannot force noreuse: " << curl_easy_strerror(curlError); + + // (curlError = curl_easy_setopt(handle, CURLOPT_VERBOSE, 1)) == CURLE_OK || + // LOG(ERROR) << "Cannot set verbose: " << curl_easy_strerror(curlError); + + (curlError = curl_easy_setopt(handle, CURLOPT_WRITEDATA, url._impl.get())) == CURLE_OK || + LOG(ERROR) << "Cannot register this as write userdata: " << curl_easy_strerror(curlError); + + (curlError = curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, URLImpl::writeHandler)) == CURLE_OK || + LOG(ERROR) << "Cannot set write callback: " << curl_easy_strerror(curlError); + + (curlError = curl_easy_setopt(handle, CURLOPT_HEADERFUNCTION, URLImpl::headerHandler)) == CURLE_OK || + LOG(ERROR) << "Cannot request header from curl: " << curl_easy_strerror(curlError); + + (curlError = curl_easy_setopt(handle, 
CURLOPT_HEADERDATA, url._impl.get())) == CURLE_OK || + LOG(ERROR) << "Cannot register this as header userdata: " << curl_easy_strerror(curlError); + + (curlError = curl_easy_setopt(handle, CURLOPT_SSL_VERIFYPEER, false)) == CURLE_OK || + LOG(ERROR) << "Cannot forfeit peer verification: " << curl_easy_strerror(curlError); + + (curlError = curl_easy_setopt(handle, CURLOPT_USERAGENT, "uscxml/" USCXML_VERSION)) == CURLE_OK || + LOG(ERROR) << "Cannot set our user agent string: " << curl_easy_strerror(curlError); + + (curlError = curl_easy_setopt(handle, CURLOPT_FOLLOWLOCATION, true)) == CURLE_OK || + LOG(ERROR) << "Cannot enable follow redirects: " << curl_easy_strerror(curlError); + + if (instance->_envProxy) + (curlError = curl_easy_setopt(handle, CURLOPT_PROXY, instance->_envProxy)) == CURLE_OK || + LOG(ERROR) << "Cannot set curl proxy: " << curl_easy_strerror(curlError); + + if (url._impl->_requestType == URLRequestType::POST) { + + (curlError = curl_easy_setopt(handle, CURLOPT_POST, 1)) == CURLE_OK || + LOG(ERROR) << "Cannot set request type to post for " << std::string(url) << ": " << curl_easy_strerror(curlError); + + (curlError = curl_easy_setopt(handle, CURLOPT_COPYPOSTFIELDS, url._impl->_outContent.c_str())) == CURLE_OK || + LOG(ERROR) << "Cannot set post data " << std::string(url) << ": " << curl_easy_strerror(curlError); + + // Disable "Expect: 100-continue" + // curl_slist* disallowed_headers = 0; + // disallowed_headers = curl_slist_append(disallowed_headers, "Expect:"); + // (curlError = curl_easy_setopt(handle, CURLOPT_HTTPHEADER, disallowed_headers)) == CURLE_OK || + // LOG(ERROR) << "Cannot disable Expect 100 header: " << curl_easy_strerror(curlError); + + struct curl_slist* headers = NULL; + std::map<std::string, std::string>::iterator paramIter = url._impl->_outHeader.begin(); + while(paramIter != url._impl->_outHeader.end()) { + // char* key = curl_easy_escape(handle, paramIter->first.c_str(), paramIter->first.length()); + // char* value = 
curl_easy_escape(handle, paramIter->second.c_str(), paramIter->second.length()); + + const char* value = paramIter->second.c_str(); + + char* header = (char*)malloc(paramIter->first.size() + strlen(value) + 3); + sprintf(header,"%s: %s", paramIter->first.c_str(), value); + headers = curl_slist_append(headers, header); + + // curl_free(key); + // curl_free(value); + paramIter++; + } + + // Disable "Expect: 100-continue" + headers = curl_slist_append(headers, "Expect:"); + + (curlError = curl_easy_setopt(handle, CURLOPT_HTTPHEADER, headers)) == CURLE_OK || + LOG(ERROR) << "Cannot headers for " << std::string(url) << ": " << curl_easy_strerror(curlError); + + //curl_slist_free_all(headers); + + + } else if (url._impl->_requestType == URLRequestType::GET) { + (curlError = curl_easy_setopt(handle, CURLOPT_HTTPGET, 1)) == CURLE_OK || + LOG(ERROR) << "Cannot set request type to get for " << std::string(url) << ": " << curl_easy_strerror(curlError); + } + + url._impl->downloadStarted(); + instance->_handlesToURLs[handle] = url; + assert(instance->_handlesToURLs.size() > 0); + + curl_multi_add_handle(instance->_multiHandle, handle); + instance->_condVar.notify_all(); + } +} + +void URLFetcher::breakURL(URL& url) { + URLFetcher* instance = getInstance(); + CURL* handle = url._impl->getCurlHandle(); + + std::lock_guard<std::recursive_mutex> lock(instance->_mutex); + if (instance->_handlesToURLs.find(handle) != instance->_handlesToURLs.end()) { + url._impl->downloadFailed(CURLE_OK); + curl_multi_remove_handle(instance->_multiHandle, handle); + instance->_handlesToURLs.erase(handle); + } +} + +void URLFetcher::start() { + std::lock_guard<std::recursive_mutex> lock(_mutex); + if (!_isStarted) { + _isStarted = true; + _thread = new std::thread(URLFetcher::run, this); + } +} + +void URLFetcher::stop() { + std::lock_guard<std::recursive_mutex> lock(_mutex); + if (_isStarted) { + _isStarted = false; + _thread->join(); + delete _thread; + } +} + +void URLFetcher::run(void* instance) 
{ + URLFetcher* fetcher = (URLFetcher*)instance; + while(fetcher->_isStarted) { + fetcher->perform(); + } + LOG(ERROR) << "URLFetcher thread stopped!"; +} + +void URLFetcher::perform() { + + CURLMsg *msg; /* for picking up messages with the transfer status */ + int msgsLeft; /* how many messages are left */ + int stillRunning; + CURLMcode err; + + { + std::lock_guard<std::recursive_mutex> lock(_mutex); + if (_handlesToURLs.empty()) { + _condVar.wait(_mutex); + } + err = curl_multi_perform(_multiHandle, &stillRunning); + if (err != CURLM_OK) { + LOG(WARNING) << "curl_multi_perform: " << curl_multi_strerror(err); + } + } + + do { + struct timeval timeout; + int rc; /* select() return code */ + + fd_set fdread, fdwrite, fdexcep; + FD_ZERO(&fdread); + FD_ZERO(&fdwrite); + FD_ZERO(&fdexcep); + + int maxfd = -1; + long curlTimeOut = -1; + + /* set a suitable timeout to play around with */ + timeout.tv_sec = 1; + timeout.tv_usec = 0; + + { + std::lock_guard<std::recursive_mutex> lock(_mutex); + err = curl_multi_timeout(_multiHandle, &curlTimeOut); + if (err != CURLM_OK) { + LOG(WARNING) << "curl_multi_timeout: " << curl_multi_strerror(err); + } + } + + if(curlTimeOut >= 0) { + timeout.tv_sec = curlTimeOut / 1000; + if(timeout.tv_sec > 1) { + timeout.tv_sec = 1; + } else { + timeout.tv_usec = (curlTimeOut % 1000) * 1000; + } + } + + /* get file descriptors from the transfers */ + { + std::lock_guard<std::recursive_mutex> lock(_mutex); + err = curl_multi_fdset(_multiHandle, &fdread, &fdwrite, &fdexcep, &maxfd); + if (err != CURLM_OK) { + LOG(WARNING) << "curl_multi_fdset: " << curl_multi_strerror(err); + } + } + + rc = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout); + + switch(rc) { + case -1: + /* select error */ + break; + case 0: /* timeout */ + default: { /* action */ + std::lock_guard<std::recursive_mutex> lock(_mutex); + err = curl_multi_perform(_multiHandle, &stillRunning); + if (err != CURLM_OK) { + LOG(WARNING) << "curl_multi_perform: " << 
curl_multi_strerror(err); + } + break; + } + } + + { + std::lock_guard<std::recursive_mutex> lock(_mutex); + while ((msg = curl_multi_info_read(_multiHandle, &msgsLeft))) { + if (msg->msg == CURLMSG_DONE) { + switch (msg->data.result) { + case CURLE_OK: + _handlesToURLs[msg->easy_handle]._impl->downloadCompleted(); + err = curl_multi_remove_handle(_multiHandle, msg->easy_handle); + if (err != CURLM_OK) { + LOG(WARNING) << "curl_multi_remove_handle: " << curl_multi_strerror(err); + } + + _handlesToURLs.erase(msg->easy_handle); + break; + default: + _handlesToURLs[msg->easy_handle]._impl->downloadFailed(msg->data.result); + err = curl_multi_remove_handle(_multiHandle, msg->easy_handle); + if (err != CURLM_OK) { + LOG(WARNING) << "curl_multi_remove_handle: " << curl_multi_strerror(err); + } + + _handlesToURLs.erase(msg->easy_handle); + } + } else { + LOG(ERROR) << "Curl reports info on unfinished download?!"; + } + } + } + } while(stillRunning && _isStarted); +} + +URLFetcher* URLFetcher::_instance = NULL; + +URLFetcher* URLFetcher::getInstance() { + if (_instance == NULL) { + _instance = new URLFetcher(); + } + return _instance; +} + + +}
\ No newline at end of file diff --git a/src/uscxml/util/URL.h b/src/uscxml/util/URL.h new file mode 100644 index 0000000..f545fe8 --- /dev/null +++ b/src/uscxml/util/URL.h @@ -0,0 +1,333 @@ +/** + * @file + * @author 2012-2013 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. + * @endcond + */ + +#ifndef URL_H_9DAEGSMV +#define URL_H_9DAEGSMV + +#include "uscxml/Common.h" +#include "uscxml/messages/Event.h" + +#define DOWNLOAD_IF_NECESSARY if (!_isDownloaded) { download(true); } + +#include <string> +#include <sstream> +#include <map> +#include <set> +#include <list> +#include <thread> +#include <condition_variable> +#include <curl/curl.h> +#include <uriparser/Uri.h> + +#define USCXML_URI_STRING(obj, field) std::string(obj.field.first, obj.field.afterLast - obj.field.first) + +namespace uscxml { + +class URL; + +class USCXML_API URLMonitor { +public: + virtual void downloadStarted(const URL& url) {}; + virtual void downloadCompleted(const URL& url) {}; + virtual void downloadFailed(const URL& url, int errorCode) {}; + virtual void headerChunkReceived(const URL& url, const std::string& headerChunk) {}; + virtual void contentChunkReceived(const URL& url, const std::string& contentChunk) {}; +}; + +enum URLRequestType { + POST, + GET +}; + +class USCXML_API URLImpl : public std::enable_shared_from_this<URLImpl> { +public: + URLImpl(const std::string& url); + ~URLImpl(); + + bool isAbsolute() const { + // see 
https://sourceforge.net/p/uriparser/bugs/3/ + return _uri.absolutePath || ((_uri.hostText.first != nullptr) && (_uri.pathHead != nullptr)); + } + + std::string scheme() const { + return USCXML_URI_STRING(_uri, scheme); + } + + std::string userInfo() const { + return USCXML_URI_STRING(_uri, userInfo); + } + + std::string host() const { + return USCXML_URI_STRING(_uri, hostText); + } + + std::string port() const { + return USCXML_URI_STRING(_uri, portText); + } + + std::string fragment() const { + return USCXML_URI_STRING(_uri, fragment); + } + + std::map<std::string, std::string> query() const; + std::string path() const; + std::list<std::string> pathComponents() const; + + void normalize(); + + static URL resolve(URLImpl* relativeURL, URLImpl* absoluteURL); + static URL resolveWithCWD(URLImpl* relativeURL); + static URL refer(URLImpl* absoluteSource, URLImpl* absoluteBase); + + void addMonitor(URLMonitor* monitor) { + _monitors.insert(monitor); + } + void removeMonitor(URLMonitor* monitor) { + _monitors.erase(monitor); + } + + // downloading / uploading + void addOutHeader(const std::string& key, const std::string& value) { + _outHeader[key] = value; + } + void setOutContent(const std::string& content) { + _outContent = content; + _requestType = URLRequestType::POST; + } + void setRequestType(URLRequestType requestType) { + _requestType = requestType; + + } + + const std::map<std::string, std::string> getInHeaderFields() { + DOWNLOAD_IF_NECESSARY + return _inHeaders; + } + + const std::string getInHeaderField(const std::string& key) { + DOWNLOAD_IF_NECESSARY + if (_inHeaders.find(key) != _inHeaders.end()) { + return _inHeaders[key]; + } + return ""; + } + + const std::string getStatusCode() const { +// DOWNLOAD_IF_NECESSARY + return _statusCode; + } + + const std::string getStatusMessage() const { +// DOWNLOAD_IF_NECESSARY + return _statusMsg; + } + + const std::string getInContent(bool forceReload = false) { + if (forceReload) + _isDownloaded = false; + 
DOWNLOAD_IF_NECESSARY + return _rawInContent.str(); + } + + const void download(bool blocking = false); + + operator Data() const; + operator std::string() const; + +protected: + URLImpl(); + UriUriA _uri; + std::string _orig; + + CURL* getCurlHandle(); + static size_t writeHandler(void *ptr, size_t size, size_t nmemb, void *userdata); + static size_t headerHandler(void *ptr, size_t size, size_t nmemb, void *userdata); + + void downloadStarted(); + void downloadCompleted(); + void downloadFailed(CURLcode errorCode); + + static void prepareException(ErrorEvent& exception, int errorCode, const std::string& origUri, UriParserStateA* parser); + + CURL* _handle; + std::stringstream _rawInContent; + std::stringstream _rawInHeader; + std::map<std::string, std::string> _inHeaders; + + std::string _outContent; + std::map<std::string, std::string> _outHeader; + URLRequestType _requestType; + + std::string _statusCode; + std::string _statusMsg; + bool _isDownloaded; + bool _hasFailed; + std::string _error; + + std::condition_variable_any _condVar; + std::recursive_mutex _mutex; + + std::set<URLMonitor*> _monitors; + + friend class URLFetcher; +}; + +class USCXML_API URL { +public: + PIMPL_OPERATORS(URL) + URL(const std::string url) : _impl(new URLImpl(url)) {} + + bool isAbsolute() { + return _impl->isAbsolute(); + } + + std::string scheme() { + return _impl->scheme(); + } + + std::string userInfo() { + return _impl->userInfo(); + } + + std::string host() { + return _impl->host(); + } + + std::string port() { + return _impl->port(); + } + + std::string fragment() { + return _impl->fragment(); + } + + std::map<std::string, std::string> query() { + return _impl->query(); + } + + std::string path() { + return _impl->path(); + } + + std::list<std::string> pathComponents() { + return _impl->pathComponents(); + } + + void normalize() { + return _impl->normalize(); + } + + static URL resolve(URL relativeURL, URL absoluteURL) { + return URLImpl::resolve(relativeURL._impl.get(), 
absoluteURL._impl.get()); + } + + static URL resolveWithCWD(URL relativeURL) { + return URLImpl::resolveWithCWD(relativeURL._impl.get()); + } + + static URL refer(URL absoluteSource, URL absoluteBase) { + return URLImpl::refer(absoluteSource._impl.get(), absoluteBase._impl.get()); + } + + void addOutHeader(const std::string& key, const std::string& value) { + return _impl->addOutHeader(key, value); + } + + void setOutContent(const std::string& content) { + return _impl->setOutContent(content); + } + void setRequestType(URLRequestType requestType) { + return _impl->setRequestType(requestType); + } + + const std::map<std::string, std::string> getInHeaderFields() { + return _impl->getInHeaderFields(); + } + + const std::string getInHeaderField(const std::string& key) { + return _impl->getInHeaderField(key); + } + + const std::string getStatusCode() const { + return _impl->getStatusCode(); + } + + const std::string getStatusMessage() const { + return _impl->getStatusMessage(); + } + + const std::string getInContent(bool forceReload = false) { + return _impl->getInContent(forceReload); + } + + const void download(bool blocking = false) const { + return _impl->download(blocking); + } + + void addMonitor(URLMonitor* monitor) { + return _impl->addMonitor(monitor); + } + void removeMonitor(URLMonitor* monitor) { + return _impl->removeMonitor(monitor); + } + + operator Data() const { + return _impl->operator Data(); + } + + operator std::string() { + return (*_impl.get()); + } + +protected: + std::shared_ptr<URLImpl> _impl; + friend class URLFetcher; +}; + +class USCXML_API URLFetcher { +public: + static void fetchURL(URL& url); + static void breakURL(URL& url); + + void start(); + void stop(); + +protected: + URLFetcher(); + ~URLFetcher(); + + static URLFetcher* _instance; + static URLFetcher* getInstance(); + + static void run(void* instance); + void perform(); + + std::thread* _thread; + std::condition_variable_any _condVar; + std::recursive_mutex _mutex; + bool 
_isStarted; + + std::map<CURL*, URL> _handlesToURLs; + CURLM* _multiHandle; + char* _envProxy; +}; + +} + +#endif /* end of include guard: URL_H_9DAEGSMV */ diff --git a/src/uscxml/util/UUID.cpp b/src/uscxml/util/UUID.cpp new file mode 100644 index 0000000..34e3e75 --- /dev/null +++ b/src/uscxml/util/UUID.cpp @@ -0,0 +1,70 @@ +/** + * @file + * @author 2012-2013 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. + * @endcond + */ + +#include "UUID.h" +#include <sstream> +#include <boost/uuid/uuid_io.hpp> + +namespace uscxml { +boost::uuids::random_generator UUID::uuidGen; + +std::string UUID::getUUID() { + boost::uuids::uuid uuid = uuidGen(); + std::ostringstream os; + os << uuid; + return os.str(); +} + +bool UUID::isUUID(const std::string& uuid) { + if (uuid.size() != 36) + return false; + + if (uuid[8] != '-' || uuid[13] != '-' || uuid[18] != '-' || uuid[23] != '-') + return false; + + for (size_t i = 0; i < 36; i++) { + if (i == 8 || i == 13 || i == 18 || i ==23) + continue; + + char c = uuid[i]; + if (c == 'a' || + c == 'b' || + c == 'c' || + c == 'd' || + c == 'e' || + c == 'f' || + c == '0' || + c == '1' || + c == '2' || + c == '3' || + c == '4' || + c == '5' || + c == '6' || + c == '7' || + c == '8' || + c == '9') { + continue; + } else { + return false; + } + } + return true; +} + +}
\ No newline at end of file diff --git a/src/uscxml/util/UUID.h b/src/uscxml/util/UUID.h new file mode 100644 index 0000000..873d963 --- /dev/null +++ b/src/uscxml/util/UUID.h @@ -0,0 +1,39 @@ +/** + * @file + * @author 2012-2013 Stefan Radomski (stefan.radomski@cs.tu-darmstadt.de) + * @copyright Simplified BSD + * + * @cond + * This program is free software: you can redistribute it and/or modify + * it under the terms of the FreeBSD license as published by the FreeBSD + * project. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the FreeBSD license along with this + * program. If not, see <http://www.opensource.org/licenses/bsd-license>. + * @endcond + */ + +#ifndef UUID_H_8X65R2EI +#define UUID_H_8X65R2EI + +#include "uscxml/Common.h" +#include <boost/uuid/uuid_generators.hpp> +#include <string> + +namespace uscxml { + +class USCXML_API UUID { +public: + static std::string getUUID(); + static bool isUUID(const std::string& uuid); + static boost::uuids::random_generator uuidGen; +}; + +} + + +#endif /* end of include guard: UUID_H_8X65R2EI */ |