From e3c62dc1def9270761ca63c73ae76fdca9d61582 Mon Sep 17 00:00:00 2001 From: Ariya Hidayat Date: Wed, 2 Sep 2009 13:51:47 +0200 Subject: CSS parser speed-up: skip the need to call toLower(). Instead of taking the lowercase of each tokenized character, it is better to encode the logic in the automaton itself. During the parsing of tiger.svg (tests/benchmarks/qsvgrenderer), the time spent inside QCssScanner_Generated::lex() goes down from 0.53 millions instructions to just 0.23 millions (2x faster). Autotests for qcssparser, qtextdocumentfragment, qstylesheet still pass. Reviewed-by: Simon Hausmann --- src/gui/text/qcssscanner.cpp | 84 +++++++++++++++++++------------------- util/lexgen/css2-simplified.lexgen | 3 +- 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/src/gui/text/qcssscanner.cpp b/src/gui/text/qcssscanner.cpp index 74ab7d9..06a13de 100644 --- a/src/gui/text/qcssscanner.cpp +++ b/src/gui/text/qcssscanner.cpp @@ -73,7 +73,7 @@ int QCssScanner_Generated::lex() int lastAcceptingPos = -1; int token = -1; QChar ch; - + // initial state ch = next(); if (ch.unicode() >= 9 && ch.unicode() <= 10) @@ -146,7 +146,7 @@ int QCssScanner_Generated::lex() } if (ch.unicode() == 95) goto state_24; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_24; if (ch.unicode() == 123) goto state_25; @@ -196,7 +196,7 @@ int QCssScanner_Generated::lex() goto state_32; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_30; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_30; if (ch.unicode() >= 123) goto state_30; @@ -211,7 +211,7 @@ int QCssScanner_Generated::lex() goto state_34; if (ch.unicode() == 95) goto state_33; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_33; goto out; state_5: @@ -232,7 +232,7 @@ int QCssScanner_Generated::lex() goto state_37; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_35; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_35; if (ch.unicode() >= 123) goto state_35; @@ -255,7 +255,7 @@ int QCssScanner_Generated::lex() goto state_22; if (ch.unicode() == 95) goto state_24; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_24; goto out; state_12: @@ -290,7 +290,7 @@ int QCssScanner_Generated::lex() goto state_45; if (ch.unicode() == 95) goto state_46; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_46; goto out; state_17: @@ -310,7 +310,7 @@ int QCssScanner_Generated::lex() goto state_49; if (ch.unicode() == 95) goto state_50; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_50; goto out; state_22: @@ -340,7 +340,7 @@ int QCssScanner_Generated::lex() goto state_54; if (ch.unicode() == 95) goto state_53; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_53; goto out; state_25: @@ -400,7 +400,7 @@ int QCssScanner_Generated::lex() goto state_32; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_30; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_30; if (ch.unicode() >= 123) goto state_30; @@ -440,7 +440,7 @@ int QCssScanner_Generated::lex() goto state_62; if (ch.unicode() == 95) goto state_61; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_61; goto out; state_34: @@ -474,7 +474,7 @@ int QCssScanner_Generated::lex() goto state_37; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_35; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_35; if (ch.unicode() >= 123) goto state_35; @@ -523,7 +523,7 @@ int QCssScanner_Generated::lex() goto state_45; if (ch.unicode() == 95) goto state_46; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_46; goto out; state_41: @@ -536,7 +536,7 @@ int QCssScanner_Generated::lex() goto state_45; if (ch.unicode() == 95) goto state_46; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_46; goto out; state_43: @@ -560,7 +560,7 @@ int QCssScanner_Generated::lex() goto state_45; if (ch.unicode() == 95) goto state_46; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_46; goto out; state_45: @@ -588,7 +588,7 @@ int QCssScanner_Generated::lex() goto state_72; if (ch.unicode() == 95) goto state_71; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_71; goto out; state_47: @@ -602,7 +602,7 @@ int QCssScanner_Generated::lex() goto state_49; if (ch.unicode() == 95) goto state_50; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_50; goto out; state_49: @@ -630,7 +630,7 @@ int QCssScanner_Generated::lex() goto state_76; if (ch.unicode() == 95) goto state_75; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_75; goto out; state_51: @@ -647,7 +647,7 @@ int QCssScanner_Generated::lex() goto state_54; if (ch.unicode() == 95) goto state_53; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_53; goto out; state_52: @@ -668,7 +668,7 @@ int QCssScanner_Generated::lex() goto state_54; if (ch.unicode() == 95) goto state_53; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_53; goto out; state_54: @@ -702,7 +702,7 @@ int QCssScanner_Generated::lex() goto state_32; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_30; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_30; if (ch.unicode() >= 123) goto state_30; @@ -725,7 +725,7 @@ int QCssScanner_Generated::lex() goto state_32; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_30; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_30; if (ch.unicode() >= 123) goto state_30; @@ -748,7 +748,7 @@ int QCssScanner_Generated::lex() goto state_32; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_30; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_30; if (ch.unicode() >= 123) goto state_30; @@ -773,7 +773,7 @@ int QCssScanner_Generated::lex() goto state_32; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_30; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_30; if (ch.unicode() >= 123) goto state_30; @@ -790,7 +790,7 @@ int QCssScanner_Generated::lex() goto state_62; if (ch.unicode() == 95) goto state_61; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_61; goto out; state_62: @@ -818,7 +818,7 @@ int QCssScanner_Generated::lex() goto state_62; if (ch.unicode() == 95) goto state_61; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_61; goto out; state_64: @@ -839,7 +839,7 @@ int QCssScanner_Generated::lex() goto state_37; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_35; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_35; if (ch.unicode() >= 123) goto state_35; @@ -862,7 +862,7 @@ int QCssScanner_Generated::lex() goto state_37; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_35; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_35; if (ch.unicode() >= 123) goto state_35; @@ -885,7 +885,7 @@ int QCssScanner_Generated::lex() goto state_37; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_35; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_35; if (ch.unicode() >= 123) goto state_35; @@ -910,7 +910,7 @@ int QCssScanner_Generated::lex() goto state_37; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_35; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_35; if (ch.unicode() >= 123) goto state_35; @@ -929,7 +929,7 @@ int QCssScanner_Generated::lex() goto state_45; if (ch.unicode() == 95) goto state_46; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_46; goto out; state_70: @@ -944,7 +944,7 @@ int QCssScanner_Generated::lex() goto state_72; if (ch.unicode() == 95) goto state_71; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_71; goto out; state_71: @@ -959,7 +959,7 @@ int QCssScanner_Generated::lex() goto state_72; if (ch.unicode() == 95) goto state_71; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_71; goto out; state_72: @@ -994,7 +994,7 @@ int QCssScanner_Generated::lex() goto state_76; if (ch.unicode() == 95) goto state_75; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_75; goto out; state_75: @@ -1009,7 +1009,7 @@ int QCssScanner_Generated::lex() goto state_76; if (ch.unicode() == 95) goto state_75; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_75; goto out; state_76: @@ -1039,7 +1039,7 @@ int QCssScanner_Generated::lex() goto state_54; if (ch.unicode() == 95) goto state_53; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_53; goto out; state_78: @@ -1060,7 +1060,7 @@ int QCssScanner_Generated::lex() goto state_32; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_30; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_30; if (ch.unicode() >= 123) goto state_30; @@ -1077,7 +1077,7 @@ int QCssScanner_Generated::lex() goto state_62; if (ch.unicode() == 95) goto state_61; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_61; goto out; state_80: @@ -1098,7 +1098,7 @@ int QCssScanner_Generated::lex() goto state_37; if (ch.unicode() >= 93 && ch.unicode() <= 96) goto state_35; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_35; if (ch.unicode() >= 123) goto state_35; @@ -1115,7 +1115,7 @@ int QCssScanner_Generated::lex() goto state_72; if (ch.unicode() == 95) goto state_71; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_71; goto out; state_83: @@ -1130,12 +1130,12 @@ int QCssScanner_Generated::lex() goto state_76; if (ch.unicode() == 95) goto state_75; - if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256) + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256) goto state_75; goto out; found: lastAcceptingPos = pos; - + out: if (lastAcceptingPos != -1) { lexemLength = lastAcceptingPos - lexemStart; diff --git a/util/lexgen/css2-simplified.lexgen b/util/lexgen/css2-simplified.lexgen index 3976632..299ff5e 100644 --- a/util/lexgen/css2-simplified.lexgen +++ b/util/lexgen/css2-simplified.lexgen @@ -1,9 +1,8 @@ [Options] -case-insensitive classname = QCssScanner_Generated [Code Generator Options] -MapToCode[a-z] = (ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256 +MapToCode[a-z] = (ch.unicode() >= 'a' && ch.unicode() <= 'z') || (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || ch.unicode() >= 256 TokenPrefix = QCss:: FileHeader = ../moc/licenseheader.txt -- cgit v0.12