--[[--------------------------------------------------------------------
  optlex.lua: does lexer-based optimizations
  This file is part of LuaSrcDiet.
  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
  The COPYRIGHT file describes the conditions
  under which this software may be distributed.
  See the ChangeLog for more information.
----------------------------------------------------------------------]]

--[[--------------------------------------------------------------------
-- NOTES:
-- * For more lexer-based optimization ideas, see the TODO items or
--   look at technotes.txt.
-- * TODO: general string delimiter conversion optimizer
-- * TODO: (numbers) warn if overly significant digit
----------------------------------------------------------------------]]

local base = _G
local string = require "string"
module "optlex"
local match = string.match
local sub = string.sub
local find = string.find
local rep = string.rep
local print                             -- set in optimize()

------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------

-- error function, can override by setting own function into module
error = base.error

warn = {}                               -- table for warning flags

local stoks, sinfos, stoklns            -- source lists

local is_realtoken = {          -- significant (grammar) tokens
  TK_KEYWORD = true,
  TK_NAME = true,
  TK_NUMBER = true,
  TK_STRING = true,
  TK_LSTRING = true,
  TK_OP = true,
  TK_EOS = true,
}
local is_faketoken = {          -- whitespace (non-grammar) tokens
  TK_COMMENT = true,
  TK_LCOMMENT = true,
  TK_EOL = true,
  TK_SPACE = true,
}

local opt_details               -- for extra information

------------------------------------------------------------------------
-- true if current token is at the start of a line
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------
local function atlinestart(i)
  local tok = stoks[i - 1]
  if i <= 1 or tok == "TK_EOL" then
    return true
  elseif tok == "" then
    return atlinestart(i - 1)
  end
  return false
end

------------------------------------------------------------------------
-- true if current token is at the end of a line
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------
local function atlineend(i)
  local tok = stoks[i + 1]
  if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
    return true
  elseif tok == "" then
    return atlineend(i + 1)
  end
  return false
end
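
------------------------------------------------------------------------
-- Illustrative sketch (not in the original module): deleted tokens are
-- marked "", and both helpers recurse past them, e.g. for
--   stoks = { "TK_EOL", "", "TK_NAME", "", "TK_EOL" }
-- atlinestart(3) is true (skips the "" at index 2) and
-- atlineend(3) is true (skips the "" at index 4).
------------------------------------------------------------------------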

------------------------------------------------------------------------
-- counts comment EOLs inside a long comment
-- * in order to keep line numbering, EOLs need to be reinserted
------------------------------------------------------------------------
local function commenteols(lcomment)
  local sep = #match(lcomment, "^%-%-%[=*%[")
  local z = sub(lcomment, sep + 1, -(sep - 1))  -- remove delims
  local i, c = 1, 0
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    if not p then break end     -- if no matches, done
    i = p + 1
    c = c + 1
    if #s > 0 and r ~= s then   -- skip CRLF or LFCR
      i = i + 1
    end
  end
  return c
end
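
------------------------------------------------------------------------
-- Illustrative check (a sketch, not part of the original module;
-- uncomment to try): two embedded line endings count as 2, and a
-- CRLF pair is counted once, not twice.
--   assert(commenteols("--[[\nfoo\nbar]]") == 2)
--   assert(commenteols("--[[\r\nfoo\r\nbar]]") == 2)
------------------------------------------------------------------------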

------------------------------------------------------------------------
-- compares two tokens (i, j) and returns the whitespace required
-- * important! see technotes.txt for more information
-- * only two grammar/real tokens are being considered
-- * if "", no separation is needed
-- * if " ", then at least one whitespace (or EOL) is required
------------------------------------------------------------------------
local function checkpair(i, j)
  local match = match
  local t1, t2 = stoks[i], stoks[j]
  --------------------------------------------------------------------
  if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
     t2 == "TK_STRING" or t2 == "TK_LSTRING" then
    return ""
  --------------------------------------------------------------------
  elseif t1 == "TK_OP" or t2 == "TK_OP" then
    if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
       (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
      return ""
    end
    if t1 == "TK_OP" and t2 == "TK_OP" then
      -- for TK_OP/TK_OP pairs, see notes in technotes.txt
      local op, op2 = sinfos[i], sinfos[j]
      if (match(op, "^%.%.?$") and match(op2, "^%.")) or
         (match(op, "^[~=<>]$") and op2 == "=") or
         (op == "[" and (op2 == "[" or op2 == "=")) then
        return " "
      end
      return ""
    end
    -- "TK_OP" + "TK_NUMBER" case
    local op = sinfos[i]
    if t2 == "TK_OP" then op = sinfos[j] end
    if match(op, "^%.%.?%.?$") then
      return " "
    end
    return ""
  --------------------------------------------------------------------
  else  -- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER"
    return " "
  --------------------------------------------------------------------
  end
end
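
------------------------------------------------------------------------
-- Illustrative behaviour of checkpair (a sketch, not in the original
-- module; checkpair reads the shared stoks/sinfos lists, with i and j
-- as the two token positions), e.g. for adjacent token infos:
--   "x" ".."     -> ""   a name can abut an operator: x..
--   ".." "."     -> " "  abutting would lex as "...", keep a space
--   "<" "="      -> " "  abutting would lex as "<="
--   "local" "x"  -> " "  keyword and name always need a separator
------------------------------------------------------------------------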

------------------------------------------------------------------------
-- repack tokens, removing deletions caused by optimization process
------------------------------------------------------------------------
local function repack_tokens()
  local dtoks, dinfos, dtoklns = {}, {}, {}
  local j = 1
  for i = 1, #stoks do
    local tok = stoks[i]
    if tok ~= "" then
      dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
      j = j + 1
    end
  end
  stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
end

------------------------------------------------------------------------
-- number optimization
-- * optimization using string formatting functions is one way of doing
--   this, but here, we consider all cases and handle them separately
--   (possibly an idiotic approach...)
-- * the scientific notation generated is not in canonical form; this
--   may or may not be a bad thing, feedback welcome
-- * note: intermediate portions need to fit into a normal number range
-- * optimizations can be divided based on number patterns:
-- * hexadecimal:
--   (1) no need to remove leading zeros, just skip to (2)
--   (2) convert to integer if size equal or smaller
--       * change if equal size -> lose the 'x' to reduce entropy
--   (3) number is then processed as an integer
--   (4) note: does not make 0[xX] consistent
-- * integer:
--   (1) note: includes anything with trailing ".", ".0", ...
--   (2) remove useless fractional part, if present, e.g. 123.000
--   (3) remove leading zeros, e.g. 000123
--   (4) switch to scientific if shorter, e.g. 123000 -> 123e3
-- * with fraction:
--   (1) split into digits dot digits
--   (2) if no integer portion, take as zero (can omit later)
--   (3) handle degenerate .000 case, after which the fractional part
--       must be non-zero (if zero, it's matched as an integer)
--   (4) remove trailing zeros of the fractional portion
--   (5) p.q where p > 0 and q > 0 cannot be shortened any more
--   (6) otherwise p == 0 and the form is .q, e.g. .000123
--   (7) if scientific is shorter, convert, e.g. .000123 -> 123e-6
-- * scientific:
--   (1) split into (digits dot digits) [eE] ([+-] digits)
--   (2) if the significand has a ".", shift it out so the significand
--       becomes an integer
--   (3) if the significand is zero, just use zero
--   (4) remove leading zeros of the significand
--   (5) shift out trailing zeros of the significand
--   (6) examine the exponent and determine which format is best:
--       integer, with fraction, or scientific
------------------------------------------------------------------------
local function do_number(i)
  local before = sinfos[i]      -- 'before'
  local z = before              -- working representation
  local y                       -- 'after', if better
  --------------------------------------------------------------------
  if match(z, "^0[xX]") then            -- hexadecimal number
    local v = base.tostring(base.tonumber(z))
    if #v <= #z then
      z = v  -- change to integer, AND continue
    else
      return  -- no change; stick to hex
    end
  end
  --------------------------------------------------------------------
  if match(z, "^%d+%.?0*$") then        -- integer or has useless frac
    z = match(z, "^(%d+)%.?0*$")        -- int portion only
    if z + 0 > 0 then
      z = match(z, "^0*([1-9]%d*)$")    -- remove leading zeros
      local v = #match(z, "0*$")
      local nv = base.tostring(v)
      if v > #nv + 1 then  -- scientific is shorter
        z = sub(z, 1, #z - v).."e"..nv
      end
      y = z
    else
      y = "0"  -- basic zero
    end
  --------------------------------------------------------------------
  elseif not match(z, "[eE]") then      -- number with fraction part
    local p, q = match(z, "^(%d*)%.(%d+)$")  -- split
    if p == "" then p = 0 end  -- int part zero
    if q + 0 == 0 and p == 0 then
      y = "0"  -- degenerate .000 case
    else
      -- now, q > 0 holds and p is a number
      local v = #match(q, "0*$")  -- remove trailing zeros
      if v > 0 then
        q = sub(q, 1, #q - v)
      end
      -- if p > 0, nothing else we can do to simplify p.q case
      if p + 0 > 0 then
        y = p.."."..q
      else
        y = "."..q  -- tentative, e.g. .000123
        local v = #match(q, "^0*")  -- # leading zeros
        local w = #q - v            -- # significant digits
        local nv = base.tostring(#q)
        -- e.g. compare 123e-6 versus .000123
        if w + 2 + #nv < 1 + #q then
          y = sub(q, -w).."e-"..nv
        end
      end
    end
  --------------------------------------------------------------------
  else                                  -- scientific number
    local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
    ex = base.tonumber(ex)
    -- if got ".", shift out fractional portion of significand
    local p, q = match(sig, "^(%d*)%.(%d*)$")
    if p then
      ex = ex - #q
      sig = p..q
    end
    if sig + 0 == 0 then
      y = "0"  -- basic zero
    else
      local v = #match(sig, "^0*")  -- remove leading zeros
      sig = sub(sig, v + 1)
      v = #match(sig, "0*$")  -- shift out trailing zeros
      if v > 0 then
        sig = sub(sig, 1, #sig - v)
        ex = ex + v
      end
      -- examine exponent and determine which format is best
      local nex = base.tostring(ex)
      if ex == 0 then  -- it's just an integer
        y = sig
      elseif ex > 0 and (ex <= 1 + #nex) then  -- integer, zeros appended
        y = sig..rep("0", ex)
      elseif ex < 0 and (ex >= -#sig) then  -- fraction, e.g. .123
        v = #sig + ex
        y = sub(sig, 1, v).."."..sub(sig, v + 1)
      elseif ex < 0 and (#nex >= -ex - #sig) then
        -- e.g. compare 1234e-5 versus .01234
        -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
        --     -> #nex >= -ex - #sig
        v = -ex - #sig
        y = "."..rep("0", v)..sig
      else  -- non-canonical scientific representation
        y = sig.."e"..ex
      end
    end--if sig
  end
  --------------------------------------------------------------------
  if y and y ~= sinfos[i] then
    if opt_details then
      print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
      opt_details = opt_details + 1
    end
    sinfos[i] = y
  end
end
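
------------------------------------------------------------------------
-- Illustrative rewrites performed by do_number (a sketch, not in the
-- original module; do_number works in place on sinfos[i]):
--   "0x3F"     -> "63"      hex converted when no longer than original
--   "123.000"  -> "123"     useless fractional part dropped
--   "123000"   -> "123e3"   scientific form is shorter
--   ".000123"  -> "123e-6"  likewise for small fractions
--   "12.34e2"  -> "1234"    exponent absorbed into a plain integer
------------------------------------------------------------------------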

------------------------------------------------------------------------
-- string optimization
-- * note: works on well-formed strings only!
-- * optimizations on characters can be summarized as follows:
--   \a\b\f\n\r\t\v -- no change
--   \\             -- no change
--   \"\'           -- depends on delim, other can remove \
--   \[\]           -- remove \
--   \<char>        -- general escape, remove \
--   \<eol>         -- normalize the EOL only
--   \ddd           -- if \a\b\f\n\r\t\v, change to latter
--                     if other < ascii 32, keep ddd but zap leading zeros
--                     if >= ascii 32, translate it into the literal, then
--                     also do escapes for \\,\",\' cases
--   <other>        -- no change
-- * switch delimiters if string becomes shorter
------------------------------------------------------------------------
local function do_string(I)
  local info = sinfos[I]
  local delim = sub(info, 1, 1)                 -- delimiter used
  local ndelim = (delim == "'") and '"' or "'"  -- opposite " <-> '
  local z = sub(info, 2, -2)                    -- actual string
  local i = 1
  local c_delim, c_ndelim = 0, 0                -- " and ' counts
  --------------------------------------------------------------------
  while i <= #z do
    local c = sub(z, i, i)
    ----------------------------------------------------------------
    if c == "\\" then                   -- escaped stuff
      local j = i + 1
      local d = sub(z, j, j)
      local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
      ------------------------------------------------------------
      if not p then                     -- \<char> -- remove \
        z = sub(z, 1, i - 1)..sub(z, j)
        i = i + 1
      ------------------------------------------------------------
      elseif p <= 8 then                -- \a\b\f\n\r\t\v\\
        i = i + 2                       -- no change
      ------------------------------------------------------------
      elseif p <= 10 then               -- \<eol> -- normalize EOL
        local eol = sub(z, j, j + 1)
        if eol == "\r\n" or eol == "\n\r" then
          z = sub(z, 1, i).."\n"..sub(z, j + 2)
        elseif p == 10 then             -- \r case
          z = sub(z, 1, i).."\n"..sub(z, j + 1)
        end
        i = i + 2
      ------------------------------------------------------------
      elseif p <= 12 then               -- \"\' -- remove \ for ndelim
        if d == delim then
          c_delim = c_delim + 1
          i = i + 2
        else
          c_ndelim = c_ndelim + 1
          z = sub(z, 1, i - 1)..sub(z, j)
          i = i + 1
        end
      ------------------------------------------------------------
      else                              -- \ddd -- various steps
        local s = match(z, "^(%d%d?%d?)", j)
        j = i + 1 + #s                  -- skip to location
        local cv = s + 0
        local cc = string.char(cv)
        local p = find("\a\b\f\n\r\t\v", cc, 1, true)
        if p then                       -- special escapes
          s = "\\"..sub("abfnrtv", p, p)
        elseif cv < 32 then             -- normalized \ddd
          s = "\\"..cv
        elseif cc == delim then         -- \<delim>
          s = "\\"..cc
          c_delim = c_delim + 1
        elseif cc == "\\" then          -- \\
          s = "\\\\"
        else                            -- literal character
          s = cc
          if cc == ndelim then
            c_ndelim = c_ndelim + 1
          end
        end
        z = sub(z, 1, i - 1)..s..sub(z, j)
        i = i + #s
      ------------------------------------------------------------
      end--if p
    ----------------------------------------------------------------
    else  -- c ~= "\\" -- <other> -- no change
      i = i + 1
      if c == ndelim then  -- count ndelim, for switching delimiters
        c_ndelim = c_ndelim + 1
      end
    ----------------------------------------------------------------
    end--if c
  end--while
  --------------------------------------------------------------------
  -- switching delimiters, a long-winded derivation:
  -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
  -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
  -- simplifying the condition (1)>(2) --> c_delim > c_ndelim
  if c_delim > c_ndelim then
    i = 1
    while i <= #z do
      local p, q, r = find(z, "([\'\"])", i)
      if not p then break end
      if r == delim then                -- \<delim> -> <delim>
        z = sub(z, 1, p - 2)..sub(z, p)
        i = p
      else  -- r == ndelim              -- <ndelim> -> \<ndelim>
        z = sub(z, 1, p - 1).."\\"..sub(z, p)
        i = p + 2
      end
    end--while
    delim = ndelim  -- actually change delimiters
  end
  --------------------------------------------------------------------
  z = delim..z..delim
  if z ~= sinfos[I] then
    if opt_details then
      print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
      opt_details = opt_details + 1
    end
    sinfos[I] = z
  end
end
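
------------------------------------------------------------------------
-- Illustrative rewrites performed by do_string (a sketch, not in the
-- original module; do_string works in place on sinfos[I]):
--   "\100"        -> "d"           decimal escape becomes the literal
--   "\001"        -> "\1"          leading zeros zapped from \ddd
--   "\q"          -> "q"           useless general escape removed
--   "say \"hi\""  -> 'say "hi"'    delimiter switch saves two bytes
------------------------------------------------------------------------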

------------------------------------------------------------------------
-- long string optimization
-- * note: a warning is flagged if trailing whitespace is found; it is
--   not trimmed, since that would change the string's value
-- * remove first optional newline
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------
local function do_lstring(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%[=*%[")  -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -sep, -1)
  local z = sub(info, sep + 1, -(sep + 1))  -- lstring without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line
    local ln
    if not p then
      ln = sub(z, i)
    elseif p >= i then
      ln = sub(z, i, p - 1)
    end
    if ln ~= "" then
      -- flag a warning if there are trailing spaces, won't optimize!
      if match(ln, "%s+$") then
        warn.lstring = "trailing whitespace in long string near line "..stoklns[I]
      end
      y = y..ln
    end
    if not p then  -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if #s > 0 and r ~= s then  -- skip CRLF or LFCR
      i = i + 1
    end
    -- skip the first newline, which can be safely deleted (the Lua
    -- lexer ignores a newline right after the opening delimiter);
    -- it is the first newline exactly when the EOL is at position 1
    if p ~= 1 then
      y = y.."\n"
    end
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  if sep >= 3 then
    local chk, okay = sep - 1
    -- loop to test the ending delimiter with fewer '=' down to zero
    while chk >= 2 do
      local delim = "%]"..rep("=", chk - 2).."%]"
      if not match(y, delim) then okay = chk end
      chk = chk - 1
    end
    if okay then  -- change delimiters
      sep = rep("=", okay - 2)
      delim1, delim2 = "["..sep.."[", "]"..sep.."]"
    end
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end
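
------------------------------------------------------------------------
-- Illustrative rewrites performed by do_lstring (a sketch, not in the
-- original module):
--   "[==[hello]==]"  -> "[[hello]]"  no "]]" inside, so drop the '='s
--   "[[\nhello]]"    -> "[[hello]]"  first newline safely deleted
------------------------------------------------------------------------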

------------------------------------------------------------------------
-- long comment optimization
-- * note: does not remove first optional newline
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------
local function do_lcomment(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%-%-%[=*%[")  -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -(sep - 2), -1)   -- closing delim has no "--"
  local z = sub(info, sep + 1, -(sep - 1))   -- comment without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line, extract and check trailing whitespace
    local ln
    if not p then
      ln = sub(z, i)
    elseif p >= i then
      ln = sub(z, i, p - 1)
    end
    if ln ~= "" then
      -- trim trailing whitespace if non-empty line
      local ws = match(ln, "%s*$")
      if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
      y = y..ln
    end
    if not p then  -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if #s > 0 and r ~= s then  -- skip CRLF or LFCR
      i = i + 1
    end
    y = y.."\n"
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  sep = sep - 2  -- adjust for the "--" comment prefix
  if sep >= 3 then
    local chk, okay = sep - 1
    -- loop to test the ending delimiter with fewer '=' down to zero
    while chk >= 2 do
      local delim = "%]"..rep("=", chk - 2).."%]"
      if not match(y, delim) then okay = chk end
      chk = chk - 1
    end
    if okay then  -- change delimiters
      sep = rep("=", okay - 2)
      delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
    end
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end
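
------------------------------------------------------------------------
-- Illustrative rewrite performed by do_lcomment (a sketch, not in the
-- original module):
--   "--[==[ note ]==]"  -> "--[[ note]]"
-- the '=' separators are dropped (no "]]" appears inside) and the
-- trailing space of the line is trimmed.
------------------------------------------------------------------------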

------------------------------------------------------------------------
-- short comment optimization
-- * trim trailing whitespace
------------------------------------------------------------------------
local function do_comment(i)
  local info = sinfos[i]
  local ws = match(info, "%s*$")        -- just look from end of string
  if #ws > 0 then
    info = sub(info, 1, -(#ws + 1))     -- trim trailing whitespace
  end
  sinfos[i] = info
end

------------------------------------------------------------------------
-- returns true if string found in long comment
-- * this is a feature to keep copyright or license texts
------------------------------------------------------------------------
local function keep_lcomment(opt_keep, info)
  if not opt_keep then return false end  -- option not set
  local delim1 = match(info, "^%-%-%[=*%[")  -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -sep, -1)
  local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  if find(z, opt_keep, 1, true) then  -- try to match
    return true
  end
end

------------------------------------------------------------------------
-- main entry point
-- * currently, lexer processing has 2 passes
-- * processing is done on a line-oriented basis, which is easier to
--   grok due to the next point...
-- * since there are various options that can be enabled or disabled,
--   processing is a little messy or convoluted
------------------------------------------------------------------------
function optimize(option, toklist, semlist, toklnlist)
  --------------------------------------------------------------------
  -- set option flags
  --------------------------------------------------------------------
  local opt_comments = option["opt-comments"]
  local opt_whitespace = option["opt-whitespace"]
  local opt_emptylines = option["opt-emptylines"]
  local opt_eols = option["opt-eols"]
  local opt_strings = option["opt-strings"]
  local opt_numbers = option["opt-numbers"]
  local opt_keep = option.KEEP
  opt_details = option.DETAILS and 0  -- upvalues for details display
  print = print or base.print
  if opt_eols then  -- forced settings, otherwise it won't work properly
    opt_comments = true
    opt_whitespace = true
    opt_emptylines = true
  end
  --------------------------------------------------------------------
  -- variable initialization
  --------------------------------------------------------------------
  stoks, sinfos, stoklns                -- set source lists
    = toklist, semlist, toklnlist
  local i = 1                           -- token position
  local tok, info                       -- current token
  local prev    -- position of last grammar token
                -- on the same line (for TK_SPACE stuff)
  --------------------------------------------------------------------
  -- changes a token, info pair
  --------------------------------------------------------------------
  local function settoken(tok, info, I)
    I = I or i
    stoks[I] = tok or ""
    sinfos[I] = info or ""
  end
  --------------------------------------------------------------------
  -- processing loop (PASS 1)
  --------------------------------------------------------------------
  while true do
    tok, info = stoks[i], sinfos[i]
    ----------------------------------------------------------------
    local atstart = atlinestart(i)      -- set line begin flag
    if atstart then prev = nil end
    ----------------------------------------------------------------
    if tok == "TK_EOS" then             -- end of stream/pass
      break
    ----------------------------------------------------------------
    elseif tok == "TK_KEYWORD" or       -- keywords, identifiers,
           tok == "TK_NAME" or          -- operators
           tok == "TK_OP" then
      -- TK_KEYWORD and TK_OP can't be optimized without a big
      -- optimization framework; it would be more of an optimizing
      -- compiler, not a source code compressor
      -- TK_NAME tokens that are locals need the parser to
      -- analyze/optimize them
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_NUMBER" then      -- numbers
      if opt_numbers then
        do_number(i)  -- optimize
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_STRING" or        -- strings, long strings
           tok == "TK_LSTRING" then
      if opt_strings then
        if tok == "TK_STRING" then
          do_string(i)   -- optimize
        else
          do_lstring(i)  -- optimize
        end
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_COMMENT" then     -- short comments
      if opt_comments then
        if i == 1 and sub(info, 1, 1) == "#" then
          -- keep shebang comment, trim whitespace
          do_comment(i)
        else
          -- safe to delete, as a TK_EOL (or TK_EOS) always follows
          settoken()  -- remove entirely
        end
      elseif opt_whitespace then  -- trim whitespace only
        do_comment(i)
      end
    ----------------------------------------------------------------
    elseif tok == "TK_LCOMMENT" then    -- long comments
      if keep_lcomment(opt_keep, info) then
        ------------------------------------------------------------
        -- if --keep, we keep a long comment if <msg> is found;
        -- this is a feature to keep copyright or license texts
        if opt_whitespace then  -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      elseif opt_comments then
        local eols = commenteols(info)
        ------------------------------------------------------------
        -- prepare opt_emptylines case first; if a disposable token
        -- follows, the current one is safe to dump, else keep a space;
        -- it is implied that the operation is safe for '-', because
        -- the current token is a TK_LCOMMENT, which must already be
        -- separate from any '-'
        if is_faketoken[stoks[i + 1]] then
          settoken()  -- remove entirely
          tok = ""
        else
          settoken("TK_SPACE", " ")
        end
        ------------------------------------------------------------
        -- if there are embedded EOLs to keep and opt_emptylines is
        -- disabled, then switch the token into one or more EOLs
        if not opt_emptylines and eols > 0 then
          settoken("TK_EOL", rep("\n", eols))
        end
        ------------------------------------------------------------
        -- if optimizing whitespace, force reinterpretation of the
        -- token to give the space a chance to be optimized away
        if opt_whitespace and tok ~= "" then
          i = i - 1  -- to reinterpret
        end
        ------------------------------------------------------------
      else  -- disabled case
        if opt_whitespace then  -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      end
    ----------------------------------------------------------------
    elseif tok == "TK_EOL" then         -- line endings
      if atstart and opt_emptylines then
        settoken()  -- remove entirely
      elseif info == "\r\n" or info == "\n\r" then
        -- normalize the rest of the EOLs for CRLF/LFCR only
        -- (note that TK_LCOMMENT can change into several EOLs)
        settoken("TK_EOL", "\n")
      end
    ----------------------------------------------------------------
    elseif tok == "TK_SPACE" then       -- whitespace
      if opt_whitespace then
        if atstart or atlineend(i) then
          -- delete leading and trailing whitespace
          settoken()  -- remove entirely
        else
          ------------------------------------------------------------
          -- at this point, since leading whitespace has been removed,
          -- there should be either a real token or a TK_LCOMMENT
          -- before this whitespace; the TK_LCOMMENT case only happens
          -- if opt_comments is disabled; so prev ~= nil
          local ptok = stoks[prev]
          if ptok == "TK_LCOMMENT" then
            -- a previous TK_LCOMMENT can abut with anything
            settoken()  -- remove entirely
          else
            -- prev must be a grammar token; consecutive TK_SPACE
            -- tokens are impossible when optimizing whitespace
            local ntok = stoks[i + 1]
            if is_faketoken[ntok] then
              -- handle special case where a '-' cannot abut with
              -- either a short comment or a long comment
              if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
                 ptok == "TK_OP" and sinfos[prev] == "-" then
                -- keep token
              else
                settoken()  -- remove entirely
              end
            else  -- is_realtoken
              -- check a pair of grammar tokens; if they can abut,
              -- delete the space token entirely, otherwise keep one
              -- space
              local s = checkpair(prev, i + 1)
              if s == "" then
                settoken()  -- remove entirely
              else
                settoken("TK_SPACE", " ")
              end
            end
          end
          ------------------------------------------------------------
        end
      end
    ----------------------------------------------------------------
    else
      error("unidentified token encountered")
    end
    ----------------------------------------------------------------
    i = i + 1
  end--while
  repack_tokens()
  --------------------------------------------------------------------
  -- processing loop (PASS 2)
  --------------------------------------------------------------------
  if opt_eols then
    i = 1
    -- aggressive EOL removal only works with most non-grammar tokens
    -- optimized away, because it is a rather simple scheme -- basically
    -- it just checks 'real' token pairs around EOLs
    if stoks[1] == "TK_COMMENT" then
      -- the first comment still existing must be a shebang, so skip
      -- the whole line
      i = 3
    end
    while true do
      tok, info = stoks[i], sinfos[i]
      --------------------------------------------------------------
      if tok == "TK_EOS" then           -- end of stream/pass
        break
      --------------------------------------------------------------
      elseif tok == "TK_EOL" then       -- consider each TK_EOL
        local t1, t2 = stoks[i - 1], stoks[i + 1]
        if is_realtoken[t1] and is_realtoken[t2] then  -- sanity check
          local s = checkpair(i - 1, i + 1)
          if s == "" then
            settoken()  -- remove entirely
          end
        end
      end--if tok
      --------------------------------------------------------------
      i = i + 1
    end--while
    repack_tokens()
  end
  --------------------------------------------------------------------
  if opt_details and opt_details > 0 then print() end  -- spacing
  return stoks, sinfos, stoklns
end
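
------------------------------------------------------------------------
-- Illustrative usage (a sketch, not part of this file; it assumes the
-- companion llex module from LuaSrcDiet, whose lexer fills the tok,
-- seminfo and tokln lists):
--   local llex = require "llex"
--   local optlex = require "optlex"
--   llex.init(source_text, "@input.lua")
--   llex.llex()
--   local option = { ["opt-whitespace"] = true, ["opt-numbers"] = true }
--   local toks, infos, lines
--     = optlex.optimize(option, llex.tok, llex.seminfo, llex.tokln)
------------------------------------------------------------------------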
|