shellcheck/ShellCheck/Parser.hs

{-
    Copyright 2012-2015 Vidar Holen

    This file is part of ShellCheck.
    http://www.vidarholen.net/contents/shellcheck

    ShellCheck is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    ShellCheck is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    yOU should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}
{-# LANGUAGE NoMonomorphismRestriction, TemplateHaskell, FlexibleContexts #-}
module ShellCheck.Parser (parseScript, runTests) where

import ShellCheck.AST
import ShellCheck.ASTLib
import ShellCheck.Data
import ShellCheck.Interface

import Control.Applicative ((<*))
import Control.Monad
import Control.Monad.Identity
import Control.Monad.Trans
import Data.Char
import Data.Functor
import Data.List (isPrefixOf, isInfixOf, isSuffixOf, partition, sortBy, intercalate, nub)
import Data.Maybe
import Data.Monoid
import Debug.Trace
import GHC.Exts (sortWith)
import Prelude hiding (readList)
import System.IO
import Text.Parsec hiding (runParser, (<?>))
import Text.Parsec.Error
import Text.Parsec.Pos
import qualified Control.Monad.Reader as Mr
import qualified Control.Monad.State as Ms
import qualified Data.Map as Map

import Test.QuickCheck.All (quickCheckAll)

type SCBase m = Mr.ReaderT (SystemInterface m) (Ms.StateT SystemState m)
type SCParser m v = ParsecT String UserState (SCBase m) v

backslash :: Monad m => SCParser m Char
backslash = char '\\'
linefeed :: Monad m => SCParser m Char
linefeed = do
    optional carriageReturn
    c <- char '\n'
    readPendingHereDocs
    return c
singleQuote = char '\'' <|> unicodeSingleQuote
doubleQuote = char '"' <|> unicodeDoubleQuote
variableStart = upper <|> lower <|> oneOf "_"
variableChars = upper <|> lower <|> digit <|> oneOf "_"
functionChars = variableChars <|> oneOf ":+-.?"
specialVariable = oneOf "@*#?-$!"
quotableChars = "|&;<>()\\ '\t\n\r\xA0" ++ doubleQuotableChars
quotable = almostSpace <|> unicodeDoubleQuote <|> oneOf quotableChars
bracedQuotable = oneOf "}\"$`'"
doubleQuotableChars = "\"$`" ++ unicodeDoubleQuoteChars
doubleQuotable = unicodeDoubleQuote <|> oneOf doubleQuotableChars
whitespace = oneOf " \t" <|> carriageReturn <|> almostSpace <|> linefeed
linewhitespace = oneOf " \t" <|> almostSpace

suspectCharAfterQuotes = variableChars <|> char '%'

extglobStartChars = "?*@!+"
extglobStart = oneOf extglobStartChars

unicodeDoubleQuoteChars = "\x201C\x201D\x2033\x2036"

prop_spacing = isOk spacing "  \\\n # Comment"
spacing = do
    x <- many (many1 linewhitespace <|> try (string "\\\n" >> return ""))
    optional readComment
    return $ concat x

spacing1 = do
    spacing <- spacing
    when (null spacing) $ fail "Expected whitespace"
    return spacing

prop_allspacing = isOk allspacing "#foo"
prop_allspacing2 = isOk allspacing " #foo\n # bar\n#baz\n"
prop_allspacing3 = isOk allspacing "#foo\n#bar\n#baz\n"
allspacing = do
    s <- spacing
    more <- option False (linefeed >> return True)
    if more then do
        rest <- allspacing
        return $ s ++ "\n" ++ rest
      else
        return s

allspacingOrFail = do
    s <- allspacing
    when (null s) $ fail "Expected whitespace"

unicodeDoubleQuote = do
    pos <- getPosition
    oneOf unicodeDoubleQuoteChars
    parseProblemAt pos WarningC 1015 "This is a unicode double quote. Delete and retype it."
    return '"'

unicodeSingleQuote = do
    pos <- getPosition
    char '\x2018' <|> char '\x2019'
    parseProblemAt pos WarningC 1016 "This is a unicode single quote. Delete and retype it."
    return '"'

carriageReturn = do
    parseNote ErrorC 1017 "Literal carriage return. Run script through tr -d '\\r' ."
    char '\r'

almostSpace =
    choice [
        check '\xA0' "unicode non-breaking space",
        check '\x200B' "unicode zerowidth space"
    ]
  where
    check c name = do
        parseNote ErrorC 1018 $ "This is a " ++ name ++ ". Delete and retype it."
        char c
        return ' '

--------- Message/position annotation on top of user state
data Note = Note Id Severity Code String deriving (Show, Eq)
data ParseNote = ParseNote SourcePos SourcePos Severity Code String deriving (Show, Eq)
data Context =
        ContextName SourcePos String
        | ContextAnnotation [Annotation]
        | ContextSource String
    deriving (Show)

data HereDocContext =
        HereDocPending Token -- on linefeed, read this T_HereDoc
        | HereDocBoundary -- but don't consider heredocs before this
    deriving (Show)

data UserState = UserState {
    lastId :: Id,
    positionMap :: Map.Map Id SourcePos,
    parseNotes :: [ParseNote],
    hereDocMap :: Map.Map Id [Token],
    pendingHereDocs :: [HereDocContext]
}
initialUserState = UserState {
    lastId = Id $ -1,
    positionMap = Map.empty,
    parseNotes = [],
    hereDocMap = Map.empty,
    pendingHereDocs = []
}

codeForParseNote (ParseNote _ _ _ code _) = code
noteToParseNote map (Note id severity code message) =
        ParseNote pos pos severity code message
    where
        pos = fromJust $ Map.lookup id map

getLastId = lastId <$> getState

getNextIdAt sourcepos = do
    state <- getState
    let newId = incId (lastId state)
    let newMap = Map.insert newId sourcepos (positionMap state)
    putState $ state {
        lastId = newId,
        positionMap = newMap
    }
    return newId
  where incId (Id n) = Id $ n+1

getNextId = do
    pos <- getPosition
    getNextIdAt pos

addToHereDocMap id list = do
    state <- getState
    let map = hereDocMap state
    putState $ state {
        hereDocMap = Map.insert id list map
    }

withHereDocBoundary p = do
    pushBoundary
    do
        v <- p
        popBoundary
        return v
     <|> do
        popBoundary
        fail ""
  where
    pushBoundary = do
        state <- getState
        let docs = pendingHereDocs state
        putState $ state {
            pendingHereDocs = HereDocBoundary : docs
        }
    popBoundary = do
        state <- getState
        let docs = tail $ dropWhile (not . isHereDocBoundary) $
                    pendingHereDocs state
        putState $ state {
            pendingHereDocs = docs
        }

addPendingHereDoc t = do
    state <- getState
    let docs = pendingHereDocs state
    putState $ state {
        pendingHereDocs = HereDocPending t : docs
    }

popPendingHereDocs = do
    state <- getState
    let (pending, boundary) = break isHereDocBoundary $ pendingHereDocs state
    putState $ state {
        pendingHereDocs = boundary
    }
    return . map extract . reverse $ pendingHereDocs state
  where
    extract (HereDocPending t) = t

isHereDocBoundary x = case x of
    HereDocBoundary -> True
    otherwise -> False

getMap = positionMap <$> getState
getParseNotes = parseNotes <$> getState

addParseNote n = do
    irrelevant <- shouldIgnoreCode (codeForParseNote n)
    unless irrelevant $ do
        state <- getState
        putState $ state {
            parseNotes = n : parseNotes state
        }

shouldIgnoreCode code = do
    context <- getCurrentContexts
    return $ any disabling context
  where
    disabling (ContextAnnotation list) =
        any disabling' list
    disabling (ContextSource _) = True -- Don't add messages for sourced files
    disabling _ = False
    disabling' (DisableComment n) = code == n
    disabling' _ = False

shouldFollow file = do
    context <- getCurrentContexts
    if any isThisFile context
      then return False
      else
        if length (filter isSource context) >= 100
          then do
            parseProblem ErrorC 1092 "Stopping at 100 'source' frames :O"
            return False
          else
            return True
  where
    isSource (ContextSource _) = True
    isSource _ = False
    isThisFile (ContextSource name) | name == file = True
    isThisFile _= False

getSourceOverride = do
    context <- getCurrentContexts
    return . msum . map findFile $ takeWhile isSameFile context
  where
    isSameFile (ContextSource _) = False
    isSameFile _ = True

    findFile (ContextAnnotation list) = msum $ map getFile list
    findFile _ = Nothing
    getFile (SourceOverride str) = Just str
    getFile _ = Nothing

-- Store potential parse problems outside of parsec

data SystemState = SystemState {
    contextStack :: [Context],
    parseProblems :: [ParseNote]
}
initialSystemState = SystemState {
    contextStack = [],
    parseProblems = []
}

parseProblem level code msg = do
    pos <- getPosition
    parseProblemAt pos level code msg

setCurrentContexts c = Ms.modify (\state -> state { contextStack = c })
getCurrentContexts = contextStack <$> Ms.get

popContext = do
    v <- getCurrentContexts
    if not $ null v
        then do
            let (a:r) = v
            setCurrentContexts r
            return $ Just a
        else
            return Nothing

pushContext c = do
    v <- getCurrentContexts
    setCurrentContexts (c:v)

parseProblemAtWithEnd start end level code msg = do
    irrelevant <- shouldIgnoreCode code
    unless irrelevant $
        Ms.modify (\state -> state {
            parseProblems = note:parseProblems state
        })
  where
    note = ParseNote start end level code msg

parseProblemAt pos = parseProblemAtWithEnd pos pos

-- Store non-parse problems inside

parseNote c l a = do
    pos <- getPosition
    parseNoteAt pos c l a

parseNoteAt pos c l a = addParseNote $ ParseNote pos pos c l a

parseNoteAtWithEnd start end c l a = addParseNote $ ParseNote start end c l a

--------- Convenient combinators
thenSkip main follow = do
    r <- main
    optional follow
    return r

unexpecting s p = try $
    (try p >> fail ("Unexpected " ++ s)) <|> return ()

notFollowedBy2 = unexpecting ""

disregard = void

reluctantlyTill p end =
    (lookAhead (disregard (try end) <|> eof) >> return []) <|> do
        x <- p
        more <- reluctantlyTill p end
        return $ x:more
      <|> return []

reluctantlyTill1 p end = do
    notFollowedBy2 end
    x <- p
    more <- reluctantlyTill p end
    return $ x:more

attempting rest branch =
    (try branch >> rest) <|> rest

orFail parser errorAction =
    try parser <|> (errorAction >>= fail)

-- Construct a node with a parser, e.g. T_Literal `withParser` (readGenericLiteral ",")
withParser node parser = do
    id <- getNextId
    contents <- parser
    return $ node id contents

wasIncluded p = option False (p >> return True)

acceptButWarn parser level code note =
    optional $ try (do
        pos <- getPosition
        parser
        parseProblemAt pos level code note
      )

withContext entry p = do
    pushContext entry
    do
        v <- p
        popContext
        return v
     <|> do -- p failed without consuming input, abort context
        v <- popContext
        fail ""

called s p = do
    pos <- getPosition
    withContext (ContextName pos s) p

withAnnotations anns =
    withContext (ContextAnnotation anns)

readConditionContents single =
    readCondContents `attempting` lookAhead (do
                                pos <- getPosition
                                s <- many1 letter
                                when (s `elem` commonCommands) $
                                    parseProblemAt pos WarningC 1009 "Use 'if cmd; then ..' to check exit code, or 'if [[ $(cmd) == .. ]]' to check output.")

  where
    spacingOrLf = condSpacing True
    condSpacing required = do
        pos <- getPosition
        space <- allspacing
        when (required && null space) $
            parseProblemAt pos ErrorC 1035 "You are missing a required space here."
        when (single && '\n' `elem` space) $
            parseProblemAt pos ErrorC 1080 "When breaking lines in [ ], you need \\ before the linefeed."
        return space

    typ = if single then SingleBracket else DoubleBracket
    readCondBinaryOp = try $ do
        optional guardArithmetic
        id <- getNextId
        op <- getOp
        spacingOrLf
        return op
      where
        flaglessOps = [ "==", "!=", "<=", ">=", "=~", ">", "<", "=" ]

        getOp = do
            id <- getNextId
            op <- anyQuotedOp <|> anyEscapedOp <|> anyOp
            return $ TC_Binary id typ op

        -- hacks to read quoted operators without having to read a shell word
        anyEscapedOp = try $ do
            char '\\'
            escaped <$> anyOp
        anyQuotedOp = try $ do
            c <- oneOf "'\""
            s <- anyOp
            char c
            return s

        anyOp = flagOp <|> flaglessOp <|> fail
                    "Expected comparison operator (don't wrap commands in []/[[]])"
        flagOp = try $ do
            s <- readOp
            when (s == "-a" || s == "-o") $ fail "Unexpected operator"
            return s
        flaglessOp =
            choice $ map (try . string) flaglessOps
        escaped s = if any (`elem` s) "<>" then '\\':s else s

    guardArithmetic = do
        try . lookAhead $ disregard (oneOf "+*/%") <|> disregard (string "- ")
        parseProblem ErrorC 1076 $
            if single
            then "Trying to do math? Use e.g. [ $((i/2+7)) -ge 18 ]."
            else "Trying to do math? Use e.g. [[ $((i/2+7)) -ge 18 ]]."

    readCondUnaryExp = do
      op <- readCondUnaryOp
      pos <- getPosition
      liftM op readCondWord `orFail` do
          parseProblemAt pos ErrorC 1019 "Expected this to be an argument to the unary condition."
          return "Expected an argument for the unary operator"

    readCondUnaryOp = try $ do
        id <- getNextId
        s <- readOp
        spacingOrLf
        return $ TC_Unary id typ s

    readOp = try $ do
        char '-' <|> weirdDash
        s <- many1 letter <|> fail "Expected a test operator"
        return ('-':s)

    weirdDash = do
        pos <- getPosition
        oneOf "\x058A\x05BE\x2010\x2011\x2012\x2013\x2014\x2015\xFE63\xFF0D"
        parseProblemAt pos ErrorC 1100
            "This is a unicode dash. Delete and retype as ASCII minus."
        return '-'

    readCondWord = do
        notFollowedBy2 (try (spacing >> string "]"))
        x <- readNormalWord
        pos <- getPosition
        when (endedWith "]" x) $ do
            parseProblemAt pos ErrorC 1020 $
                "You need a space before the " ++ (if single then "]" else "]]") ++ "."
            fail "Missing space before ]"
        when (single && endedWith ")" x) $ do
            parseProblemAt pos ErrorC 1021
                "You need a space before the \\)"
            fail "Missing space before )"
        disregard spacing
        return x
      where endedWith str (T_NormalWord id s@(_:_)) =
                case last s of T_Literal id s -> str `isSuffixOf` s
                               _ -> False
            endedWith _ _ = False

    readCondAndOp = do
        id <- getNextId
        x <- try (readAndOrOp "&&" False <|> readAndOrOp "-a" True)
        return $ TC_And id typ x

    readCondOrOp = do
        optional guardArithmetic
        id <- getNextId
        x <- try (readAndOrOp "||" False <|> readAndOrOp "-o" True)
        return $ TC_Or id typ x

    readAndOrOp op requiresSpacing = do
        optional $ lookAhead weirdDash
        x <- string op
        condSpacing requiresSpacing
        return x

    readCondNoaryOrBinary = do
      id <- getNextId
      x <- readCondWord `attempting` (do
              pos <- getPosition
              lookAhead (char '[')
              parseProblemAt pos ErrorC 1026 $ if single
                  then "If grouping expressions inside [..], use \\( ..\\)."
                  else "If grouping expressions inside [[..]], use ( .. )."
            )
      (do
            pos <- getPosition
            isRegex <- regexOperatorAhead
            op <- readCondBinaryOp
            y <- if isRegex
                    then readRegex
                    else  readCondWord <|> (parseProblemAt pos ErrorC 1027 "Expected another argument for this operator." >> mzero)
            return (x `op` y)
          ) <|> return (TC_Noary id typ x)

    readCondGroup = do
          id <- getNextId
          pos <- getPosition
          lparen <- try $ string "(" <|> string "\\("
          when (single && lparen == "(") $
              parseProblemAt pos ErrorC 1028 "In [..] you have to escape (). Use [[..]] instead."
          when (not single && lparen == "\\(") $
              parseProblemAt pos ErrorC 1029 "In [[..]] you shouldn't escape ()."
          condSpacing single
          x <- readCondContents
          cpos <- getPosition
          rparen <- string ")" <|> string "\\)"
          condSpacing single
          when (single && rparen == ")") $
              parseProblemAt cpos ErrorC 1030 "In [..] you have to escape (). Use [[..]] instead."
          when (not single && rparen == "\\)") $
              parseProblemAt cpos ErrorC 1031 "In [[..]] you shouldn't escape ()."
          when (isEscaped lparen `xor` isEscaped rparen) $
              parseProblemAt pos ErrorC 1032 "Did you just escape one half of () but not the other?"
          return $ TC_Group id typ x
      where
        isEscaped ('\\':_) = True
        isEscaped _ = False
        xor x y = x && not y || not x && y

    -- Currently a bit of a hack since parsing rules are obscure
    regexOperatorAhead = lookAhead (do
        try (string "=~") <|> try (string "~=")
        return True)
          <|> return False
    readRegex = called "regex" $ do
        id <- getNextId
        parts <- many1 (
                readGroup <|>
                readSingleQuoted <|>
                readDoubleQuoted <|>
                readDollarExpression <|>
                readNormalLiteral "( " <|>
                readPipeLiteral <|>
                readGlobLiteral)
        disregard spacing
        return $ T_NormalWord id parts
      where
        readGlobLiteral = do
            id <- getNextId
            s <- extglobStart <|> oneOf "{}[]$"
            return $ T_Literal id [s]
        readGroup = called "regex grouping" $ do
            id <- getNextId
            char '('
            parts <- many (readGroup <|> readSingleQuoted <|> readDoubleQuoted <|> readDollarExpression <|> readRegexLiteral <|> readGlobLiteral)
            char ')'
            return $ T_NormalWord id parts
        readRegexLiteral = do
            id <- getNextId
            str <- readGenericLiteral1 (singleQuote <|> doubleQuotable <|> oneOf "()")
            return $ T_Literal id str
        readPipeLiteral = do
            id <- getNextId
            str <- string "|"
            return $ T_Literal id str

    readCondTerm = do
        term <- readCondNot <|> readCondExpr
        condSpacing False
        return term

    readCondNot = do
        id <- getNextId
        char '!'
        spacingOrLf
        expr <- readCondExpr
        return $ TC_Unary id typ "!" expr

    readCondExpr =
      readCondGroup <|> readCondUnaryExp <|> readCondNoaryOrBinary

    readCondOr = chainl1 readCondAnd readCondAndOp
    readCondAnd = chainl1 readCondTerm readCondOrOp
    readCondContents = readCondOr


prop_a1 = isOk readArithmeticContents " n++ + ++c"
prop_a2 = isOk readArithmeticContents "$N*4-(3,2)"
prop_a3 = isOk readArithmeticContents "n|=2<<1"
prop_a4 = isOk readArithmeticContents "n &= 2 **3"
prop_a5 = isOk readArithmeticContents "1 |= 4 && n >>= 4"
prop_a6 = isOk readArithmeticContents " 1 | 2 ||3|4"
prop_a7 = isOk readArithmeticContents "3*2**10"
prop_a8 = isOk readArithmeticContents "3"
prop_a9 = isOk readArithmeticContents "a^!-b"
prop_a10= isOk readArithmeticContents "! $?"
prop_a11= isOk readArithmeticContents "10#08 * 16#f"
prop_a12= isOk readArithmeticContents "\"$((3+2))\" + '37'"
prop_a13= isOk readArithmeticContents "foo[9*y+x]++"
prop_a14= isOk readArithmeticContents "1+`echo 2`"
prop_a15= isOk readArithmeticContents "foo[`echo foo | sed s/foo/4/g` * 3] + 4"
prop_a16= isOk readArithmeticContents "$foo$bar"
prop_a17= isOk readArithmeticContents "i<(0+(1+1))"
prop_a18= isOk readArithmeticContents "a?b:c"
prop_a19= isOk readArithmeticContents "\\\n3 +\\\n  2"
prop_a20= isOk readArithmeticContents "a ? b ? c : d : e"
prop_a21= isOk readArithmeticContents "a ? b : c ? d : e"
prop_a22= isOk readArithmeticContents "!!a"
readArithmeticContents =
    readSequence
  where
    spacing =
        let lf = try (string "\\\n") >> return '\n'
        in many (whitespace <|> lf)

    splitBy x ops = chainl1 x (readBinary ops)
    readBinary ops = readComboOp ops TA_Binary
    readComboOp op token = do
        id <- getNextId
        op <- choice (map (\x -> try $ do
                                        s <- string x
                                        notFollowedBy2 $ oneOf "&|<>="
                                        return s
                            ) op)
        spacing
        return $ token id op

    readArrayIndex = do
        id <- getNextId
        char '['
        middle <- readArithmeticContents
        char ']'
        return $ TA_Index id middle

    literal s = do
        id <- getNextId
        string s
        return $ T_Literal id s

    readArithmeticLiteral =
        readArrayIndex <|> literal "#"

    readExpansion = do
        id <- getNextId
        pieces <- many1 $ choice [
            readArithmeticLiteral,
            readSingleQuoted,
            readDoubleQuoted,
            readNormalDollar,
            readBraced,
            readUnquotedBackTicked,
            readNormalLiteral "+-*/=%^,]?:"
            ]
        spacing
        return $ TA_Expansion id pieces

    readGroup = do
        char '('
        s <- readSequence
        char ')'
        spacing
        return s

    readArithTerm = readGroup <|> readExpansion

    readSequence = do
        spacing
        id <- getNextId
        l <- readAssignment `sepBy` (char ',' >> spacing)
        return $ TA_Sequence id l

    readAssignment = chainr1 readTrinary readAssignmentOp
    readAssignmentOp = readComboOp ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", "^=", "|="] TA_Assignment

    readTrinary = do
        x <- readLogicalOr
        do
            id <- getNextId
            string "?"
            spacing
            y <- readTrinary
            string ":"
            spacing
            z <- readTrinary
            return $ TA_Trinary id x y z
         <|>
          return x

    readLogicalOr  = readLogicalAnd `splitBy` ["||"]
    readLogicalAnd = readBitOr `splitBy` ["&&"]
    readBitOr  = readBitXor `splitBy` ["|"]
    readBitXor = readBitAnd `splitBy` ["^"]
    readBitAnd = readEquated `splitBy` ["&"]
    readEquated = readCompared `splitBy` ["==", "!="]
    readCompared = readShift `splitBy` ["<=", ">=", "<", ">"]
    readShift = readAddition `splitBy` ["<<", ">>"]
    readAddition = readMultiplication `splitBy` ["+", "-"]
    readMultiplication = readExponential `splitBy` ["*", "/", "%"]
    readExponential = readAnyNegated `splitBy` ["**"]

    readAnyNegated = readNegated <|> readAnySigned
    readNegated = do
        id <- getNextId
        op <- oneOf "!~"
        spacing
        x <- readAnyNegated
        return $ TA_Unary id [op] x

    readAnySigned = readSigned <|> readAnycremented
    readSigned = do
        id <- getNextId
        op <- choice (map readSignOp "+-")
        spacing
        x <- readAnycremented
        return $ TA_Unary id [op] x
     where
        readSignOp c = try $ do
            char c
            notFollowedBy2 $ char c
            spacing
            return c

    readAnycremented = readNormalOrPostfixIncremented <|> readPrefixIncremented
    readPrefixIncremented = do
        id <- getNextId
        op <- try $ string "++" <|> string "--"
        spacing
        x <- readArithTerm
        return $ TA_Unary id (op ++ "|") x

    readNormalOrPostfixIncremented = do
        x <- readArithTerm
        spacing
        do
            id <- getNextId
            op <- try $ string "++" <|> string "--"
            spacing
            return $ TA_Unary id ('|':op) x
         <|>
            return x


prop_readCondition = isOk readCondition "[ \\( a = b \\) -a \\( c = d \\) ]"
prop_readCondition2 = isOk readCondition "[[ (a = b) || (c = d) ]]"
prop_readCondition3 = isOk readCondition "[[ $c = [[:alpha:].~-] ]]"
prop_readCondition4 = isOk readCondition "[[ $c =~ *foo* ]]"
prop_readCondition5 = isOk readCondition "[[ $c =~ f( ]] )* ]]"
prop_readCondition5a= isOk readCondition "[[ $c =~ a(b) ]]"
prop_readCondition5b= isOk readCondition "[[ $c =~ f( ($var ]]) )* ]]"
prop_readCondition6 = isOk readCondition "[[ $c =~ ^[yY]$ ]]"
prop_readCondition7 = isOk readCondition "[[ ${line} =~ ^[[:space:]]*# ]]"
prop_readCondition8 = isOk readCondition "[[ $l =~ ogg|flac ]]"
prop_readCondition9 = isOk readCondition "[ foo -a -f bar ]"
prop_readCondition10= isOk readCondition "[[\na == b\n||\nc == d ]]"
prop_readCondition10a= isOk readCondition "[[\na == b  ||\nc == d ]]"
prop_readCondition10b= isOk readCondition "[[ a == b\n||\nc == d ]]"
prop_readCondition11= isOk readCondition "[[ a == b ||\n c == d ]]"
prop_readCondition12= isWarning readCondition "[ a == b \n -o c == d ]"
prop_readCondition13= isOk readCondition "[[ foo =~ ^fo{1,3}$ ]]"
prop_readCondition14= isOk readCondition "[ foo '>' bar ]"
prop_readCondition15= isOk readCondition "[ foo \">=\" bar ]"
prop_readCondition16= isOk readCondition "[ foo \\< bar ]"
prop_readCondition17= isOk readCondition "[[ ${file::1} = [-.\\|/\\\\] ]]"
readCondition = called "test expression" $ do
    opos <- getPosition
    id <- getNextId
    open <- try (string "[[") <|> string "["
    let single = open == "["

    pos <- getPosition
    space <- allspacing
    when (null space) $
        parseProblemAt pos ErrorC 1035 $ "You need a space after the " ++
            if single
                then "[ and before the ]."
                else "[[ and before the ]]."
    when (single && '\n' `elem` space) $
        parseProblemAt pos ErrorC 1080 "You need \\ before line feeds to break lines in [ ]."

    condition <- readConditionContents single

    cpos <- getPosition
    close <- try (string "]]") <|> string "]" <|> fail "Expected test to end here (don't wrap commands in []/[[]])"
    when (open == "[[" && close /= "]]") $ parseProblemAt cpos ErrorC 1033 "Did you mean ]] ?"
    when (open == "[" && close /= "]" ) $ parseProblemAt opos ErrorC 1034 "Did you mean [[ ?"
    spacing
    many readCmdWord -- Read and throw away remainders to get then/do warnings. Fixme?
    return $ T_Condition id (if single then SingleBracket else DoubleBracket) condition

readAnnotationPrefix = do
    char '#'
    many linewhitespace
    string "shellcheck"

prop_readAnnotation1 = isOk readAnnotation "# shellcheck disable=1234,5678\n"
prop_readAnnotation2 = isOk readAnnotation "# shellcheck disable=SC1234 disable=SC5678\n"
prop_readAnnotation3 = isOk readAnnotation "# shellcheck disable=SC1234 source=/dev/null disable=SC5678\n"
readAnnotation = called "shellcheck annotation" $ do
    try readAnnotationPrefix
    many1 linewhitespace
    values <- many1 (readDisable <|> readSourceOverride <|> readShellOverride)
    linefeed
    many linewhitespace
    return $ concat values
  where
    readDisable = forKey "disable" $
        readCode `sepBy` char ','
      where
        readCode = do
            optional $ string "SC"
            int <- many1 digit
            return $ DisableComment (read int)

    readSourceOverride = forKey "source" $ do
        filename <- many1 $ noneOf " \n"
        return [SourceOverride filename]

    readShellOverride = forKey "shell" $ do
        pos <- getPosition
        shell <- many1 $ noneOf " \n"
        when (isNothing $ shellForExecutable shell) $
            parseNoteAt pos ErrorC 1103
                "This shell type is unknown. Use e.g. sh or bash."
        return [ShellOverride shell]

    forKey s p = do
        try $ string s
        char '='
        value <- p
        many linewhitespace
        return value

readAnnotations = do
    annotations <- many (readAnnotation `thenSkip` allspacing)
    return $ concat annotations

readComment = do
    unexpecting "shellcheck annotation" readAnnotationPrefix
    char '#'
    many $ noneOf "\r\n"

prop_readNormalWord = isOk readNormalWord "'foo'\"bar\"{1..3}baz$(lol)"
prop_readNormalWord2 = isOk readNormalWord "foo**(foo)!!!(@@(bar))"
prop_readNormalWord3 = isOk readNormalWord "foo#"
prop_readNormalWord4 = isOk readNormalWord "$\"foo\"$'foo\nbar'"
prop_readNormalWord5 = isWarning readNormalWord "${foo}}"
prop_readNormalWord6 = isOk readNormalWord "foo/{}"
prop_readNormalWord7 = isOk readNormalWord "foo\\\nbar"
prop_readNormalWord8 = isWarning readSubshell "(foo\\ \nbar)"
prop_readNormalWord9 = isOk readSubshell "(foo\\ ;\nbar)"
readNormalWord = readNormalishWord ""

readNormalishWord end = do
    id <- getNextId
    pos <- getPosition
    x <- many1 (readNormalWordPart end)
    checkPossibleTermination pos x
    return $ T_NormalWord id x

checkPossibleTermination pos [T_Literal _ x] =
    when (x `elem` ["do", "done", "then", "fi", "esac"]) $
        parseProblemAt pos WarningC 1010 $ "Use semicolon or linefeed before '" ++ x ++ "' (or quote to make it literal)."
checkPossibleTermination _ _ = return ()

readNormalWordPart end = do
    notFollowedBy2 $ oneOf end
    checkForParenthesis
    choice [
        readSingleQuoted,
        readDoubleQuoted,
        readGlob,
        readNormalDollar,
        readBraced,
        readUnquotedBackTicked,
        readProcSub,
        readNormalLiteral end,
        readLiteralCurlyBraces
      ]
  where
    checkForParenthesis =
        return () `attempting` do
            pos <- getPosition
            lookAhead $ char '('
            parseProblemAt pos ErrorC 1036 "'(' is invalid here. Did you forget to escape it?"

    readLiteralCurlyBraces = do
        id <- getNextId
        str <- findParam <|> literalBraces
        return $ T_Literal id str

    findParam = try $ string "{}"
    literalBraces = do
        pos <- getPosition
        c <- oneOf "{}"
        parseProblemAt pos WarningC 1083 $
            "This " ++ [c] ++ " is literal. Check expression (missing ;/\\n?) or quote it."
        return [c]


readSpacePart = do
    id <- getNextId
    x <- many1 whitespace
    return $ T_Literal id x

readDollarBracedWord = do
    id <- getNextId
    list <- many readDollarBracedPart
    return $ T_NormalWord id list

readDollarBracedPart = readSingleQuoted <|> readDoubleQuoted <|> readExtglob <|> readNormalDollar <|> readUnquotedBackTicked <|> readDollarBracedLiteral

readDollarBracedLiteral = do
    id <- getNextId
    vars <- (readBraceEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` bracedQuotable
    return $ T_Literal id $ concat vars

prop_readProcSub1 = isOk readProcSub "<(echo test | wc -l)"
prop_readProcSub2 = isOk readProcSub "<(  if true; then true; fi )"
prop_readProcSub3 = isOk readProcSub "<( # nothing here \n)"
readProcSub = called "process substitution" $ do
    id <- getNextId
    dir <- try $ do
                    x <- oneOf "<>"
                    char '('
                    return [x]
    list <- readCompoundListOrEmpty
    allspacing
    char ')'
    return $ T_ProcSub id dir list

prop_readSingleQuoted = isOk readSingleQuoted "'foo bar'"
prop_readSingleQuoted2 = isWarning readSingleQuoted "'foo bar\\'"
prop_readsingleQuoted3 = isWarning readSingleQuoted "\x2018hello\x2019"
prop_readSingleQuoted4 = isWarning readNormalWord "'it's"
prop_readSingleQuoted5 = isWarning readSimpleCommand "foo='bar\ncow 'arg"
prop_readSingleQuoted6 = isOk readSimpleCommand "foo='bar cow 'arg"
readSingleQuoted = called "single quoted string" $ do
    id <- getNextId
    startPos <- getPosition
    singleQuote
    s <- readSingleQuotedPart `reluctantlyTill` singleQuote
    let string = concat s
    endPos <- getPosition
    singleQuote <|> fail "Expected end of single quoted string"

    optional $ do
        c <- try . lookAhead $ suspectCharAfterQuotes <|> oneOf "'"
        if not (null string) && isAlpha c && isAlpha (last string)
          then
            parseProblemAt endPos WarningC 1011
                "This apostrophe terminated the single quoted string!"
          else
            when ('\n' `elem` string && not ("\n" `isPrefixOf` string)) $
                suggestForgotClosingQuote startPos endPos "single quoted string"

    return (T_SingleQuoted id string)

readSingleQuotedLiteral = do
    singleQuote
    strs <- many1 readSingleQuotedPart
    singleQuote
    return $ concat strs

readSingleQuotedPart =
    readSingleEscaped
    <|> many1 (noneOf "'\\\x2018\x2019")


prop_readBackTicked = isOk (readBackTicked False) "`ls *.mp3`"
prop_readBackTicked2 = isOk (readBackTicked False) "`grep \"\\\"\"`"
prop_readBackTicked3 = isWarning (readBackTicked False) "´grep \"\\\"\"´"
prop_readBackTicked4 = isOk readSimpleCommand "`echo foo\necho bar`"
prop_readBackTicked5 = isOk readSimpleCommand "echo `foo`bar"
prop_readBackTicked6 = isWarning readSimpleCommand "echo `foo\necho `bar"
prop_readBackTicked7 = isOk readSimpleCommand "`#inline comment`"
prop_readBackTicked8 = isOk readSimpleCommand "echo `#comment` \\\nbar baz"
readQuotedBackTicked = readBackTicked True
readUnquotedBackTicked = readBackTicked False
readBackTicked quoted = called "backtick expansion" $ do
    id <- getNextId
    startPos <- getPosition
    backtick
    subStart <- getPosition
    subString <- readGenericLiteral "`´"
    endPos <- getPosition
    backtick

    optional $ do
        c <- try . lookAhead $ suspectCharAfterQuotes
        when ('\n' `elem` subString && not ("\n" `isPrefixOf` subString)) $
            suggestForgotClosingQuote startPos endPos "backtick expansion"

    -- Result positions may be off due to escapes
    result <- subParse subStart subParser (unEscape subString)
    return $ T_Backticked id result
  where
    unEscape [] = []
    unEscape ('\\':'"':rest) | quoted = '"' : unEscape rest
    unEscape ('\\':x:rest) | x `elem` "$`\\" = x : unEscape rest
    unEscape ('\\':'\n':rest) = unEscape rest
    unEscape (c:rest) = c : unEscape rest
    subParser = do
        cmds <- readCompoundListOrEmpty
        verifyEof
        return cmds
    backtick =
      disregard (char '`') <|> do
         pos <- getPosition
         char '´'
         parseProblemAt pos ErrorC 1077
            "For command expansion, the tick should slant left (` vs ´). Use $(..) instead."

subParse pos parser input = do
    lastPosition <- getPosition
    lastInput <- getInput
    setPosition pos
    setInput input
    result <- parser
    setInput lastInput
    setPosition lastPosition
    return result

inSeparateContext parser = do
    context <- Ms.get
    success context <|> failure context
  where
    success c = do
        res <- try parser
        Ms.put c
        return res
    failure c = do
        Ms.put c
        fail ""

prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\""
prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\""
prop_readDoubleQuoted3 = isWarning readDoubleQuoted "\x201Chello\x201D"
prop_readDoubleQuoted4 = isWarning readSimpleCommand "\"foo\nbar\"foo"
prop_readDoubleQuoted5 = isOk readSimpleCommand "lol \"foo\nbar\" etc"
prop_readDoubleQuoted6 = isOk readSimpleCommand "echo \"${ ls; }\""
prop_readDoubleQuoted7 = isOk readSimpleCommand "echo \"${ ls;}bar\""
readDoubleQuoted = called "double quoted string" $ do
    id <- getNextId
    startPos <- getPosition
    doubleQuote
    x <- many doubleQuotedPart
    endPos <- getPosition
    doubleQuote <|> fail "Expected end of double quoted string"
    optional $ do
        try . lookAhead $ suspectCharAfterQuotes <|> oneOf "$\""
        when (any hasLineFeed x && not (startsWithLineFeed x)) $
            suggestForgotClosingQuote startPos endPos "double quoted string"
    return $ T_DoubleQuoted id x
  where
    startsWithLineFeed (T_Literal _ ('\n':_):_) = True
    startsWithLineFeed _ = False
    hasLineFeed (T_Literal _ str) | '\n' `elem` str = True
    hasLineFeed _ = False

suggestForgotClosingQuote startPos endPos name = do
    parseProblemAt startPos WarningC 1078 $
        "Did you forget to close this " ++ name ++ "?"
    parseProblemAt endPos InfoC 1079
        "This is actually an end quote, but due to next char it looks suspect."

doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked

readDoubleQuotedLiteral = do
    doubleQuote
    x <- readDoubleLiteral
    doubleQuote
    return x

readDoubleLiteral = do
    id <- getNextId
    s <- many1 readDoubleLiteralPart
    return $ T_Literal id (concat s)

readDoubleLiteralPart = do
    x <- many1 (readDoubleEscaped <|> many1 (noneOf ('\\':doubleQuotableChars)))
    return $ concat x

readNormalLiteral end = do
    id <- getNextId
    s <- many1 (readNormalLiteralPart end)
    return $ T_Literal id (concat s)

prop_readGlob1 = isOk readGlob "*"
prop_readGlob2 = isOk readGlob "[^0-9]"
prop_readGlob3 = isOk readGlob "[a[:alpha:]]"
prop_readGlob4 = isOk readGlob "[[:alnum:]]"
prop_readGlob5 = isOk readGlob "[^[:alpha:]1-9]"
prop_readGlob6 = isOk readGlob "[\\|]"
prop_readGlob7 = isOk readGlob "[^[]"
prop_readGlob8 = isOk readGlob "[*?]"
readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral
    where
        readSimple = do
            id <- getNextId
            c <- oneOf "*?"
            return $ T_Glob id [c]
        -- Doesn't handle weird things like [^]a] and [$foo]. fixme?
        readClass = try $ do
            id <- getNextId
            char '['
            s <- many1 (predefined <|> readNormalLiteralPart "]" <|> globchars)
            char ']'
            return $ T_Glob id $ "[" ++ concat s ++ "]"
          where
           globchars = liftM return . oneOf $ "!$[" ++ extglobStartChars
           predefined = do
              try $ string "[:"
              s <- many1 letter
              string ":]"
              return $ "[:" ++ s ++ ":]"

        readGlobbyLiteral = do
            id <- getNextId
            c <- extglobStart <|> char '['
            return $ T_Literal id [c]

readNormalLiteralPart end =
    readNormalEscaped <|> many1 (noneOf (end ++ quotableChars ++ extglobStartChars ++ "[{}"))

readNormalEscaped = called "escaped char" $ do
    pos <- getPosition
    backslash
    do
        next <- quotable <|> oneOf "?*@!+[]{}.,~#"
        when (next == ' ') $ checkTrailingSpaces pos <|> return ()
        return $ if next == '\n' then "" else [next]
      <|>
        do
            next <- anyChar
            case escapedChar next of
                Just name -> parseNoteAt pos WarningC 1012 $ "\\" ++ [next] ++ " is just literal '" ++ [next] ++ "' here. For " ++ name ++ ", use " ++ alternative next ++ " instead."
                Nothing -> parseNoteAt pos InfoC 1001 $ "This \\" ++ [next] ++ " will be a regular '" ++ [next] ++ "' in this context."
            return [next]
  where
    alternative 'n' = "a quoted, literal line feed"
    alternative t = "\"$(printf \"\\" ++ [t] ++ "\")\""
    escapedChar 'n' = Just "line feed"
    escapedChar 't' = Just "tab"
    escapedChar 'r' = Just "carriage return"
    escapedChar _ = Nothing

    checkTrailingSpaces pos = lookAhead . try $ do
        many linewhitespace
        void linefeed <|> eof
        parseProblemAt pos ErrorC 1101 "Delete trailing spaces after \\ to break line (or use quotes for literal space)."


prop_readExtglob1 = isOk readExtglob "!(*.mp3)"
prop_readExtglob2 = isOk readExtglob "!(*.mp3|*.wmv)"
prop_readExtglob4 = isOk readExtglob "+(foo \\) bar)"
prop_readExtglob5 = isOk readExtglob "+(!(foo *(bar)))"
prop_readExtglob6 = isOk readExtglob "*(((||))|())"
prop_readExtglob7 = isOk readExtglob "*(<>)"
prop_readExtglob8 = isOk readExtglob "@(|*())"
readExtglob = called "extglob" $ do
    id <- getNextId
    c <- try $ do
            f <- extglobStart
            char '('
            return f
    contents <- readExtglobPart `sepBy` char '|'
    char ')'
    return $ T_Extglob id [c] contents

readExtglobPart = do
    id <- getNextId
    x <- many (readExtglobGroup <|> readNormalWordPart "" <|> readSpacePart <|> readExtglobLiteral)
    return $ T_NormalWord id x
  where
    readExtglobGroup = do
        id <- getNextId
        char '('
        contents <- readExtglobPart `sepBy` char '|'
        char ')'
        return $ T_Extglob id "" contents
    readExtglobLiteral = do
        id <- getNextId
        str <- many1 (oneOf "<>#;&")
        return $ T_Literal id str


readSingleEscaped = do
    s <- backslash
    let attempt level code p msg = do { try $ parseNote level code msg; x <- p; return [s,x]; }

    do {
        x <- lookAhead singleQuote;
        parseProblem InfoC 1003 "Are you trying to escape that single quote? echo 'You'\\''re doing it wrong'.";
        return [s];
    }
        <|> attempt InfoC 1004 linefeed "You don't break lines with \\ in single quotes, it results in literal backslash-linefeed."
        <|> do
            x <- anyChar
            return [s,x]


readDoubleEscaped = do
    bs <- backslash
    (linefeed >> return "")
        <|> liftM return doubleQuotable
        <|> liftM (\ x -> [bs, x]) anyChar

readBraceEscaped = do
    bs <- backslash
    (linefeed >> return "")
        <|> liftM return bracedQuotable
        <|> liftM (\ x -> [bs, x]) anyChar


readGenericLiteral endChars = do
    strings <- many (readGenericEscaped <|> many1 (noneOf ('\\':endChars)))
    return $ concat strings

readGenericLiteral1 endExp = do
    strings <- (readGenericEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` endExp
    return $ concat strings

readGenericEscaped = do
    backslash
    x <- anyChar
    return $ if x == '\n' then [] else ['\\', x]

prop_readBraced = isOk readBraced "{1..4}"
prop_readBraced2 = isOk readBraced "{foo,bar,\"baz lol\"}"
prop_readBraced3 = isOk readBraced "{1,\\},2}"
prop_readBraced4 = isOk readBraced "{1,{2,3}}"
prop_readBraced5 = isOk readBraced "{JP{,E}G,jp{,e}g}"
prop_readBraced6 = isOk readBraced "{foo,bar,$((${var}))}"
prop_readBraced7 = isNotOk readBraced "{}"
prop_readBraced8 = isNotOk readBraced "{foo}"
readBraced = try braceExpansion
  where
    braceExpansion =
        T_BraceExpansion `withParser` do
            char '{'
            elements <- bracedElement `sepBy1` char ','
            guard $
                case elements of
                    (_:_:_) -> True
                    [t] -> ".." `isInfixOf` onlyLiteralString t
                    [] -> False
            char '}'
            return elements
    bracedElement =
        T_NormalWord `withParser` do
            many $ choice [
                braceExpansion,
                readDollarExpression,
                readSingleQuoted,
                readDoubleQuoted,
                braceLiteral
                ]
    braceLiteral =
        T_Literal `withParser` readGenericLiteral1 (oneOf "{}\"$'," <|> whitespace)

readNormalDollar = readDollarExpression <|> readDollarDoubleQuote <|> readDollarSingleQuote <|> readDollarLonely
readDoubleQuotedDollar = readDollarExpression <|> readDollarLonely

prop_readDollarExpression1 = isOk readDollarExpression "$(((1) && 3))"
prop_readDollarExpression2 = isWarning readDollarExpression "$(((1)) && 3)"
readDollarExpression = readTripleParenthesis "$" readDollarArithmetic readDollarExpansion <|> readDollarArithmetic <|> readDollarBracket <|> readDollarBraceCommandExpansion <|> readDollarBraced <|> readDollarExpansion <|> readDollarVariable

prop_readDollarSingleQuote = isOk readDollarSingleQuote "$'foo\\\'lol'"
readDollarSingleQuote = called "$'..' expression" $ do
    id <- getNextId
    try $ string "$'"
    str <- readGenericLiteral "'"
    char '\''
    return $ T_DollarSingleQuoted id str

prop_readDollarDoubleQuote = isOk readDollarDoubleQuote "$\"hello\""
readDollarDoubleQuote = do
    lookAhead . try $ string "$\""
    id <- getNextId
    char '$'
    doubleQuote
    x <- many doubleQuotedPart
    doubleQuote <|> fail "Expected end of translated double quoted string"
    return $ T_DollarDoubleQuoted id x

prop_readDollarArithmetic = isOk readDollarArithmetic "$(( 3 * 4 +5))"
prop_readDollarArithmetic2 = isOk readDollarArithmetic "$(((3*4)+(1*2+(3-1))))"
readDollarArithmetic = called "$((..)) expression" $ do
    id <- getNextId
    try (string "$((")
    c <- readArithmeticContents
    pos <- getPosition
    char ')'
    char ')' <|> fail "Expected a double )) to end the $((..))"
    return (T_DollarArithmetic id c)

readDollarBracket = called "$[..] expression" $ do
    id <- getNextId
    try (string "$[")
    c <- readArithmeticContents
    string "]"
    return (T_DollarBracket id c)

prop_readArithmeticExpression = isOk readArithmeticExpression "((a?b:c))"
readArithmeticExpression = called "((..)) command" $ do
    id <- getNextId
    try (string "((")
    c <- readArithmeticContents
    string "))"
    return (T_Arithmetic id c)

-- Check if maybe ((( was intended as ( (( rather than (( (
readTripleParenthesis prefix expected alternative = do
    pos <- try . lookAhead $ do
        string prefix
        p <- getPosition
        string "(((" -- should optimally be "((" but it's noisy and rarely helpful
        return p

    -- If the expected parser fails, try the alt.
    -- If the alt fails, run the expected one again for the errors.
    try expected <|> tryAlt pos <|> expected
  where
    tryAlt pos = do
        t <- try alternative
        parseNoteAt pos WarningC 1102 $
            "Shells differ in parsing ambiguous " ++ prefix ++ "(((. Use spaces: " ++ prefix ++ "( (( ."
        return t


prop_readDollarBraceCommandExpansion1 = isOk readDollarBraceCommandExpansion "${ ls; }"
prop_readDollarBraceCommandExpansion2 = isOk readDollarBraceCommandExpansion "${\nls\n}"
readDollarBraceCommandExpansion = called "ksh ${ ..; } command expansion" $ do
    id <- getNextId
    try $ do
        string "${"
        whitespace
    allspacing
    term <- readTerm
    char '}' <|> fail "Expected } to end the ksh ${ ..; } command expansion"
    return $ T_DollarBraceCommandExpansion id term

prop_readDollarBraced1 = isOk readDollarBraced "${foo//bar/baz}"
prop_readDollarBraced2 = isOk readDollarBraced "${foo/'{cow}'}"
prop_readDollarBraced3 = isOk readDollarBraced "${foo%%$(echo cow\\})}"
prop_readDollarBraced4 = isOk readDollarBraced "${foo#\\}}"
readDollarBraced = called "parameter expansion" $ do
    id <- getNextId
    try (string "${")
    word <- readDollarBracedWord
    char '}'
    return $ T_DollarBraced id word

prop_readDollarExpansion1= isOk readDollarExpansion "$(echo foo; ls\n)"
prop_readDollarExpansion2= isOk readDollarExpansion "$(  )"
prop_readDollarExpansion3= isOk readDollarExpansion "$( command \n#comment \n)"
readDollarExpansion = called "command expansion" $ do
    id <- getNextId
    try (string "$(")
    cmds <- readCompoundListOrEmpty
    char ')' <|> fail "Expected end of $(..) expression"
    return $ T_DollarExpansion id cmds

prop_readDollarVariable = isOk readDollarVariable "$@"
prop_readDollarVariable2 = isOk (readDollarVariable >> anyChar) "$?!"
prop_readDollarVariable3 = isWarning (readDollarVariable >> anyChar) "$10"
prop_readDollarVariable4 = isWarning (readDollarVariable >> string "[@]") "$arr[@]"

readDollarVariable = do
    id <- getNextId
    pos <- getPosition

    let singleCharred p = do
        n <- p
        value <- wrap [n]
        return (T_DollarBraced id value)

    let positional = do
        value <- singleCharred digit
        return value `attempting` do
            lookAhead digit
            parseNoteAt pos ErrorC 1037 "Braces are required for positionals over 9, e.g. ${10}."

    let special = singleCharred specialVariable

    let regular = do
        name <- readVariableName
        value <- wrap name
        return (T_DollarBraced id value) `attempting` do
            lookAhead $ void (string "[@]") <|> void (string "[*]") <|> void readArrayIndex
            parseNoteAt pos ErrorC 1087 "Braces are required when expanding arrays, as in ${array[idx]}."

    try $ char '$' >> (positional <|> special <|> regular)

  where
    wrap s = do
        x <- getNextId
        y <- getNextId
        return $ T_NormalWord x [T_Literal y s]

readVariableName = do
    f <- variableStart
    rest <- many variableChars
    return (f:rest)

readDollarLonely = do
    id <- getNextId
    pos <- getPosition
    char '$'
    n <- lookAhead (anyChar <|> (eof >> return '_'))
    return $ T_Literal id "$"

prop_readHereDoc = isOk readScript "cat << foo\nlol\ncow\nfoo"
prop_readHereDoc2 = isWarning readScript "cat <<- EOF\n  cow\n  EOF"
prop_readHereDoc3 = isOk readScript "cat << foo\n$\"\nfoo"
prop_readHereDoc4 = isOk readScript "cat << foo\n`\nfoo"
prop_readHereDoc5 = isOk readScript "cat <<- !foo\nbar\n!foo"
prop_readHereDoc6 = isOk readScript "cat << foo\\ bar\ncow\nfoo bar"
prop_readHereDoc7 = isOk readScript "cat << foo\n\\$(f ())\nfoo"
prop_readHereDoc8 = isOk readScript "cat <<foo>>bar\netc\nfoo"
prop_readHereDoc9 = isOk readScript "if true; then cat << foo; fi\nbar\nfoo\n"
prop_readHereDoc10= isOk readScript "if true; then cat << foo << bar; fi\nfoo\nbar\n"
prop_readHereDoc11= isOk readScript "cat << foo $(\nfoo\n)lol\nfoo\n"
readHereDoc = called "here document" $ do
    fid <- getNextId
    pos <- getPosition
    try $ string "<<"
    dashed <- (char '-' >> return Dashed) <|> return Undashed
    tokenPosition <- getPosition
    sp <- spacing
    optional $ do
        try . lookAhead $ char '('
        let message = "Shells are space sensitive. Use '< <(cmd)', not '<<" ++ sp ++ "(cmd)'."
        parseProblemAt pos ErrorC 1038 message
    hid <- getNextId
    (quoted, endToken) <-
            liftM (\ x -> (Quoted, stripLiteral x)) readDoubleQuotedLiteral
            <|> liftM (\ x -> (Quoted, x)) readSingleQuotedLiteral
            <|> (readToken >>= (\x -> return (Unquoted, x)))

    -- add empty tokens for now, read the rest in readPendingHereDocs
    let doc = T_HereDoc hid dashed quoted endToken []
    addPendingHereDoc doc
    return $ T_FdRedirect fid "" doc
  where
    stripLiteral (T_Literal _ x) = x
    stripLiteral (T_SingleQuoted _ x) = x

    readToken =
        liftM concat $ many1 (escaped <|> quoted <|> normal)
      where
        quoted = liftM stripLiteral readDoubleQuotedLiteral <|> readSingleQuotedLiteral
        normal = anyChar `reluctantlyTill1` (whitespace <|> oneOf "<>;&)'\"\\")
        escaped = do -- surely the user must be doing something wrong at this point
            char '\\'
            c <- anyChar
            return [c]


readPendingHereDocs = do
    docs <- popPendingHereDocs
    mapM_ readDoc docs
  where
    readDoc (T_HereDoc id dashed quoted endToken _) = do
        pos <- getPosition
        hereData <- anyChar `reluctantlyTill` do
                        spacing
                        string endToken
                        disregard (char '\n') <|> eof
        do
            spaces <- spacing
            verifyHereDoc dashed quoted spaces hereData
            string endToken
            parsedData <- parseHereData quoted pos hereData
            list <- parseHereData quoted pos hereData
            addToHereDocMap id list

         `attempting` (eof >> debugHereDoc pos endToken hereData)

    parseHereData Quoted startPos hereData = do
        id <- getNextIdAt startPos
        return [T_Literal id hereData]

    parseHereData Unquoted startPos hereData =
        subParse startPos readHereData hereData

    readHereData = many $ try doubleQuotedPart <|> readHereLiteral

    readHereLiteral = do
        id <- getNextId
        chars <- many1 $ noneOf "`$\\"
        return $ T_Literal id chars

    verifyHereDoc dashed quoted spacing hereInfo = do
        when (dashed == Undashed && spacing /= "") $
            parseNote ErrorC 1039 "Use <<- instead of << if you want to indent the end token."
        when (dashed == Dashed && filter (/= '\t') spacing /= "" ) $
            parseNote ErrorC 1040 "When using <<-, you can only indent with tabs."
        return ()

    debugHereDoc pos endToken doc
        | endToken `isInfixOf` doc =
            let lookAt line = when (endToken `isInfixOf` line) $
                      parseProblemAt pos ErrorC 1041 ("Close matches include '" ++ line ++ "' (!= '" ++ endToken ++ "').")
            in do
                  parseProblemAt pos ErrorC 1042 ("Found '" ++ endToken ++ "' further down, but not entirely by itself.")
                  mapM_ lookAt (lines doc)
        | map toLower endToken `isInfixOf` map toLower doc =
            parseProblemAt pos ErrorC 1043 ("Found " ++ endToken ++ " further down, but with wrong casing.")
        | otherwise =
            parseProblemAt pos ErrorC 1044 ("Couldn't find end token `" ++ endToken ++ "' in the here document.")


readFilename = readNormalWord
readIoFileOp = choice [g_LESSAND, g_GREATAND, g_DGREAT, g_LESSGREAT, g_CLOBBER, redirToken '<' T_Less, redirToken '>' T_Greater ]

prop_readIoFile = isOk readIoFile ">> \"$(date +%YYmmDD)\""
readIoFile = called "redirection" $ do
    id <- getNextId
    op <- readIoFileOp
    spacing
    file <- readFilename
    return $ T_FdRedirect id "" $ T_IoFile id op file

readIoVariable = try $ do
    char '{'
    x <- readVariableName
    char '}'
    lookAhead readIoFileOp
    return $ "{" ++ x ++ "}"

readIoNumber = try $ do
    x <- many1 digit <|> string "&"
    lookAhead readIoFileOp
    return x

prop_readIoNumberRedirect = isOk readIoNumberRedirect "3>&2"
prop_readIoNumberRedirect2 = isOk readIoNumberRedirect "2> lol"
prop_readIoNumberRedirect3 = isOk readIoNumberRedirect "4>&-"
prop_readIoNumberRedirect4 = isOk readIoNumberRedirect "&> lol"
prop_readIoNumberRedirect5 = isOk readIoNumberRedirect "{foo}>&2"
prop_readIoNumberRedirect6 = isOk readIoNumberRedirect "{foo}<&-"
readIoNumberRedirect = do
    id <- getNextId
    n <- readIoVariable <|> readIoNumber
    op <- readHereString <|> readHereDoc <|> readIoFile
    let actualOp = case op of T_FdRedirect _ "" x -> x
    spacing
    return $ T_FdRedirect id n actualOp

readIoRedirect = choice [ readIoNumberRedirect, readHereString, readHereDoc, readIoFile ] `thenSkip` spacing

readRedirectList = many1 readIoRedirect

prop_readHereString = isOk readHereString "<<< \"Hello $world\""
readHereString = called "here string" $ do
    id <- getNextId
    try $ string "<<<"
    spacing
    id2 <- getNextId
    word <- readNormalWord
    return $ T_FdRedirect id "" $ T_HereString id2 word

readNewlineList = many1 ((linefeed <|> carriageReturn) `thenSkip` spacing)
readLineBreak = optional readNewlineList

prop_readSeparator1 = isWarning readScript "a &; b"
prop_readSeparator2 = isOk readScript "a & b"
readSeparatorOp = do
    notFollowedBy2 (void g_AND_IF <|> void readCaseSeparator)
    notFollowedBy2 (string "&>")
    f <- try (do
                    char '&'
                    spacing
                    pos <- getPosition
                    char ';'
                    -- In case statements we might have foo & ;;
                    notFollowedBy2 $ char ';'
                    parseProblemAt pos ErrorC 1045 "It's not 'foo &; bar', just 'foo & bar'."
                    return '&'
            ) <|> char ';' <|> char '&'
    spacing
    return f

readSequentialSep = disregard (g_Semi >> readLineBreak) <|> disregard readNewlineList
readSeparator =
    do
        separator <- readSeparatorOp
        readLineBreak
        return separator
     <|>
        do
            readNewlineList
            return '\n'

makeSimpleCommand id1 id2 prefix cmd suffix =
    let
        (preAssigned, preRest) = partition assignment prefix
        (preRedirected, preRest2) = partition redirection preRest
        (postRedirected, postRest) = partition redirection suffix

        redirs = preRedirected ++ postRedirected
        assigns = preAssigned
        args = cmd ++ preRest2 ++ postRest
    in
        T_Redirecting id1 redirs $ T_SimpleCommand id2 assigns args
  where
    assignment (T_Assignment {}) = True
    assignment _ = False
    redirection (T_FdRedirect {}) = True
    redirection _ = False

prop_readSimpleCommand = isOk readSimpleCommand "echo test > file"
prop_readSimpleCommand2 = isOk readSimpleCommand "cmd &> file"
prop_readSimpleCommand3 = isOk readSimpleCommand "export foo=(bar baz)"
prop_readSimpleCommand4 = isOk readSimpleCommand "typeset -a foo=(lol)"
prop_readSimpleCommand5 = isOk readSimpleCommand "time if true; then echo foo; fi"
prop_readSimpleCommand6 = isOk readSimpleCommand "time -p ( ls -l; )"
readSimpleCommand = called "simple command" $ do
    pos <- getPosition
    id1 <- getNextId
    id2 <- getNextId
    prefix <- option [] readCmdPrefix
    cmd <- option Nothing $ do { f <- readCmdName; return $ Just f; }
    when (null prefix && isNothing cmd) $ fail "Expected a command"
    case cmd of
      Nothing -> return $ makeSimpleCommand id1 id2 prefix [] []
      Just cmd -> do
            suffix <- option [] $ getParser readCmdSuffix cmd [
                        (["declare", "export", "local", "readonly", "typeset"], readModifierSuffix),
                        (["time"], readTimeSuffix),
                        (["let"], readLetSuffix),
                        (["eval"], readEvalSuffix)
                    ]

            let result = makeSimpleCommand id1 id2 prefix [cmd] suffix
            if isCommand ["source", "."] cmd
                then readSource pos result
                else return result
  where
    isCommand strings (T_NormalWord _ [T_Literal _ s]) = s `elem` strings
    isCommand _ _ = False
    getParser def cmd [] = def
    getParser def cmd ((list, action):rest) =
        if isCommand list cmd
        then action
        else getParser def cmd rest


readSource :: Monad m => SourcePos -> Token -> SCParser m Token
readSource pos t@(T_Redirecting _ _ (T_SimpleCommand _ _ (cmd:file:_))) = do
    override <- getSourceOverride
    let literalFile = do
        name <- override `mplus` getLiteralString file
        -- Hack to avoid 'source ~/foo' trying to read from literal tilde
        guard . not $ "~/" `isPrefixOf` name
        return name
    case literalFile of
        Nothing -> do
            parseNoteAt pos WarningC 1090
                "Can't follow non-constant source. Use a directive to specify location."
            return t
        Just filename -> do
            proceed <- shouldFollow filename
            if not proceed
              then do
                parseNoteAt pos InfoC 1093
                    "This file appears to be recursively sourced. Ignoring."
                return t
              else do
                sys <- Mr.ask
                input <-
                    if filename == "/dev/null" -- always allow /dev/null
                    then return (Right "")
                    else system $ siReadFile sys filename
                case input of
                    Left err -> do
                        parseNoteAt pos InfoC 1091 $
                            "Not following: " ++ err
                        return t
                    Right script -> do
                        id <- getNextIdAt pos

                        let included = do
                            src <- subRead filename script
                            return $ T_Include id t src

                        let failed = do
                            parseNoteAt pos WarningC 1094
                                "Parsing of sourced file failed. Ignoring it."
                            return t

                        included <|> failed
  where
    subRead name script =
        withContext (ContextSource name) $
            inSeparateContext $
                subParse (initialPos name) readScript script
readSource _ t = return t


prop_readPipeline = isOk readPipeline "! cat /etc/issue | grep -i ubuntu"
prop_readPipeline2 = isWarning readPipeline "!cat /etc/issue | grep -i ubuntu"
prop_readPipeline3 = isOk readPipeline "for f; do :; done|cat"
readPipeline = do
    unexpecting "keyword/token" readKeyword
    do
        (T_Bang id) <- g_Bang
        pipe <- readPipeSequence
        return $ T_Banged id pipe
      <|>
        readPipeSequence

prop_readAndOr = isOk readAndOr "grep -i lol foo || exit 1"
prop_readAndOr1 = isOk readAndOr "# shellcheck disable=1\nfoo"
prop_readAndOr2 = isOk readAndOr "# shellcheck disable=1\n# lol\n# shellcheck disable=3\nfoo"
readAndOr = do
    aid <- getNextId
    annotations <- readAnnotations

    andOr <- withAnnotations annotations $
        chainr1 readPipeline $ do
            op <- g_AND_IF <|> g_OR_IF
            readLineBreak
            return $ case op of T_AND_IF id -> T_AndIf id
                                T_OR_IF  id -> T_OrIf id

    return $ if null annotations
                then andOr
                else T_Annotation aid annotations andOr

readTermOrNone = do
    allspacing
    readTerm <|> do
        eof
        return []

prop_readTerm = isOk readTerm "time ( foo; bar; )"
readTerm = do
    allspacing
    m <- readAndOr
    readTerm' m

readTerm' current =
    do
        id <- getNextId
        sep <- readSeparator
        more <- option (T_EOF id) readAndOr
        case more of (T_EOF _) -> return [transformWithSeparator id sep current]
                     _         -> do
                                list <- readTerm' more
                                return (transformWithSeparator id sep current : list)
      <|>
        return [current]

transformWithSeparator i '&' = T_Backgrounded i
transformWithSeparator i _  = id


readPipeSequence = do
    id <- getNextId
    (cmds, pipes) <- sepBy1WithSeparators readCommand
                        (readPipe `thenSkip` (spacing >> readLineBreak))
    spacing
    return $ T_Pipeline id pipes cmds
  where
    sepBy1WithSeparators p s = do
        let elems = p >>= \x -> return ([x], [])
        let seps = do
            separator <- s
            return $ \(a,b) (c,d) -> (a++c, b ++ d ++ [separator])
        elems `chainl1` seps

readPipe = do
    notFollowedBy2 g_OR_IF
    id <- getNextId
    char '|'
    qualifier <- string "&" <|> return ""
    spacing
    return $ T_Pipe id ('|':qualifier)

readCommand = choice [
    readCompoundCommand,
    readCoProc,
    readSimpleCommand
    ]

readCmdName = readCmdWord
readCmdWord = readNormalWord <* spacing

prop_readIfClause = isOk readIfClause "if false; then foo; elif true; then stuff; more stuff; else cows; fi"
prop_readIfClause2 = isWarning readIfClause "if false; then; echo oo; fi"
prop_readIfClause3 = isWarning readIfClause "if false; then true; else; echo lol; fi"
prop_readIfClause4 = isWarning readIfClause "if false; then true; else if true; then echo lol; fi"
prop_readIfClause5 = isOk readIfClause "if false; then true; else\nif true; then echo lol; fi; fi"
readIfClause = called "if expression" $ do
    id <- getNextId
    pos <- getPosition
    (condition, action) <- readIfPart
    elifs <- many readElifPart
    elses <- option [] readElsePart

    g_Fi `orFail` do
        parseProblemAt pos ErrorC 1046 "Couldn't find 'fi' for this 'if'."
        parseProblem ErrorC 1047 "Expected 'fi' matching previously mentioned 'if'."
        return "Expected 'fi'"

    return $ T_IfExpression id ((condition, action):elifs) elses


verifyNotEmptyIf s =
    optional (do
                emptyPos <- getPosition
                try . lookAhead $ (g_Fi <|> g_Elif <|> g_Else)
                parseProblemAt emptyPos ErrorC 1048 $ "Can't have empty " ++ s ++ " clauses (use 'true' as a no-op).")
readIfPart = do
    pos <- getPosition
    g_If
    allspacing
    condition <- readTerm

    ifNextToken (g_Fi <|> g_Elif <|> g_Else) $
        parseProblemAt pos ErrorC 1049 "Did you forget the 'then' for this 'if'?"

    called "then clause" $ do
        g_Then `orFail` do
            parseProblem ErrorC 1050 "Expected 'then'."
            return "Expected 'then'"

        acceptButWarn g_Semi ErrorC 1051 "No semicolons directly after 'then'."
        allspacing
        verifyNotEmptyIf "then"

        action <- readTerm
        return (condition, action)

readElifPart = called "elif clause" $ do
    pos <- getPosition
    correctElif <- elif
    unless correctElif $
        parseProblemAt pos ErrorC 1075 "Use 'elif' instead of 'else if'."
    allspacing
    condition <- readTerm

    ifNextToken (g_Fi <|> g_Elif <|> g_Else) $
        parseProblemAt pos ErrorC 1049 "Did you forget the 'then' for this 'elif'?"

    g_Then
    acceptButWarn g_Semi ErrorC 1052 "No semicolons directly after 'then'."
    allspacing
    verifyNotEmptyIf "then"
    action <- readTerm
    return (condition, action)
  where
    elif = (g_Elif >> return True) <|>
        try (g_Else >> g_If >> return False)

readElsePart = called "else clause" $ do
    pos <- getPosition
    g_Else
    acceptButWarn g_Semi ErrorC 1053 "No semicolons directly after 'else'."
    allspacing
    verifyNotEmptyIf "else"
    readTerm

ifNextToken parser action =
    optional $ do
        try . lookAhead $ parser
        action

prop_readSubshell = isOk readSubshell "( cd /foo; tar cf stuff.tar * )"
readSubshell = called "explicit subshell" $ do
    id <- getNextId
    char '('
    allspacing
    list <- readCompoundList
    allspacing
    char ')' <|> fail ") closing the subshell"
    return $ T_Subshell id list

prop_readBraceGroup = isOk readBraceGroup "{ a; b | c | d; e; }"
prop_readBraceGroup2 = isWarning readBraceGroup "{foo;}"
readBraceGroup = called "brace group" $ do
    id <- getNextId
    char '{'
    allspacingOrFail <|> parseProblem ErrorC 1054 "You need a space after the '{'."
    optional $ do
        pos <- getPosition
        lookAhead $ char '}'
        parseProblemAt pos ErrorC 1055 "You need at least one command here. Use 'true;' as a no-op."
    list <- readTerm
    char '}' <|> do
        parseProblem ErrorC 1056 "Expected a '}'. If you have one, try a ; or \\n in front of it."
        fail "Missing '}'"
    return $ T_BraceGroup id list

prop_readWhileClause = isOk readWhileClause "while [[ -e foo ]]; do sleep 1; done"
readWhileClause = called "while loop" $ do
    pos <- getPosition
    (T_While id) <- g_While
    condition <- readTerm
    statements <- readDoGroup pos
    return $ T_WhileExpression id condition statements

prop_readUntilClause = isOk readUntilClause "until kill -0 $PID; do sleep 1; done"
readUntilClause = called "until loop" $ do
    pos <- getPosition
    (T_Until id) <- g_Until
    condition <- readTerm
    statements <- readDoGroup pos
    return $ T_UntilExpression id condition statements

readDoGroup loopPos = do
    pos <- getPosition
    optional (do
                try . lookAhead $ g_Done
                parseProblemAt loopPos ErrorC 1057 "Did you forget the 'do' for this loop?")

    g_Do `orFail` do
        parseProblem ErrorC 1058 "Expected 'do'."
        return "Expected 'do'"

    acceptButWarn g_Semi ErrorC 1059 "No semicolons directly after 'do'."
    allspacing

    optional (do
                try . lookAhead $ g_Done
                parseProblemAt loopPos ErrorC 1060 "Can't have empty do clauses (use 'true' as a no-op).")

    commands <- readCompoundList
    g_Done `orFail` do
            parseProblemAt pos ErrorC 1061 "Couldn't find 'done' for this 'do'."
            parseProblem ErrorC 1062 "Expected 'done' matching previously mentioned 'do'."
            return "Expected 'done'"
    return commands


prop_readForClause = isOk readForClause "for f in *; do rm \"$f\"; done"
prop_readForClause3 = isOk readForClause "for f; do foo; done"
prop_readForClause4 = isOk readForClause "for((i=0; i<10; i++)); do echo $i; done"
prop_readForClause5 = isOk readForClause "for ((i=0;i<10 && n>x;i++,--n))\ndo \necho $i\ndone"
prop_readForClause6 = isOk readForClause "for ((;;))\ndo echo $i\ndone"
prop_readForClause7 = isOk readForClause "for ((;;)) do echo $i\ndone"
prop_readForClause8 = isOk readForClause "for ((;;)) ; do echo $i\ndone"
prop_readForClause9 = isOk readForClause "for i do true; done"
prop_readForClause10= isOk readForClause "for ((;;)) { true; }"
prop_readForClause12= isWarning readForClause "for $a in *; do echo \"$a\"; done"
readForClause = called "for loop" $ do
    pos <- getPosition
    (T_For id) <- g_For
    spacing
    readArithmetic id pos <|> readRegular id pos
  where
    readArithmetic id pos = called "arithmetic for condition" $ do
        try $ string "(("
        x <- readArithmeticContents
        char ';' >> spacing
        y <- readArithmeticContents
        char ';' >> spacing
        z <- readArithmeticContents
        spacing
        string "))"
        spacing
        optional $ readSequentialSep >> spacing
        group <- readBraced <|> readDoGroup pos
        return $ T_ForArithmetic id x y z group

    readBraced = do
        (T_BraceGroup _ list) <- readBraceGroup
        return list

    readRegular id pos = do
        acceptButWarn (char '$') ErrorC 1086
            "Don't use $ on the iterator name in for loops."
        name <- readVariableName `thenSkip` spacing
        values <- readInClause <|> (optional readSequentialSep >> return [])
        group <- readDoGroup pos
        return $ T_ForIn id name values group

prop_readSelectClause1 = isOk readSelectClause "select foo in *; do echo $foo; done"
prop_readSelectClause2 = isOk readSelectClause "select foo; do echo $foo; done"
readSelectClause = called "select loop" $ do
    pos <- getPosition
    (T_Select id) <- g_Select
    spacing
    typ <- readRegular
    group <- readDoGroup pos
    typ id group
  where
    readRegular = do
        name <- readVariableName
        spacing
        values <- readInClause <|> (readSequentialSep >> return [])
        return $ \id group -> (return $ T_SelectIn id name values group)

readInClause = do
    g_In
    things <- readCmdWord `reluctantlyTill`
                (disregard g_Semi <|> disregard linefeed <|> disregard g_Do)

    do {
        lookAhead g_Do;
        parseNote ErrorC 1063 "You need a line feed or semicolon before the 'do'.";
    } <|> do {
        optional g_Semi;
        disregard allspacing;
    }

    return things

prop_readCaseClause = isOk readCaseClause "case foo in a ) lol; cow;; b|d) fooo; esac"
prop_readCaseClause2 = isOk readCaseClause "case foo\n in * ) echo bar;; esac"
prop_readCaseClause3 = isOk readCaseClause "case foo\n in * ) echo bar & ;; esac"
prop_readCaseClause4 = isOk readCaseClause "case foo\n in *) echo bar ;& bar) foo; esac"
prop_readCaseClause5 = isOk readCaseClause "case foo\n in *) echo bar;;& foo) baz;; esac"
readCaseClause = called "case expression" $ do
    id <- getNextId
    g_Case
    word <- readNormalWord
    allspacing
    g_In <|> fail "Expected 'in'"
    readLineBreak
    list <- readCaseList
    g_Esac <|> fail "Expected 'esac' to close the case statement"
    return $ T_CaseExpression id word list

readCaseList = many readCaseItem

readCaseItem = called "case item" $ do
    notFollowedBy2 g_Esac
    optional g_Lparen
    spacing
    pattern <- readPattern
    void g_Rparen <|> do
        parseProblem ErrorC 1085
            "Did you forget to move the ;; after extending this case item?"
        fail "Expected ) to open a new case item"
    readLineBreak
    list <- (lookAhead readCaseSeparator >> return []) <|> readCompoundList
    separator <- readCaseSeparator `attempting` do
        pos <- getPosition
        lookAhead g_Rparen
        parseProblemAt pos ErrorC 1074
            "Did you forget the ;; after the previous case item?"
    readLineBreak
    return (separator, pattern, list)

readCaseSeparator = choice [
    tryToken ";;&" (const ()) >> return CaseContinue,
    tryToken ";&" (const ()) >> return CaseFallThrough,
    g_DSEMI >> return CaseBreak,
    lookAhead (readLineBreak >> g_Esac) >> return CaseBreak
    ]

prop_readFunctionDefinition = isOk readFunctionDefinition "foo() { command foo --lol \"$@\"; }"
prop_readFunctionDefinition1 = isOk readFunctionDefinition "foo   (){ command foo --lol \"$@\"; }"
prop_readFunctionDefinition4 = isWarning readFunctionDefinition "foo(a, b) { true; }"
prop_readFunctionDefinition5 = isOk readFunctionDefinition ":(){ :|:;}"
prop_readFunctionDefinition6 = isOk readFunctionDefinition "?(){ foo; }"
prop_readFunctionDefinition7 = isOk readFunctionDefinition "..(){ cd ..; }"
prop_readFunctionDefinition8 = isOk readFunctionDefinition "foo() (ls)"
prop_readFunctionDefinition9 = isOk readFunctionDefinition "function foo { true; }"
prop_readFunctionDefinition10= isOk readFunctionDefinition "function foo () { true; }"
prop_readFunctionDefinition11= isWarning readFunctionDefinition "function foo{\ntrue\n}"
readFunctionDefinition = called "function" $ do
    functionSignature <- try readFunctionSignature
    allspacing
    disregard (lookAhead $ oneOf "{(") <|> parseProblem ErrorC 1064 "Expected a { to open the function definition."
    group <- readBraceGroup <|> readSubshell
    return $ functionSignature group
  where
    readFunctionSignature =
        readWithFunction <|> readWithoutFunction
      where
        readWithFunction = do
            id <- getNextId
            try $ do
                string "function"
                whitespace
            spacing
            name <- readFunctionName
            spaces <- spacing
            hasParens <- wasIncluded readParens
            when (not hasParens && null spaces) $
                acceptButWarn (lookAhead (oneOf "{("))
                    ErrorC 1095 "You need a space or linefeed between the function name and body."
            return $ T_Function id (FunctionKeyword True) (FunctionParentheses hasParens) name

        readWithoutFunction = try $ do
            id <- getNextId
            name <- readFunctionName
            guard $ name /= "time"  -- Interfers with time ( foo )
            spacing
            readParens
            return $ T_Function id (FunctionKeyword False) (FunctionParentheses True) name

        readParens = do
            g_Lparen
            spacing
            g_Rparen <|> do
                parseProblem ErrorC 1065 "Trying to declare parameters? Don't. Use () and refer to params as $1, $2.."
                many $ noneOf "\n){"
                g_Rparen
            return ()

        readFunctionName = many1 functionChars

prop_readCoProc1 = isOk readCoProc "coproc foo { echo bar; }"
prop_readCoProc2 = isOk readCoProc "coproc { echo bar; }"
prop_readCoProc3 = isOk readCoProc "coproc echo bar"
readCoProc = called "coproc" $ do
    id <- getNextId
    try $ do
        string "coproc"
        whitespace
    choice [ try $ readCompoundCoProc id, readSimpleCoProc id ]
  where
    readCompoundCoProc id = do
        var <- optionMaybe $
            readVariableName `thenSkip` whitespace
        body <- readBody readCompoundCommand
        return $ T_CoProc id var body
    readSimpleCoProc id = do
        body <- readBody readSimpleCommand
        return $ T_CoProc id Nothing body
    readBody parser = do
        id <- getNextId
        body <- parser
        return $ T_CoProcBody id body


readPattern = (readNormalWord `thenSkip` spacing) `sepBy1` (char '|' `thenSkip` spacing)

prop_readCompoundCommand = isOk readCompoundCommand "{ echo foo; }>/dev/null"
readCompoundCommand = do
    id <- getNextId
    cmd <- choice [
        readBraceGroup,
        readTripleParenthesis "" readArithmeticExpression readSubshell,
        readArithmeticExpression,
        readSubshell,
        readCondition,
        readWhileClause,
        readUntilClause,
        readIfClause,
        readForClause,
        readSelectClause,
        readCaseClause,
        readFunctionDefinition
        ]
    spacing
    redirs <- many readIoRedirect
    unless (null redirs) $ optional $ do
        lookAhead $ try (spacing >> needsSeparator)
        parseProblem WarningC 1013 "Bash requires ; or \\n here, after redirecting nested compound commands."
    return $ T_Redirecting id redirs cmd
  where
    needsSeparator = choice [ g_Then, g_Else, g_Elif, g_Fi, g_Do, g_Done, g_Esac, g_Rbrace ]


readCompoundList = readTerm
readCompoundListOrEmpty = do
    allspacing
    readTerm <|> return []

readCmdPrefix = many1 (readIoRedirect <|> readAssignmentWord)
readCmdSuffix = many1 (readIoRedirect <|> readCmdWord)
readModifierSuffix = many1 (readIoRedirect <|> readAssignmentWord <|> readCmdWord)
readTimeSuffix = do
    flags <- many readFlag
    pipeline <- readPipeline
    return $ flags ++ [pipeline]
  where
    -- This fails for quoted variables and such. Fixme?
    readFlag = do
        lookAhead $ char '-'
        readCmdWord

-- Fixme: this is a hack that doesn't handle let c='4'"5" or let a\>b
readLetSuffix :: Monad m => SCParser m [Token]
readLetSuffix = many1 (readIoRedirect <|> try readLetExpression <|> readCmdWord)
  where
    readLetExpression :: Monad m => SCParser m Token
    readLetExpression = do
        startPos <- getPosition
        expression <- readStringForParser readCmdWord
        let (unQuoted, newPos) = kludgeAwayQuotes expression startPos
        subParse newPos readArithmeticContents unQuoted

    kludgeAwayQuotes :: String -> SourcePos -> (String, SourcePos)
    kludgeAwayQuotes s p =
        case s of
            first:rest@(_:_) ->
                let (last:backwards) = reverse rest
                    middle = reverse backwards
                in
                    if first `elem` "'\"" && first == last
                    then (middle, updatePosChar p first)
                    else (s, p)
            x -> (s, p)


-- bash allows a=(b), ksh allows $a=(b). dash allows neither. Let's warn.
readEvalSuffix = many1 (readIoRedirect <|> readCmdWord <|> evalFallback)
  where
    evalFallback = do
        pos <- getPosition
        lookAhead $ char '('
        parseProblemAt pos WarningC 1098 "Quote/escape special characters when using eval, e.g. eval \"a=(b)\"."
        fail "Unexpected parentheses. Make sure to quote when eval'ing as shell parsers differ."

-- Get whatever a parser would parse as a string
readStringForParser parser = do
    pos <- lookAhead (parser >> getPosition)
    readUntil pos
  where
    readUntil endPos = anyChar `reluctantlyTill` (getPosition >>= guard . (== endPos))

prop_readAssignmentWord = isOk readAssignmentWord "a=42"
prop_readAssignmentWord2 = isOk readAssignmentWord "b=(1 2 3)"
prop_readAssignmentWord3 = isWarning readAssignmentWord "$b = 13"
prop_readAssignmentWord4 = isWarning readAssignmentWord "b = $(lol)"
prop_readAssignmentWord5 = isOk readAssignmentWord "b+=lol"
prop_readAssignmentWord6 = isWarning readAssignmentWord "b += (1 2 3)"
prop_readAssignmentWord7 = isOk readAssignmentWord "a[3$n'']=42"
prop_readAssignmentWord8 = isOk readAssignmentWord "a[4''$(cat foo)]=42"
prop_readAssignmentWord9 = isOk readAssignmentWord "IFS= "
prop_readAssignmentWord9a= isOk readAssignmentWord "foo="
prop_readAssignmentWord9b= isOk readAssignmentWord "foo=  "
prop_readAssignmentWord9c= isOk readAssignmentWord "foo=  #bar"
prop_readAssignmentWord10= isWarning readAssignmentWord "foo$n=42"
prop_readAssignmentWord11= isOk readAssignmentWord "foo=([a]=b [c] [d]= [e f )"
prop_readAssignmentWord12= isOk readAssignmentWord "a[b <<= 3 + c]='thing'"
readAssignmentWord = try $ do
    id <- getNextId
    pos <- getPosition
    optional (char '$' >> parseNote ErrorC 1066 "Don't use $ on the left side of assignments.")
    variable <- readVariableName
    optional (readNormalDollar >> parseNoteAt pos ErrorC
                                1067 "For indirection, use (associative) arrays or 'read \"var$n\" <<< \"value\"'")
    index <- optionMaybe readArrayIndex
    hasLeftSpace <- liftM (not . null) spacing
    pos <- getPosition
    op <- readAssignmentOp
    hasRightSpace <- liftM (not . null) spacing
    isEndOfCommand <- liftM isJust $ optionMaybe (try . lookAhead $ (disregard (oneOf "\r\n;&|)") <|> eof))
    if not hasLeftSpace && (hasRightSpace || isEndOfCommand)
      then do
        when (variable /= "IFS" && hasRightSpace && not isEndOfCommand) $
            parseNoteAt pos WarningC 1007
                "Remove space after = if trying to assign a value (for empty string, use var='' ... )."
        value <- readEmptyLiteral
        return $ T_Assignment id op variable index value
      else do
        when (hasLeftSpace || hasRightSpace) $
            parseNoteAt pos ErrorC 1068 "Don't put spaces around the = in assignments."
        value <- readArray <|> readNormalWord
        spacing
        return $ T_Assignment id op variable index value
  where
    readAssignmentOp = do
        pos <- getPosition
        unexpecting "" $ string "==="
        choice [
            string "+=" >> return Append,
            do
                try (string "==")
                parseProblemAt pos ErrorC 1097
                    "Unexpected ==. For assignment, use =. For comparison, use [/[[."
                return Assign,

            string "=" >> return Assign
            ]
    readEmptyLiteral = do
        id <- getNextId
        return $ T_Literal id ""

readArrayIndex = do
    char '['
    optional space
    x <- readArithmeticContents
    char ']'
    return x

readArray = called "array assignment" $ do
    id <- getNextId
    char '('
    allspacing
    words <- readElement `reluctantlyTill` char ')'
    char ')' <|> fail "Expected ) to close array assignment"
    return $ T_Array id words
  where
    readElement = (readIndexed <|> readRegular) `thenSkip` allspacing
    readIndexed = do
        id <- getNextId
        index <- try $ do
            x <- readArrayIndex
            char '='
            return x
        value <- readNormalWord <|> nothing
        return $ T_IndexedElement id index value
    readRegular = readNormalWord

    nothing = do
        id <- getNextId
        return $ T_Literal id ""

tryToken s t = try $ do
    id <- getNextId
    string s
    spacing
    return $ t id

redirToken c t = try $ do
    id <- getNextId
    char c
    notFollowedBy2 $ char '('
    return $ t id

tryWordToken s t = tryParseWordToken s t `thenSkip` spacing
tryParseWordToken keyword t = try $ do
    id <- getNextId
    str <- anycaseString keyword

    optional $ do
        try . lookAhead $ char '['
        parseProblem ErrorC 1069 "You need a space before the [."
    optional $ do
        try . lookAhead $ char '#'
        parseProblem ErrorC 1099 "You need a space before the #."

    try $ lookAhead keywordSeparator
    when (str /= keyword) $
        parseProblem ErrorC 1081 $
            "Scripts are case sensitive. Use '" ++ keyword ++ "', not '" ++ str ++ "'."
    return $ t id

anycaseString =
    mapM anycaseChar
  where
    anycaseChar c = char (toLower c) <|> char (toUpper c)

g_AND_IF = tryToken "&&" T_AND_IF
g_OR_IF = tryToken "||" T_OR_IF
g_DSEMI = tryToken ";;" T_DSEMI
g_DLESS = tryToken "<<" T_DLESS
g_DGREAT = tryToken ">>" T_DGREAT
g_LESSAND = tryToken "<&" T_LESSAND
g_GREATAND = tryToken ">&" T_GREATAND
g_LESSGREAT = tryToken "<>" T_LESSGREAT
g_DLESSDASH = tryToken "<<-" T_DLESSDASH
g_CLOBBER = tryToken ">|" T_CLOBBER
g_OPERATOR = g_AND_IF <|> g_OR_IF <|> g_DSEMI <|> g_DLESSDASH <|> g_DLESS <|> g_DGREAT <|> g_LESSAND <|> g_GREATAND <|> g_LESSGREAT

g_If = tryWordToken "if" T_If
g_Then = tryWordToken "then" T_Then
g_Else = tryWordToken "else" T_Else
g_Elif = tryWordToken "elif" T_Elif
g_Fi = tryWordToken "fi" T_Fi
g_Do = tryWordToken "do" T_Do
g_Done = tryWordToken "done" T_Done
g_Case = tryWordToken "case" T_Case
g_Esac = tryWordToken "esac" T_Esac
g_While = tryWordToken "while" T_While
g_Until = tryWordToken "until" T_Until
g_For = tryWordToken "for" T_For
g_Select = tryWordToken "select" T_Select
g_In = tryWordToken "in" T_In
g_Lbrace = tryWordToken "{" T_Lbrace
g_Rbrace = do -- handled specially due to ksh echo "${ foo; }bar"
    id <- getNextId
    char '}'
    return $ T_Rbrace id

g_Lparen = tryToken "(" T_Lparen
g_Rparen = tryToken ")" T_Rparen
g_Bang = do
    id <- getNextId
    char '!'
    void spacing1 <|> do
        pos <- getPosition
        parseProblemAt pos ErrorC 1035
            "You are missing a required space after the !."
    return $ T_Bang id

g_Semi = do
    notFollowedBy2 g_DSEMI
    tryToken ";" T_Semi

keywordSeparator =
    eof <|> disregard whitespace <|> disregard (oneOf ";()[<>&|")

readKeyword = choice [ g_Then, g_Else, g_Elif, g_Fi, g_Do, g_Done, g_Esac, g_Rbrace, g_Rparen, g_DSEMI ]

ifParse p t f =
    (lookAhead (try p) >> t) <|> f

prop_readShebang1 = isOk readShebang "#!/bin/sh\n"
prop_readShebang2 = isWarning readShebang "!# /bin/sh\n"
prop_readShebang3 = isNotOk readShebang "#shellcheck shell=/bin/sh\n"
prop_readShebang4 = isWarning readShebang "! /bin/sh"
readShebang = do
    try readCorrect <|> try readSwapped <|> try readMissingHash
    many linewhitespace
    str <- many $ noneOf "\r\n"
    optional carriageReturn
    optional linefeed
    return str
  where
    readCorrect = void $ string "#!"
    readSwapped = do
        pos <- getPosition
        string "!#"
        parseProblemAt pos ErrorC 1084
            "Use #!, not !#, for the shebang."

    readMissingHash = do
        pos <- getPosition
        char '!'
        lookAhead $ do
            many linewhitespace
            char '/'
        parseProblemAt pos ErrorC 1104
            "Use #!, not just !, for the shebang."

verifyEof = eof <|> choice [
        ifParsable g_Lparen $
            parseProblem ErrorC 1088 "Parsing stopped here. Invalid use of parentheses?",

        ifParsable readKeyword $
            parseProblem ErrorC 1089 "Parsing stopped here. Is this keyword correctly matched up?",

        parseProblem ErrorC 1070 "Parsing stopped here. Mismatched keywords or invalid parentheses?"
    ]
  where
    ifParsable p action = do
        try (lookAhead p)
        action

prop_readScript1 = isOk readScript "#!/bin/bash\necho hello world\n"
prop_readScript2 = isWarning readScript "#!/bin/bash\r\necho hello world\n"
prop_readScript3 = isWarning readScript "#!/bin/bash\necho hello\xA0world"
prop_readScript4 = isWarning readScript "#!/usr/bin/perl\nfoo=("
prop_readScript5 = isOk readScript "#!/bin/bash\n#This is an empty script\n\n"
readScript = do
    id <- getNextId
    pos <- getPosition
    optional $ do
        readUtf8Bom
        parseProblem ErrorC 1082
            "This file has a UTF-8 BOM. Remove it with: LC_CTYPE=C sed '1s/^...//' < yourscript ."
    sb <- option "" readShebang
    verifyShell pos (getShell sb)
    if isValidShell (getShell sb) /= Just False
      then do
            annotationId <- getNextId
            annotations <- readAnnotations
            commands <- withAnnotations annotations readCompoundListOrEmpty
            verifyEof
            return $ T_Annotation annotationId annotations $  T_Script id sb commands
        else do
            many anyChar
            return $ T_Script id sb []

  where
    basename s = reverse . takeWhile (/= '/') . reverse $ s
    getShell sb =
        case words sb of
            [] -> ""
            [x] -> basename x
            (first:second:_) ->
                if basename first == "env"
                    then second
                    else basename first

    verifyShell pos s =
        case isValidShell s of
            Just True -> return ()
            Just False -> parseProblemAt pos ErrorC 1071 "ShellCheck only supports sh/bash/ksh scripts. Sorry!"
            Nothing -> parseProblemAt pos InfoC 1008 "This shebang was unrecognized. Note that ShellCheck only handles sh/bash/ksh."

    isValidShell s =
        let good = s == "" || any (`isPrefixOf` s) goodShells
            bad = any (`isPrefixOf` s) badShells
        in
            if good
                then Just True
                else if bad
                        then Just False
                        else Nothing

    goodShells = [
        "sh",
        "ash",
        "dash",
        "bash",
        "ksh"
        ]
    badShells = [
        "awk",
        "csh",
        "expect",
        "perl",
        "python",
        "ruby",
        "tcsh",
        "zsh"
        ]

    readUtf8Bom = called "Byte Order Mark" $ string "\xFEFF"


isWarning p s = parsesCleanly p s == Just False
isOk p s =      parsesCleanly p s == Just True
isNotOk p s =   parsesCleanly p s == Nothing

testParse string = runIdentity $ do
    (res, _) <- runParser (mockedSystemInterface []) readScript "-" string
    return res

parsesCleanly parser string = runIdentity $ do
    (res, sys) <- runParser (mockedSystemInterface [])
                    (parser >> eof >> getState) "-" string
    case (res, sys) of
        (Right userState, systemState) ->
            return $ Just . null $ parseNotes userState ++ parseProblems systemState
        (Left _, _) -> return Nothing

parseWithNotes parser = do
    item <- parser
    state <- getState
    return (item, state)

compareNotes (ParseNote pos1 pos1' level1 _ s1) (ParseNote pos2 pos2' level2 _ s2) = compare (pos1, pos1', level1) (pos2, pos2', level2)
sortNotes = sortBy compareNotes


makeErrorFor parsecError =
    ParseNote pos pos ErrorC 1072 $
        getStringFromParsec $ errorMessages parsecError
    where
      pos = errorPos parsecError

getStringFromParsec errors =
        case map f errors of
            r -> unwords (take 1 $ catMaybes $ reverse r)  ++
                " Fix any mentioned problems and try again."
    where
        f err =
            case err of
                UnExpect s    ->  Nothing -- Due to not knowing Parsec, none of these
                SysUnExpect s ->  Nothing -- are actually helpful. <?> has been hidden
                Expect s      ->  Nothing -- and we only show explicit fail statements.
                Message s     ->  if null s then Nothing else return $ s ++ "."

runParser :: Monad m =>
    SystemInterface m ->
    SCParser m v ->
    String ->
    String ->
    m (Either ParseError v, SystemState)

runParser sys p filename contents =
    Ms.runStateT
        (Mr.runReaderT
            (runParserT p initialUserState filename contents)
            sys)
        initialSystemState
system = lift . lift . lift

parseShell sys name contents = do
    (result, state) <- runParser sys (parseWithNotes readScript) name contents
    case result of
        Right (script, userstate) ->
            return ParseResult {
                prComments = map toPositionedComment $ nub $ parseNotes userstate ++ parseProblems state,
                prTokenPositions = Map.map posToPos (positionMap userstate),
                prRoot = Just $
                    reattachHereDocs script (hereDocMap userstate)
            }
        Left err ->
            return ParseResult {
                prComments =
                    map toPositionedComment $
                        notesForContext (contextStack state)
                        ++ [makeErrorFor err]
                        ++ parseProblems state,
                prTokenPositions = Map.empty,
                prRoot = Nothing
            }
  where
    isName (ContextName _ _) = True
    isName _ = False
    notesForContext list = zipWith ($) [first, second] $ filter isName list
    first (ContextName pos str) = ParseNote pos pos ErrorC 1073 $
        "Couldn't parse this " ++ str ++ "."
    second (ContextName pos str) = ParseNote pos pos InfoC 1009 $
        "The mentioned parser error was in this " ++ str ++ "."

reattachHereDocs root map =
    doTransform f root
  where
    f t@(T_HereDoc id dash quote string []) = fromMaybe t $ do
        list <- Map.lookup id map
        return $ T_HereDoc id dash quote string list
    f t = t

toPositionedComment :: ParseNote -> PositionedComment
toPositionedComment (ParseNote start end severity code message) =
    PositionedComment (posToPos start) (posToPos end) $ Comment severity code message

posToPos :: SourcePos -> Position
posToPos sp = Position {
    posFile = sourceName sp,
    posLine = fromIntegral $ sourceLine sp,
    posColumn = fromIntegral $ sourceColumn sp
}

-- TODO: Clean up crusty old code that this is layered on top of
parseScript :: Monad m =>
        SystemInterface m -> ParseSpec -> m ParseResult
parseScript sys spec =
    parseShell sys (psFilename spec) (psScript spec)


lt x = trace (show x) x
ltt t = trace (show t)

return []
runTests = $quickCheckAll