{- | 
  Module      :  CGILight
  Copyright   :  (c) The University of Glasgow 2001
                 (c) Peter Thiemann 2001,2002
                 (c) Bjorn Bringert 2004-2006
  License     :  BSD-style

  Maintainer  :  bjorn@bringert.net
  Stability   :  experimental
  Portability :  portable


  This is a simple CGI library for teaching purposes.
  It does not support file uploads, custom error handling, or
  ByteString output.
  Use the cgi package,
  <http://www.cs.chalmers.se/~bringert/darcs/haskell-cgi/doc/>, 
  when you need any of those.

  A simple example:

@
import CGILight

cgiMain :: Environment -> Inputs -> IO (Headers,String)
cgiMain env input = return (hs, output)
  where 
  hs = [("Content-type","text/html")]
  output = "<html><body><h1>Hello World!</h1></body></html>"

main :: IO ()
main = runCGI cgiMain
@

  This module may include code by 
  Erik Meijer <mailto:erik@cs.ruu.nl>,
  Sven Panne <mailto:sven.panne@aedion.de>,
  Andy Gill <mailto:andy@galconn.com>,
  and Peter Thiemann.

-}
module CGILight (
                 -- * Types
                 Environment, Inputs, Headers, Body,
                 -- * Running CGI programs
                 runCGI,
                 -- * CGI results
                 output, outputHtml, redirect,
                 -- * Error handling
                 handleErrors,
                 -- * Error pages
                 outputError, outputException,
                 outputInternalServerError,
                 -- * Environment
                 serverName, serverPort,
                 requestMethod, pathInfo,
                 pathTranslated, scriptName,
                 queryString,
                 remoteHost, remoteAddr,
                 authType, remoteUser,
                 requestContentType, requestContentLength,
                 requestHeader,
                 -- * Program and request URI
                 progURI, queryURI, requestURI,
                 -- * Content type
                 ContentType(..), showContentType, parseContentType,
                 -- * URL encoding
                 formEncode, urlEncode, formDecode, urlDecode,
                ) where

import Control.Exception as Exception hiding (try)
import Data.Char
import Data.List
import Data.Maybe
import Network.URI
import System.Environment
import System.IO
import System.IO.Error hiding (try)
import Text.ParserCombinators.Parsec
import Text.XHtml.Strict


type Environment = [(String,String)]
type Inputs      = [(String,String)]

type Header      = (String, String)
type Headers     = [Header]
type Body        = String

runCGI :: (Environment -> Inputs -> IO (Headers,Body)) -> IO ()
runCGI f = 
    do env         <- getCgiVars
       input       <- hGetContents stdin
       let inputs  = decodeInput env input
       (hs,output) <- f env inputs
       mapM_ (hPutStrLn stdout . formatHeader) hs
       hPutStrLn stdout ""
       hPutStr stdout output
       hFlush stdout
  where formatHeader (n,v) = n ++ ": " ++ v

redirect :: URL -> (Headers,Body)
redirect url = ([("Location", url)], "")

output :: ContentType -> Body -> (Headers,Body)
output t body = ([("Content-type", showContentType t)], body)

outputHtml :: Html -> (Headers,Body)
outputHtml html = output (ContentType "text" "html" []) (renderHtml html)

--
-- * CGI environment
--

-- | The server\'s hostname, DNS alias, or IP address as it would 
--   appear in self-referencing URLs.
serverName :: Environment -> String
serverName = getVarWithDefault "SERVER_NAME" ""

-- | The port number to which the request was sent.
serverPort :: Environment -> Int
serverPort env = fromMaybe 80 (lookup "SERVER_PORT" env >>= maybeRead)

-- |  The method with which the request was made. 
--    For HTTP, this is \"GET\", \"HEAD\", \"POST\", etc.
requestMethod :: Environment -> String
requestMethod = getVarWithDefault "REQUEST_METHOD" "GET"

-- | The extra path information, as given by the client.
--   This is any part of the request path that follows the
--   CGI program path.
--   If the string returned by this function is not empty,
--   it is guaranteed to start with a @\'\/\'@.
pathInfo :: Environment -> String
pathInfo env = slash $ getVarWithDefault "PATH_INFO" "" env
  where slash s = if not (null s) && head s /= '/' then '/':s else s

-- | The path returned by 'pathInfo', but with any virtual-to-physical
--   mapping applied to it.
pathTranslated :: Environment -> String
pathTranslated = getVarWithDefault "PATH_TRANSLATED" ""

-- | A virtual path to the script being executed, 
--   used for self-referencing URLs.
scriptName :: Environment -> String
scriptName = getVarWithDefault "SCRIPT_NAME" ""

-- | The information which follows the ? in the URL which referenced 
--   this program. This is the encoded query information.
--   For most normal uses, 'getInput' and friends are probably
--   more convenient.
queryString :: Environment -> String
queryString = getVarWithDefault "QUERY_STRING" ""

-- | The hostname making the request. If the server does not have
--   this information, Nothing is returned. See also 'remoteAddr'.
remoteHost :: Environment -> Maybe String
remoteHost = lookup "REMOTE_HOST"

-- | The IP address of the remote host making the request.
remoteAddr :: Environment -> String
remoteAddr = getVarWithDefault "REMOTE_ADDR" ""

-- | If the server supports user authentication, and the script is 
-- protected, this is the protocol-specific authentication method 
-- used to validate the user.
authType :: Environment -> Maybe String
authType = lookup "AUTH_TYPE"

-- | If the server supports user authentication, and the script is 
--   protected, this is the username they have authenticated as.
remoteUser :: Environment -> Maybe String
remoteUser = lookup "REMOTE_USER"

-- | For queries which have attached information, such as 
--   HTTP POST and PUT, this is the content type of the data.
--   You can use 'parseContentType' to get a structured
--   representation of the the content-type value.
requestContentType :: Environment -> Maybe String
requestContentType = lookup "CONTENT_TYPE"

-- | For queries which have attached information, such as 
--   HTTP POST and PUT, this is the length of the content 
--   given by the client.
requestContentLength :: Environment -> Maybe Int
requestContentLength env = lookup "CONTENT_LENGTH" env >>= maybeRead

-- | Gets the value of the request header with the given name.
--   The header name is case-insensitive.
--   Example:
--
-- > requestHeader "User-Agent"
requestHeader :: Environment -> String -> Maybe String
requestHeader env name = lookup var env
  where var = "HTTP_" ++ map toUpper (replace '-' '_' name)



--
-- * Program and request URI
--

-- | Attempts to reconstruct the absolute URI of this program. 
--   This does not include
--   any extra path information or query parameters. See
--   'queryURI' for that.
--   If the server is rewriting request URIs, this URI can
--   be different from the one requested by the client.
--   See also 'requestURI'.
progURI :: Environment -> URI
progURI env =
    let host = serverName env
        port = serverPort env
        name = scriptName env
        scheme = if port == 443 then "https:" else "http:"
        auth = URIAuth { uriUserInfo = "",
                         uriRegName = host,
                         uriPort = if port == 80 || port == 443 
                                      then "" else ":"++show port }
     in nullURI { uriScheme = scheme, 
                  uriAuthority = Just auth,
                  uriPath = name }

-- | Like 'progURI', but the returned 'URI' also includes
--   any extra path information, and any query parameters.
--   If the server is rewriting request URIs, this URI can
--   be different from the one requested by the client.
--   See also 'requestURI'.
queryURI :: Environment -> URI
queryURI env = 
    let uri  = progURI env
        path = pathInfo env
        qs   = (\q -> if null q then q else '?':q) $ queryString env
     in uri { uriPath = uriPath uri ++ path, uriQuery = qs } 

-- | Attempts to reconstruct the absolute URI requested by the client,
--   including extra path information and query parameters.
--   If no request URI rewriting is done, or if the web server does not
--   provide the information needed to reconstruct the request URI,
--   this function returns the same value as 'queryURI'.
requestURI :: Environment -> URI
requestURI env =
    let uri = queryURI env
       -- Apache sets REQUEST_URI to the original request URI
        mreq = lookup "REQUEST_URI" env >>= parseRelativeReference
     in case mreq of
                 Nothing  -> uri
                 Just req -> uri { 
                                  uriPath  = uriPath req,
                                  uriQuery = uriQuery req
                                 }

--
-- * Error handling
--

-- | Catches any exception thrown by the given action,
--   returns an error page with a 500 Internal Server Error,
--   showing the exception information, and logs the error.
--   
--   Typical usage:
--
-- > cgiMain :: Environment -> Inputs -> IO (Headers,Body)
-- > cgiMain = ...
-- >
-- > main :: IO ()
-- > main = runCGI (handleErrors cgiMain)
handleErrors :: (Environment -> Inputs -> IO (Headers,Body))
             -> Environment -> Inputs -> IO (Headers,Body)
handleErrors a env inputs = a env inputs `Exception.catch` outputException env

--
-- * Error output
--

-- | Output a 500 Internal Server Error with information from
--   an 'Exception'.
outputException :: Environment -> Exception -> IO (Headers,Body)
outputException env e = outputInternalServerError env es
    where es = case e of
                 ErrorCall msg  -> [msg]
                 IOException ie -> ioe ie
                 _              -> [show e]
          ioe ie = if isUserError ie then [ioeGetErrorString ie] else [show ie]

-- | Output an error page to the user, with the given
--   HTTP status code in the response. Also logs the error information
--   to 'stderr'.
outputError :: Environment
            -> Int      -- ^ HTTP Status code
            -> String   -- ^ Status message
            -> [String] -- ^ Error information
            -> IO (Headers,Body)
outputError env c m es = 
      do hPutStrLn stderr $ show (c,m,es)
         return ([("Status", show c ++ " " ++ m), 
                  ("Content-type", "text/html; charset=ISO-8859-1")], 
                 renderHtml $ errorPage env c m es)

-- | Create an HTML error page.
errorPage :: Environment
          -> Int      -- ^ Status code
          -> String   -- ^ Status message
          -> [String] -- ^ Error information
          -> Html
errorPage env c m es = 
    let server = fromMaybe "" $ lookup "SERVER_SOFTWARE" env
        host   = serverName env
        port   = show $ serverPort env
        tit = show c ++ " " ++ m
        sig = "Haskell CGILight" 
              ++ " on " ++ server
              ++ " at " ++ host ++ ", port "++ port
     in header << thetitle << tit 
          +++ body << (h1 << tit +++ map (paragraph <<) es 
                       +++ hr +++ address << sig)


-- | Use 'outputError' to output and log a 500 Internal Server Error.
outputInternalServerError :: Environment
                          -> [String] -- ^ Error information.
                          -> IO (Headers,Body)
outputInternalServerError env es = outputError env 500 "Internal Server Error" es


--
-- * Internals
--

getVarWithDefault :: String -- ^ The name of the variable.
                  -> String -- ^ Default value 
                  -> Environment
                  -> String
getVarWithDefault name def env = fromMaybe def $ lookup name env


-- This ought to be getEnvironment, but olders versions
-- of System.Environment don't have that.
getCgiVars :: IO [(String,String)]
getCgiVars = do vals <- mapM myGetEnv cgiVarNames
                return (zip cgiVarNames vals)

myGetEnv :: String -> IO String
myGetEnv v = Prelude.catch (getEnv v) (const (return ""))

cgiVarNames :: [String]
cgiVarNames =
   [ "DOCUMENT_ROOT"
   , "AUTH_TYPE"
   , "GATEWAY_INTERFACE"
   , "SERVER_SOFTWARE"
   , "SERVER_NAME"
   , "REQUEST_METHOD"
   , "SERVER_ADMIN"
   , "SERVER_PORT"
   , "QUERY_STRING"
   , "CONTENT_LENGTH"
   , "CONTENT_TYPE"
   , "REMOTE_USER"
   , "REMOTE_IDENT"
   , "REMOTE_ADDR"
   , "REMOTE_HOST"
   , "TZ"
   , "PATH"
   , "PATH_INFO"
   , "PATH_TRANSLATED"
   , "SCRIPT_NAME"
   , "SCRIPT_FILENAME"
   , "SCRIPT_URI"
   , "SCRIPT_URL"
   , "HTTP_CONNECTION"
   , "HTTP_ACCEPT_LANGUAGE"
   , "HTTP_ACCEPT"
   , "HTTP_HOST"
   , "HTTP_UA_COLOR"
   , "HTTP_UA_CPU"
   , "HTTP_UA_OS"
   , "HTTP_UA_PIXELS"
   , "HTTP_USER_AGENT"
   ]







--
-- * Inputs
--

-- | Get and decode the input according to the request
--   method and the content-type.
decodeInput :: [(String,String)] -- ^ CGI environment variables.
            -> String        -- ^ Request body.
            -> Inputs  -- ^ Input variables and values.
decodeInput env inp = queryInput env ++ bodyInput env inp


--
-- * Query string
--

-- | Get inputs from the query string.
queryInput :: [(String,String)] -- ^ CGI environment variables.
           -> Inputs -- ^ Input variables and values.
queryInput env = formInput $ lookupOrNil "QUERY_STRING" env

-- | Decode application\/x-www-form-urlencoded inputs.
formInput :: String
          -> Inputs -- ^ Input variables and values.
formInput qs = [(n, v) | (n,v) <- formDecode qs]


--
-- * URL encoding
--

-- | Format name-value pairs as application\/x-www-form-urlencoded.
formEncode :: [(String,String)] -> String
formEncode xs = 
    concat $ intersperse "&" [urlEncode n ++ "=" ++ urlEncode v | (n,v) <- xs]

-- | Convert a single value to the application\/x-www-form-urlencoded encoding.
urlEncode :: String -> String
urlEncode = replace ' ' '+' . escapeURIString okChar
  where okChar c = c == ' ' || 
                   (isUnescapedInURI c && c `notElem` "&=+")

-- | Get the name-value pairs from application\/x-www-form-urlencoded data.
formDecode :: String -> [(String,String)]
formDecode "" = []
formDecode s = (urlDecode n, urlDecode (drop 1 v)) : formDecode (drop 1 rs)
    where (nv,rs) = break (=='&') s
          (n,v) = break (=='=') nv

-- | Convert a single value from the 
--   application\/x-www-form-urlencoded encoding.
urlDecode :: String -> String
urlDecode = unEscapeString . replace '+' ' '



--
-- * Request content
--

-- | Get input variables from the body, if any.
bodyInput :: [(String,String)] -- ^ CGI environment variables.
          -> String        -- ^ Request body.
          -> Inputs -- ^ Input variables and values.
bodyInput env inp =
   case lookup "REQUEST_METHOD" env of
      Just "POST" -> 
          let ctype = lookup "CONTENT_TYPE" env >>= parseContentType
           in decodeBody ctype $ takeInput env inp
      _ -> []

-- | Decode a POST body.
decodeBody :: Maybe ContentType -- ^ Content-type, if any
           -> String        -- ^ Request body
           -> Inputs  -- ^ Input variables and values.
decodeBody ctype inp = 
    case ctype of
               Just (ContentType "application" "x-www-form-urlencoded" _) 
                   -> formInput inp
               Just _ -> [] -- unknown content-type, the user will have to
                            -- deal with it by looking at the raw content
               -- No content-type given, assume x-www-form-urlencoded
               Nothing -> formInput inp

-- | Take the right number of bytes from the input.
takeInput :: [(String,String)]  -- ^ CGI environment variables.
          -> String         -- ^ Request body.
          -> String         -- ^ CONTENT_LENGTH bytes from the request 
                            --   body, or the empty string if there is no
                            --   CONTENT_LENGTH.
takeInput env req = 
    case len of
           Just l  -> take l req
           Nothing -> ""
     where len = lookup "CONTENT_LENGTH" env >>= maybeRead



--
-- * Utilities
--

-- | Replace all instances of a value in a list by another value.
replace :: Eq a =>
           a   -- ^ Value to look for
        -> a   -- ^ Value to replace it with
        -> [a] -- ^ Input list
        -> [a] -- ^ Output list
replace x y = map (\z -> if z == x then y else z)

maybeRead :: Read a => String -> Maybe a
maybeRead = fmap fst . listToMaybe . reads

-- | Same as 'lookup' specialized to strings, but 
--   returns the empty string if lookup fails.
lookupOrNil :: String -> [(String,String)] -> String
lookupOrNil n = fromMaybe "" . lookup n




--
-- * Parameters (for Content-type etc.)
--

showParameters :: [(String,String)] -> String
showParameters = concatMap f
    where f (n,v) = "; " ++ n ++ "=\"" ++ concatMap esc v ++ "\""
          esc '\\' = "\\\\"
          esc '"'  = "\\\""
          esc c | c `elem` ['\\','"'] = '\\':[c]
                | otherwise = [c]

p_parameter :: Parser (String,String)
p_parameter =
  do lexeme $ char ';'
     p_name <- lexeme $ p_token
     lexeme $ char '='
     -- Workaround for seemingly standardized web browser bug
     -- where nothing is escaped in the filename parameter
     -- of the content-disposition header in multipart/form-data
     let litStr = if p_name == "filename" 
                   then buggyLiteralString
                   else literalString
     p_value <- litStr <|> p_token
     return (map toLower p_name, p_value)

-- 
-- * Content type
--

data ContentType = 
	ContentType String String [(String, String)]
    deriving (Show, Read, Eq, Ord)

-- | Produce the standard string representation of a content-type,
--   e.g. \"text\/html; charset=ISO-8859-1\".
showContentType :: ContentType -> String
showContentType (ContentType x y ps) = x ++ "/" ++ y ++ showParameters ps

pContentType :: Parser ContentType
pContentType = 
  do many ws1
     c_type <- p_token
     lexeme $ char '/'
     c_subtype <- lexeme $ p_token
     c_parameters <- many p_parameter
     return $ ContentType (map toLower c_type) (map toLower c_subtype) c_parameters

parseContentType :: Monad m => String -> m ContentType
parseContentType = parseM pContentType "Content-type"

--
-- * Utilities
--

parseM :: Monad m => Parser a -> SourceName -> String -> m a
parseM p n inp =
  case parse p n inp of
    Left e -> fail (show e)
    Right x -> return x

-- 
-- * Parsing utilities
--

-- | RFC 822 LWSP-char
ws1 :: Parser Char
ws1 = oneOf " \t"

lexeme :: Parser a -> Parser a
lexeme p = do x <- p; many ws1; return x

literalString :: Parser String
literalString = do char '\"'
		   str <- many (noneOf "\"\\" <|> quoted_pair)
		   char '\"'
		   return str

-- No web browsers seem to implement RFC 2046 correctly,
-- since they do not escape double quotes and backslashes
-- in the filename parameter in multipart/form-data.
--
-- Note that this eats everything until the last double quote on the line.
buggyLiteralString :: Parser String
buggyLiteralString = 
    do char '\"'
       str <- manyTill anyChar (try lastQuote)
       return str
  where lastQuote = do char '\"' 
                       notFollowedBy (try (many (noneOf "\"") >> char '\"'))

especials, tokenchar :: [Char]
especials = "()<>@,;:\\\"/[]?.="
tokenchar = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" \\ especials

p_token :: Parser String
p_token = many1 (oneOf tokenchar)

text_chars :: [Char]
text_chars = map chr ([1..9] ++ [11,12] ++ [14..127])

p_text :: Parser Char
p_text = oneOf text_chars

quoted_pair :: Parser Char
quoted_pair = do char '\\'
		 p_text