{- | Module : CGILight Copyright : (c) The University of Glasgow 2001 (c) Peter Thiemann 2001,2002 (c) Bjorn Bringert 2004-2006 License : BSD-style Maintainer : bjorn@bringert.net Stability : experimental Portability : portable This is a simple CGI library for teaching purposes. It does not support file uploads, custom error handling, or ByteString output. Use the cgi package, , when you need any of those. A simple example: @ import CGILight cgiMain :: Environment -> Inputs -> IO (Headers,String) cgiMain env input = return (hs, output) where hs = [("Content-type","text/html")] output = "

Hello World!

" main :: IO () main = runCGI cgiMain @ This module may include code by Erik Meijer , Sven Panne , Andy Gill , and Peter Thiemann. -} module CGILight ( -- * Types Environment, Inputs, Headers, Body, -- * Running CGI programs runCGI, -- * CGI results output, outputHtml, redirect, -- * Error handling handleErrors, -- * Error pages outputError, outputException, outputInternalServerError, -- * Environment serverName, serverPort, requestMethod, pathInfo, pathTranslated, scriptName, queryString, remoteHost, remoteAddr, authType, remoteUser, requestContentType, requestContentLength, requestHeader, -- * Program and request URI progURI, queryURI, requestURI, -- * Content type ContentType(..), showContentType, parseContentType, -- * URL encoding formEncode, urlEncode, formDecode, urlDecode, ) where import Control.Exception as Exception hiding (try) import Data.Char import Data.List import Data.Maybe import Network.URI import System.Environment import System.IO import System.IO.Error hiding (try) import Text.ParserCombinators.Parsec import Text.XHtml.Strict type Environment = [(String,String)] type Inputs = [(String,String)] type Header = (String, String) type Headers = [Header] type Body = String runCGI :: (Environment -> Inputs -> IO (Headers,Body)) -> IO () runCGI f = do env <- getCgiVars input <- hGetContents stdin let inputs = decodeInput env input (hs,output) <- f env inputs mapM_ (hPutStrLn stdout . formatHeader) hs hPutStrLn stdout "" hPutStr stdout output hFlush stdout where formatHeader (n,v) = n ++ ": " ++ v redirect :: URL -> (Headers,Body) redirect url = ([("Location", url)], "") output :: ContentType -> Body -> (Headers,Body) output t body = ([("Content-type", showContentType t)], body) outputHtml :: Html -> (Headers,Body) outputHtml html = output (ContentType "text" "html" []) (renderHtml html) -- -- * CGI environment -- -- | The server\'s hostname, DNS alias, or IP address as it would -- appear in self-referencing URLs. serverName :: Environment -> String serverName = getVarWithDefault "SERVER_NAME" "" -- | The port number to which the request was sent. serverPort :: Environment -> Int serverPort env = fromMaybe 80 (lookup "SERVER_PORT" env >>= maybeRead) -- | The method with which the request was made. -- For HTTP, this is \"GET\", \"HEAD\", \"POST\", etc. requestMethod :: Environment -> String requestMethod = getVarWithDefault "REQUEST_METHOD" "GET" -- | The extra path information, as given by the client. -- This is any part of the request path that follows the -- CGI program path. -- If the string returned by this function is not empty, -- it is guaranteed to start with a @\'\/\'@. pathInfo :: Environment -> String pathInfo env = slash $ getVarWithDefault "PATH_INFO" "" env where slash s = if not (null s) && head s /= '/' then '/':s else s -- | The path returned by 'pathInfo', but with any virtual-to-physical -- mapping applied to it. pathTranslated :: Environment -> String pathTranslated = getVarWithDefault "PATH_TRANSLATED" "" -- | A virtual path to the script being executed, -- used for self-referencing URLs. scriptName :: Environment -> String scriptName = getVarWithDefault "SCRIPT_NAME" "" -- | The information which follows the ? in the URL which referenced -- this program. This is the encoded query information. -- For most normal uses, 'getInput' and friends are probably -- more convenient. queryString :: Environment -> String queryString = getVarWithDefault "QUERY_STRING" "" -- | The hostname making the request. If the server does not have -- this information, Nothing is returned. See also 'remoteAddr'. remoteHost :: Environment -> Maybe String remoteHost = lookup "REMOTE_HOST" -- | The IP address of the remote host making the request. remoteAddr :: Environment -> String remoteAddr = getVarWithDefault "REMOTE_ADDR" "" -- | If the server supports user authentication, and the script is -- protected, this is the protocol-specific authentication method -- used to validate the user. authType :: Environment -> Maybe String authType = lookup "AUTH_TYPE" -- | If the server supports user authentication, and the script is -- protected, this is the username they have authenticated as. remoteUser :: Environment -> Maybe String remoteUser = lookup "REMOTE_USER" -- | For queries which have attached information, such as -- HTTP POST and PUT, this is the content type of the data. -- You can use 'parseContentType' to get a structured -- representation of the the content-type value. requestContentType :: Environment -> Maybe String requestContentType = lookup "CONTENT_TYPE" -- | For queries which have attached information, such as -- HTTP POST and PUT, this is the length of the content -- given by the client. requestContentLength :: Environment -> Maybe Int requestContentLength env = lookup "CONTENT_LENGTH" env >>= maybeRead -- | Gets the value of the request header with the given name. -- The header name is case-insensitive. -- Example: -- -- > requestHeader "User-Agent" requestHeader :: Environment -> String -> Maybe String requestHeader env name = lookup var env where var = "HTTP_" ++ map toUpper (replace '-' '_' name) -- -- * Program and request URI -- -- | Attempts to reconstruct the absolute URI of this program. -- This does not include -- any extra path information or query parameters. See -- 'queryURI' for that. -- If the server is rewriting request URIs, this URI can -- be different from the one requested by the client. -- See also 'requestURI'. progURI :: Environment -> URI progURI env = let host = serverName env port = serverPort env name = scriptName env scheme = if port == 443 then "https:" else "http:" auth = URIAuth { uriUserInfo = "", uriRegName = host, uriPort = if port == 80 || port == 443 then "" else ":"++show port } in nullURI { uriScheme = scheme, uriAuthority = Just auth, uriPath = name } -- | Like 'progURI', but the returned 'URI' also includes -- any extra path information, and any query parameters. -- If the server is rewriting request URIs, this URI can -- be different from the one requested by the client. -- See also 'requestURI'. queryURI :: Environment -> URI queryURI env = let uri = progURI env path = pathInfo env qs = (\q -> if null q then q else '?':q) $ queryString env in uri { uriPath = uriPath uri ++ path, uriQuery = qs } -- | Attempts to reconstruct the absolute URI requested by the client, -- including extra path information and query parameters. -- If no request URI rewriting is done, or if the web server does not -- provide the information needed to reconstruct the request URI, -- this function returns the same value as 'queryURI'. requestURI :: Environment -> URI requestURI env = let uri = queryURI env -- Apache sets REQUEST_URI to the original request URI mreq = lookup "REQUEST_URI" env >>= parseRelativeReference in case mreq of Nothing -> uri Just req -> uri { uriPath = uriPath req, uriQuery = uriQuery req } -- -- * Error handling -- -- | Catches any exception thrown by the given action, -- returns an error page with a 500 Internal Server Error, -- showing the exception information, and logs the error. -- -- Typical usage: -- -- > cgiMain :: Environment -> Inputs -> IO (Headers,Body) -- > cgiMain = ... -- > -- > main :: IO () -- > main = runCGI (handleErrors cgiMain) handleErrors :: (Environment -> Inputs -> IO (Headers,Body)) -> Environment -> Inputs -> IO (Headers,Body) handleErrors a env inputs = a env inputs `Exception.catch` outputException env -- -- * Error output -- -- | Output a 500 Internal Server Error with information from -- an 'Exception'. outputException :: Environment -> Exception -> IO (Headers,Body) outputException env e = outputInternalServerError env es where es = case e of ErrorCall msg -> [msg] IOException ie -> ioe ie _ -> [show e] ioe ie = if isUserError ie then [ioeGetErrorString ie] else [show ie] -- | Output an error page to the user, with the given -- HTTP status code in the response. Also logs the error information -- to 'stderr'. outputError :: Environment -> Int -- ^ HTTP Status code -> String -- ^ Status message -> [String] -- ^ Error information -> IO (Headers,Body) outputError env c m es = do hPutStrLn stderr $ show (c,m,es) return ([("Status", show c ++ " " ++ m), ("Content-type", "text/html; charset=ISO-8859-1")], renderHtml $ errorPage env c m es) -- | Create an HTML error page. errorPage :: Environment -> Int -- ^ Status code -> String -- ^ Status message -> [String] -- ^ Error information -> Html errorPage env c m es = let server = fromMaybe "" $ lookup "SERVER_SOFTWARE" env host = serverName env port = show $ serverPort env tit = show c ++ " " ++ m sig = "Haskell CGILight" ++ " on " ++ server ++ " at " ++ host ++ ", port "++ port in header << thetitle << tit +++ body << (h1 << tit +++ map (paragraph <<) es +++ hr +++ address << sig) -- | Use 'outputError' to output and log a 500 Internal Server Error. outputInternalServerError :: Environment -> [String] -- ^ Error information. -> IO (Headers,Body) outputInternalServerError env es = outputError env 500 "Internal Server Error" es -- -- * Internals -- getVarWithDefault :: String -- ^ The name of the variable. -> String -- ^ Default value -> Environment -> String getVarWithDefault name def env = fromMaybe def $ lookup name env -- This ought to be getEnvironment, but olders versions -- of System.Environment don't have that. getCgiVars :: IO [(String,String)] getCgiVars = do vals <- mapM myGetEnv cgiVarNames return (zip cgiVarNames vals) myGetEnv :: String -> IO String myGetEnv v = Prelude.catch (getEnv v) (const (return "")) cgiVarNames :: [String] cgiVarNames = [ "DOCUMENT_ROOT" , "AUTH_TYPE" , "GATEWAY_INTERFACE" , "SERVER_SOFTWARE" , "SERVER_NAME" , "REQUEST_METHOD" , "SERVER_ADMIN" , "SERVER_PORT" , "QUERY_STRING" , "CONTENT_LENGTH" , "CONTENT_TYPE" , "REMOTE_USER" , "REMOTE_IDENT" , "REMOTE_ADDR" , "REMOTE_HOST" , "TZ" , "PATH" , "PATH_INFO" , "PATH_TRANSLATED" , "SCRIPT_NAME" , "SCRIPT_FILENAME" , "SCRIPT_URI" , "SCRIPT_URL" , "HTTP_CONNECTION" , "HTTP_ACCEPT_LANGUAGE" , "HTTP_ACCEPT" , "HTTP_HOST" , "HTTP_UA_COLOR" , "HTTP_UA_CPU" , "HTTP_UA_OS" , "HTTP_UA_PIXELS" , "HTTP_USER_AGENT" ] -- -- * Inputs -- -- | Get and decode the input according to the request -- method and the content-type. decodeInput :: [(String,String)] -- ^ CGI environment variables. -> String -- ^ Request body. -> Inputs -- ^ Input variables and values. decodeInput env inp = queryInput env ++ bodyInput env inp -- -- * Query string -- -- | Get inputs from the query string. queryInput :: [(String,String)] -- ^ CGI environment variables. -> Inputs -- ^ Input variables and values. queryInput env = formInput $ lookupOrNil "QUERY_STRING" env -- | Decode application\/x-www-form-urlencoded inputs. formInput :: String -> Inputs -- ^ Input variables and values. formInput qs = [(n, v) | (n,v) <- formDecode qs] -- -- * URL encoding -- -- | Format name-value pairs as application\/x-www-form-urlencoded. formEncode :: [(String,String)] -> String formEncode xs = concat $ intersperse "&" [urlEncode n ++ "=" ++ urlEncode v | (n,v) <- xs] -- | Convert a single value to the application\/x-www-form-urlencoded encoding. urlEncode :: String -> String urlEncode = replace ' ' '+' . escapeURIString okChar where okChar c = c == ' ' || (isUnescapedInURI c && c `notElem` "&=+") -- | Get the name-value pairs from application\/x-www-form-urlencoded data. formDecode :: String -> [(String,String)] formDecode "" = [] formDecode s = (urlDecode n, urlDecode (drop 1 v)) : formDecode (drop 1 rs) where (nv,rs) = break (=='&') s (n,v) = break (=='=') nv -- | Convert a single value from the -- application\/x-www-form-urlencoded encoding. urlDecode :: String -> String urlDecode = unEscapeString . replace '+' ' ' -- -- * Request content -- -- | Get input variables from the body, if any. bodyInput :: [(String,String)] -- ^ CGI environment variables. -> String -- ^ Request body. -> Inputs -- ^ Input variables and values. bodyInput env inp = case lookup "REQUEST_METHOD" env of Just "POST" -> let ctype = lookup "CONTENT_TYPE" env >>= parseContentType in decodeBody ctype $ takeInput env inp _ -> [] -- | Decode a POST body. decodeBody :: Maybe ContentType -- ^ Content-type, if any -> String -- ^ Request body -> Inputs -- ^ Input variables and values. decodeBody ctype inp = case ctype of Just (ContentType "application" "x-www-form-urlencoded" _) -> formInput inp Just _ -> [] -- unknown content-type, the user will have to -- deal with it by looking at the raw content -- No content-type given, assume x-www-form-urlencoded Nothing -> formInput inp -- | Take the right number of bytes from the input. takeInput :: [(String,String)] -- ^ CGI environment variables. -> String -- ^ Request body. -> String -- ^ CONTENT_LENGTH bytes from the request -- body, or the empty string if there is no -- CONTENT_LENGTH. takeInput env req = case len of Just l -> take l req Nothing -> "" where len = lookup "CONTENT_LENGTH" env >>= maybeRead -- -- * Utilities -- -- | Replace all instances of a value in a list by another value. replace :: Eq a => a -- ^ Value to look for -> a -- ^ Value to replace it with -> [a] -- ^ Input list -> [a] -- ^ Output list replace x y = map (\z -> if z == x then y else z) maybeRead :: Read a => String -> Maybe a maybeRead = fmap fst . listToMaybe . reads -- | Same as 'lookup' specialized to strings, but -- returns the empty string if lookup fails. lookupOrNil :: String -> [(String,String)] -> String lookupOrNil n = fromMaybe "" . lookup n -- -- * Parameters (for Content-type etc.) -- showParameters :: [(String,String)] -> String showParameters = concatMap f where f (n,v) = "; " ++ n ++ "=\"" ++ concatMap esc v ++ "\"" esc '\\' = "\\\\" esc '"' = "\\\"" esc c | c `elem` ['\\','"'] = '\\':[c] | otherwise = [c] p_parameter :: Parser (String,String) p_parameter = do lexeme $ char ';' p_name <- lexeme $ p_token lexeme $ char '=' -- Workaround for seemingly standardized web browser bug -- where nothing is escaped in the filename parameter -- of the content-disposition header in multipart/form-data let litStr = if p_name == "filename" then buggyLiteralString else literalString p_value <- litStr <|> p_token return (map toLower p_name, p_value) -- -- * Content type -- data ContentType = ContentType String String [(String, String)] deriving (Show, Read, Eq, Ord) -- | Produce the standard string representation of a content-type, -- e.g. \"text\/html; charset=ISO-8859-1\". showContentType :: ContentType -> String showContentType (ContentType x y ps) = x ++ "/" ++ y ++ showParameters ps pContentType :: Parser ContentType pContentType = do many ws1 c_type <- p_token lexeme $ char '/' c_subtype <- lexeme $ p_token c_parameters <- many p_parameter return $ ContentType (map toLower c_type) (map toLower c_subtype) c_parameters parseContentType :: Monad m => String -> m ContentType parseContentType = parseM pContentType "Content-type" -- -- * Utilities -- parseM :: Monad m => Parser a -> SourceName -> String -> m a parseM p n inp = case parse p n inp of Left e -> fail (show e) Right x -> return x -- -- * Parsing utilities -- -- | RFC 822 LWSP-char ws1 :: Parser Char ws1 = oneOf " \t" lexeme :: Parser a -> Parser a lexeme p = do x <- p; many ws1; return x literalString :: Parser String literalString = do char '\"' str <- many (noneOf "\"\\" <|> quoted_pair) char '\"' return str -- No web browsers seem to implement RFC 2046 correctly, -- since they do not escape double quotes and backslashes -- in the filename parameter in multipart/form-data. -- -- Note that this eats everything until the last double quote on the line. buggyLiteralString :: Parser String buggyLiteralString = do char '\"' str <- manyTill anyChar (try lastQuote) return str where lastQuote = do char '\"' notFollowedBy (try (many (noneOf "\"") >> char '\"')) especials, tokenchar :: [Char] especials = "()<>@,;:\\\"/[]?.=" tokenchar = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" \\ especials p_token :: Parser String p_token = many1 (oneOf tokenchar) text_chars :: [Char] text_chars = map chr ([1..9] ++ [11,12] ++ [14..127]) p_text :: Parser Char p_text = oneOf text_chars quoted_pair :: Parser Char quoted_pair = do char '\\' p_text