hxt-9.3.1.22/examples/ 0000755 0000000 0000000 00000000000 12752557014 012565 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/ 0000755 0000000 0000000 00000000000 12752557013 014101 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/ 0000755 0000000 0000000 00000000000 12752557013 020711 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/PicklerExample/ 0000755 0000000 0000000 00000000000 12752557013 023616 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/HelloWorld/ 0000755 0000000 0000000 00000000000 12752557013 016154 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/absurls/ 0000755 0000000 0000000 00000000000 12752557013 015554 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/dtd2hxt/ 0000755 0000000 0000000 00000000000 12752557013 015462 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/hparser/ 0000755 0000000 0000000 00000000000 12752557013 015545 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/performance/ 0000755 0000000 0000000 00000000000 12752557013 016402 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/arrows/pickle/ 0000755 0000000 0000000 00000000000 12752557013 015350 5 ustar 00 0000000 0000000 hxt-9.3.1.22/examples/xhtml/ 0000755 0000000 0000000 00000000000 12752557014 013721 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/ 0000755 0000000 0000000 00000000000 12752557014 011536 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Control/ 0000755 0000000 0000000 00000000000 12752557014 013156 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Control/Arrow/ 0000755 0000000 0000000 00000000000 13625174751 014253 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Data/ 0000755 0000000 0000000 00000000000 14025460147 012402 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Data/Function/ 0000755 0000000 0000000 00000000000 12752557014 014174 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Data/Tree/ 0000755 0000000 0000000 00000000000 12752557014 013306 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Data/Tree/NTree/ 0000755 0000000 0000000 00000000000 13625201404 014310 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Data/Tree/NTree/Zipper/ 0000755 0000000 0000000 00000000000 12752557014 015574 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Data/Tree/NavigatableTree/ 0000755 0000000 0000000 00000000000 12752557014 016343 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/ 0000755 0000000 0000000 00000000000 12752557014 012462 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/ 0000755 0000000 0000000 00000000000 12752557014 013122 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/ 0000755 0000000 0000000 00000000000 12752557014 013565 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/Arrow/ 0000755 0000000 0000000 00000000000 13625174751 014662 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/Arrow/Pickle/ 0000755 0000000 0000000 00000000000 14025460147 016061 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState/ 0000755 0000000 0000000 00000000000 14025461240 016406 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/DOM/ 0000755 0000000 0000000 00000000000 14025463522 014177 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/ 0000755 0000000 0000000 00000000000 12752557014 016213 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/IO/ 0000755 0000000 0000000 00000000000 12752557014 014074 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/Parser/ 0000755 0000000 0000000 00000000000 14025461263 015014 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Text/XML/HXT/XMLSchema/ 0000755 0000000 0000000 00000000000 12752557014 015346 5 ustar 00 0000000 0000000 hxt-9.3.1.22/src/Control/Arrow/ArrowExc.hs 0000644 0000000 0000000 00000001667 12752557014 016350 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Control.Arrow.ArrowExc
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe\@fh-wedel.de)
Stability : experimental
Portability: not portable
The exception arrow class
-}
-- ------------------------------------------------------------
module Control.Arrow.ArrowExc
( ArrowExc(..)
)
where
import Control.Arrow
import Control.Arrow.ArrowIO
import Control.Exception ( SomeException
)
class (Arrow a, ArrowChoice a, ArrowZero a, ArrowIO a) => ArrowExc a where
tryA :: a b c -> a b (Either SomeException c)
catchA :: a b c -> a SomeException c -> a b c
catchA f h = tryA f
>>>
( h ||| returnA )
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Control/Arrow/ArrowIO.hs 0000644 0000000 0000000 00000004202 12752557014 016124 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Control.Arrow.ArrowIO
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe\@fh-wedel.de)
Stability : experimental
Portability: portable
Lifting of IO actions to arrows
-}
-- ------------------------------------------------------------
module Control.Arrow.ArrowIO
( ArrowIO(..)
, ArrowIOIf(..)
)
where
import Control.Arrow
-- | the interface for converting an IO action into an arrow
class Arrow a => ArrowIO a where
-- | construct an arrow from an IO action
arrIO :: (b -> IO c) -> a b c
-- | construct an arrow from an IO action without any parameter
arrIO0 :: IO c -> a b c
arrIO0 f = arrIO (const f)
{-# INLINE arrIO0 #-}
-- | construction of a 2 argument arrow from a binary IO action
-- |
-- | example: @ a1 &&& a2 >>> arr2 f @
arrIO2 :: (b1 -> b2 -> IO c) -> a (b1, b2) c
arrIO2 f = arrIO (\ ~(x1, x2) -> f x1 x2)
{-# INLINE arrIO2 #-}
-- | construction of a 3 argument arrow from a 3-ary IO action
-- |
-- | example: @ a1 &&& a2 &&& a3 >>> arr3 f @
arrIO3 :: (b1 -> b2 -> b3 -> IO c) -> a (b1, (b2, b3)) c
arrIO3 f = arrIO (\ ~(x1, ~(x2, x3)) -> f x1 x2 x3)
{-# INLINE arrIO3 #-}
-- | construction of a 4 argument arrow from a 4-ary IO action
-- |
-- | example: @ a1 &&& a2 &&& a3 &&& a4 >>> arr4 f @
arrIO4 :: (b1 -> b2 -> b3 -> b4 -> IO c) -> a (b1, (b2, (b3, b4))) c
arrIO4 f = arrIO (\ ~(x1, ~(x2, ~(x3, x4))) -> f x1 x2 x3 x4)
{-# INLINE arrIO4 #-}
-- | the interface for converting an IO predicate into a list arrow
class (Arrow a, ArrowIO a) => ArrowIOIf a where
-- | builds an arrow from an IO predicate
--
-- if the predicate holds, the single list containing the input is returned, else the empty list,
-- similar to 'Control.Arrow.ArrowList.isA'
isIOA :: (b -> IO Bool) -> a b b
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Control/Arrow/ArrowIf.hs 0000644 0000000 0000000 00000012536 12752557014 016164 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Control.Arrow.ArrowIf
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe\@fh-wedel.de)
Stability : experimental
Portability: portable
Conditionals for List Arrows
This module defines conditional combinators for list arrows.
The empty list as result represents False, none empty lists True.
-}
-- ------------------------------------------------------------
module Control.Arrow.ArrowIf
( module Control.Arrow.ArrowIf
)
where
import Control.Arrow
import Control.Arrow.ArrowList
import Data.List
( partition )
-- ------------------------------------------------------------
-- | The interface for arrows as conditionals.
--
-- Requires list arrows because False is represented as empty list, True as none empty lists.
--
-- Only 'ifA' and 'orElse' don't have default implementations
class ArrowList a => ArrowIf a where
-- | if lifted to arrows
ifA :: a b c -> a b d -> a b d -> a b d
-- | shortcut: @ ifP p = ifA (isA p) @
ifP :: (b -> Bool) -> a b d -> a b d -> a b d
ifP p = ifA (isA p)
{-# INLINE ifP #-}
-- | negation: @ neg f = ifA f none this @
neg :: a b c -> a b b
neg f = ifA f none this
{-# INLINE neg #-}
-- | @ f \`when\` g @ : when the predicate g holds, f is applied, else the identity filter this
when :: a b b -> a b c -> a b b
f `when` g = ifA g f this
{-# INLINE when #-}
-- | shortcut: @ f \`whenP\` p = f \`when\` (isA p) @
whenP :: a b b -> (b -> Bool) -> a b b
f `whenP` g = ifP g f this
{-# INLINE whenP #-}
-- | @ f \`whenNot\` g @ : when the predicate g does not hold, f is applied, else the identity filter this
whenNot :: a b b -> a b c -> a b b
f `whenNot` g = ifA g this f
{-# INLINE whenNot #-}
-- | like 'whenP'
whenNotP :: a b b -> (b -> Bool) -> a b b
f `whenNotP` g = ifP g this f
{-# INLINE whenNotP #-}
-- | @ g \`guards\` f @ : when the predicate g holds, f is applied, else none
guards :: a b c -> a b d -> a b d
f `guards` g = ifA f g none
{-# INLINE guards #-}
-- | like 'whenP'
guardsP :: (b -> Bool) -> a b d -> a b d
f `guardsP` g = ifP f g none
{-# INLINE guardsP #-}
-- | shortcut for @ f `guards` this @
filterA :: a b c -> a b b
filterA f = ifA f this none
{-# INLINE filterA #-}
-- | @ f \`containing\` g @ : keep only those results from f for which g holds
--
-- definition: @ f \`containing\` g = f >>> g \`guards\` this @
containing :: a b c -> a c d -> a b c
f `containing` g = f >>> g `guards` this
{-# INLINE containing #-}
-- | @ f \`notContaining\` g @ : keep only those results from f for which g does not hold
--
-- definition: @ f \`notContaining\` g = f >>> ifA g none this @
notContaining :: a b c -> a c d -> a b c
f `notContaining` g = f >>> ifA g none this
{-# INLINE notContaining #-}
-- | @ f \`orElse\` g @ : directional choice: if f succeeds, the result of f is the result, else g is applied
orElse :: a b c -> a b c -> a b c
-- | generalisation of 'orElse' for multi way branches like in case expressions.
--
-- An auxiliary data type 'IfThen' with an infix constructor ':->' is used for writing multi way branches
--
-- example: @ choiceA [ p1 :-> e1, p2 :-> e2, this :-> default ] @
choiceA :: [IfThen (a b c) (a b d)] -> a b d
choiceA = foldr ifA' none
where
ifA' (g :-> f) = ifA g f
-- | tag a value with Left or Right, if arrow has success, input is tagged with Left, else with Right
tagA :: a b c -> a b (Either b b)
tagA p = ifA p (arr Left) (arr Right)
-- | split a list value with an arrow and returns a pair of lists.
-- This is the arrow version of 'span'. The arrow is deterministic.
--
-- example: @ runLA (spanA (isA (\/= \'-\'))) \"abc-def\" @ gives @ [(\"abc\",\"-def\")] @ as result
spanA :: a b b -> a [b] ([b],[b])
spanA p = ifA ( arrL (take 1) >>> p )
( arr head &&& (arr tail >>> spanA p)
>>>
arr (\ ~(x, ~(xs,ys)) -> (x : xs, ys))
)
( arr (\ l -> ([],l)) )
-- | partition a list of values into a pair of lists
--
-- This is the arrow Version of 'Data.List.partition'
partitionA :: a b b -> a [b] ([b],[b])
partitionA p = listA ( arrL id >>> tagA p )
>>^
( (\ ~(l1, l2) -> (unTag l1, unTag l2) ) . partition (isLeft) )
where
isLeft (Left _) = True
isLeft _ = False
unTag = map (either id id)
-- ------------------------------------------------------------
-- | an auxiliary data type for 'choiceA'
data IfThen a b = a :-> b
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Control/Arrow/ArrowList.hs 0000644 0000000 0000000 00000027774 13625174751 016556 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Control.Arrow.ArrowList
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe\@fh-wedel.de)
Stability : experimental
Portability: portable
The list arrow class
This module defines the interface for list arrows.
A list arrow is a function that gives a list of results
for a given argument. A single element result represents a normal function.
An empty list often indicates that the function is undefined
for the given argument.
The empty list may also represent False, non-empty lists True.
A list with more than one element gives all results for a
so called nondeterministic function.
-}
-- ------------------------------------------------------------
module Control.Arrow.ArrowList
( ArrowList(..)
)
where
import Control.Arrow
infixl 8 >>., >.
infixl 2 $<, $<<, $<<<, $<<<<
infixl 2 $<$
-- ------------------------------------------------------------
-- | The interface for list arrows
--
-- Only 'mkA', 'isA' '(>>.)' don't have default implementations
class (Arrow a, ArrowPlus a, ArrowZero a, ArrowApply a) => ArrowList a where
-- | construction of a 2 argument arrow from a binary function
-- |
-- | example: @ a1 &&& a2 >>> arr2 f @
arr2 :: (b1 -> b2 -> c) -> a (b1, b2) c
arr2 = arr . uncurry
{-# INLINE arr2 #-}
-- | construction of a 3 argument arrow from a 3-ary function
-- |
-- | example: @ a1 &&& a2 &&& a3 >>> arr3 f @
arr3 :: (b1 -> b2 -> b3 -> c) -> a (b1, (b2, b3)) c
arr3 f = arr (\ ~(x1, ~(x2, x3)) -> f x1 x2 x3)
{-# INLINE arr3 #-}
-- | construction of a 4 argument arrow from a 4-ary function
-- |
-- | example: @ a1 &&& a2 &&& a3 &&& a4 >>> arr4 f @
arr4 :: (b1 -> b2 -> b3 -> b4 -> c) -> a (b1, (b2, (b3, b4))) c
arr4 f = arr (\ ~(x1, ~(x2, ~(x3, x4))) -> f x1 x2 x3 x4)
{-# INLINE arr4 #-}
-- | construction of a 2 argument arrow from a singe argument arrow
arr2A :: (b -> a c d) -> a (b, c) d
arr2A f = first (arr f) >>> app
{-# INLINE arr2A #-}
-- | constructor for a list arrow from a function with a list as result
arrL :: (b -> [c]) -> a b c
-- | constructor for a list arrow with 2 arguments
arr2L :: (b -> c -> [d]) -> a (b, c) d
arr2L = arrL . uncurry
{-# INLINE arr2L #-}
-- | constructor for a const arrow: @ constA = arr . const @
constA :: c -> a b c
constA = arr . const
{-# INLINE constA #-}
-- | constructor for a const arrow: @ constL = arrL . const @
constL :: [c] -> a b c
constL = arrL . const
{-# INLINE constL #-}
-- | builds an arrow from a predicate.
-- If the predicate holds, the single list containing the input is returned, else the empty list
isA :: (b -> Bool) -> a b b
-- | combinator for converting the result of a list arrow into another list
--
-- example: @ foo >>. reverse @ reverses the the result of foo
--
-- example: @ foo >>. take 1 @ constructs a deterministic version of foo by deleting all further results
(>>.) :: a b c -> ([c] -> [d]) -> a b d
-- | combinator for converting the result of an arrow into a single element result
(>.) :: a b c -> ([c] -> d ) -> a b d
af >. f = af >>. ((:[]) . f)
{-# INLINE (>.) #-}
-- | combinator for converting an arrow into a determinstic version with all results collected in a single element list
--
-- @ listA af = af >>. (:[]) @
--
-- this is useful when the list of results computed by an arrow must be manipulated (e.g. sorted)
--
-- example for sorting the results of a filter
--
-- > collectAndSort :: a b c -> a b c
-- >
-- > collectAndSort collect = listA collect >>> arrL sort
listA :: a b c -> a b [c]
listA af = af >>. (:[])
{-# INLINE listA #-}
-- | the inverse of 'listA'
--
-- @ listA af >>> unlistA = af @
--
-- unlistA is defined as @ arrL id @
unlistA :: a [b] b
unlistA = arrL id
{-# INLINE unlistA #-}
-- | the identity arrow, alias for returnA
this :: a b b
this = returnA
{-# INLINE this #-}
-- | the zero arrow, alias for zeroArrow
none :: a b c
none = zeroArrow
{-# INLINE none #-}
-- | converts an arrow, that may fail, into an arrow that always succeeds
--
-- example: @ withDefault none \"abc\" @ is equivalent to @ constA \"abc\" @
withDefault :: a b c -> c -> a b c
withDefault a d = a >>. \ x -> if null x then [d] else x
{-# INLINE withDefault #-}
-- | makes a list arrow deterministic, the number of results is at most 1
--
-- definition
--
-- > single f = f >>. take 1
--
-- examples with strings:
--
-- > runLA ( single none ) "x" == []
-- > runLA ( single this ) "x" == ["x"]
-- > runLA ( single
-- > (constA "y"
-- > <+> this ) ) "x" == ["y"]
single :: a b c -> a b c
single f = f >>. take 1
-- | compute an arrow from the input and apply the arrow to this input
--
-- definition: @ (f &&& this) >>> app @
--
-- in a point free style, there is no way to use an argument in 2 places,
-- this is a combinator for simulating this. first the argument is used to compute an arrow,
-- then this new arrow is applied to the input
--
-- applyA coresponds to: @ apply f x = let g = f x in g x @
--
-- see also: '$<', '$<<', '$<<<', '$<<<<', '$<$'
applyA :: a b (a b c) -> a b c
applyA f = (f &&& this) >>> app
-- | compute the parameter for an arrow with extra parameters from the input
-- and apply the arrow for all parameter values to the input
--
-- a kind of \"function call\" for arrows, useful for joining arrows
--
-- > infixl 2 ($<)
--
-- definition:
--
-- > g $< f = applyA (f >>> arr g)
--
-- if @f@ fails, the whole arrow fails, e.g. @ g \$\< none == none @
--
-- if @f@ computes n values and @g@ is deterministic, the whole arrow computes n values
--
-- examples with simple list arrows with strings
--
-- > prefixString :: String -> a String String
-- > prefixString s = arr (s++)
-- >
-- > runLA ( prefixString $< none ) "x" == []
-- > runLA ( prefixString $< constA "y" ) "x" == ["yx"]
-- > runLA ( prefixString $< this ) "x" == ["xx"]
-- > runLA ( prefixString $< constA "y"
-- > <+> constA "z" ) "x" == ["yx","zx"]
-- > runLA ( prefixString $< constA "y"
-- > <+> this
-- > <+> constA "z" ) "x" == ["yx","xx","zx"]
--
-- see also: 'applyA', '$<<', '$<<<', '$<<<<', '$<$'
($<) :: (c -> a b d) -> a b c -> a b d
g $< f = applyA (f >>> arr g)
-- | binary version of '$<'
--
-- example with simple list arrows with strings
--
-- > infixString :: String -> String -> a String String
-- > infixString s1 s2
-- > = arr (\ s -> s1 ++ s ++ s2)
-- >
-- > runLA ( infixString $<< constA "y" &&& constA "z" ) "x" = ["yxz"]
-- > runLA ( infixString $<< this &&& this ) "x" = ["xxx"]
-- > runLA ( infixString $<< constA "y"
-- > &&& (constA "z" <+> this) ) "x" = ["yxz", "yxx"]
($<<) :: (c1 -> c2 -> a b d) -> a b (c1, c2) -> a b d
f $<< g = applyA (g >>> arr2 f)
-- | version of '$<' for arrows with 3 extra parameters
--
-- typical usage
--
-- > f $<<< g1 &&& g2 &&& g3
($<<<) :: (c1 -> c2 -> c3 -> a b d) -> a b (c1, (c2, c3)) -> a b d
f $<<< g = applyA (g >>> arr3 f)
-- | version of '$<' for arrows with 4 extra parameters
--
-- typical usage
--
-- > f $<<<< g1 &&& g2 &&& g3 &&& g4
($<<<<) :: (c1 -> c2 -> c3 -> c4 -> a b d) -> a b (c1, (c2, (c3, c4))) -> a b d
f $<<<< g = applyA (g >>> arr4 f)
-- | compute the parameter for an arrow @f@ with an extra parameter by an arrow @g@
-- and apply all the results from @g@ sequentially to the input
--
-- > infixl 2 ($<$)
--
-- typical usage:
--
-- > g :: a b c
-- > g = ...
-- >
-- > f :: c -> a b b
-- > f x = ... x ...
-- >
-- > f $<$ g
--
-- @f@ computes the extra parameters for @g@ from the input of type @b@ and @g@ is applied with this
-- parameter to the input. This allows programming in a point wise style in @g@, which becomes
-- neccessary, when a value is needed more than once.
--
-- this combinator is useful, when transforming a single value (document) step by step,
-- with @g@ for collecting the data for all steps, and @f@ for transforming the input step by step
--
-- if @g@ is deterministic (computes exactly one result),
-- @ g $\<$ f == g $\< f @ holds
--
-- if @g@ fails, @ f $<$ g == this @
--
-- if @g@ computes more than one result, @f@ is applied sequentially to the input for every result from @g@
--
-- examples with simple list arrows with strings
--
-- > prefixString :: String -> a String String
-- > prefixString s = arr (s++)
-- >
-- > runLA ( prefixString $<$ none ) "x" == ["x"]
-- > runLA ( prefixString $<$ constA "y" ) "x" == ["yx"]
-- > runLA ( prefixString $<$ constA "y" <+> constA "z" ) "x" == ["zyx"]
-- > runLA ( prefixString $<$ constA "y" <+> this
-- > <+> constA "z" ) "x" == ["zxyx"]
--
-- example with two extra parameter
--
-- > g1 :: a b c1
-- > g2 :: a b c2
-- >
-- > f :: (c1, c2) -> a b b
-- > f (x1, x2) = ... x1 ... x2 ...
-- >
-- > f $<$ g1 &&& g2
--
-- see also: 'applyA', '$<'
($<$) :: (c -> (a b b)) -> a b c -> a b b
g $<$ f = applyA (listA (f >>> arr g) >>> arr seqA)
-- | merge the result pairs of an arrow with type @a a1 (b1, b2)@
-- by combining the tuple components with the @op@ arrow
--
-- examples with simple list arrows working on strings and XmlTrees
--
-- > a1 :: a String (XmlTree, XmlTree)
-- > a1 = selem "foo" [this >>> mkText]
-- > &&&
-- > selem "bar" [arr (++"0") >>> mkText]
-- >
-- > runLA (a1 >>> mergeA (<+>) >>> xshow this) "42" == ["
-elements, is done.
--
--
-- see also : 'removeAllWhiteSpace', 'removeDocWhiteSpace'
removeWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeWhiteSpace = fromLA $ none `when` isWhiteSpace
-- |
-- simple recursive filter for removing all whitespace.
--
-- removes all text nodes in a tree that consist only of whitespace.
--
--
-- see also : 'removeWhiteSpace', 'removeDocWhiteSpace'
removeAllWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeAllWhiteSpace = fromLA $ editNTreeA [isWhiteSpace :-> none]
-- fromLA $ processBottomUp removeWhiteSpace' -- less efficient
-- ------------------------------------------------------------
-- |
-- filter for removing all not significant whitespace.
--
-- the tree traversed for removing whitespace between elements,
-- that was inserted for indentation and readability.
-- whitespace is only removed at places, where it's not significat
-- preserving whitespace may be controlled in a document tree
-- by a tag attribute @xml:space@
--
-- allowed values for this attribute are @default | preserve@
--
-- input is root node of the document to be cleaned up,
-- output the semantically equivalent simplified tree
--
--
-- see also : 'indentDoc', 'removeAllWhiteSpace'
removeDocWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeDocWhiteSpace = fromLA $ removeRootWhiteSpace
removeRootWhiteSpace :: LA XmlTree XmlTree
removeRootWhiteSpace
= processChildren processRootElement
`when`
isRoot
where
processRootElement :: LA XmlTree XmlTree
processRootElement
= removeWhiteSpace >>> processChild
where
processChild
= choiceA [ isDTD
:-> removeAllWhiteSpace -- whitespace in DTD is redundant
, this
:-> replaceChildren ( getChildren
>>. indentTrees insertNothing False 1
)
]
-- ------------------------------------------------------------
-- |
-- filter for indenting a document tree for pretty printing.
--
-- the tree is traversed for inserting whitespace for tag indentation.
--
-- whitespace is only inserted or changed at places, where it isn't significant,
-- is's not inserted between tags and text containing non whitespace chars.
--
-- whitespace is only inserted or changed at places, where it's not significant.
-- preserving whitespace may be controlled in a document tree
-- by a tag attribute @xml:space@
--
-- allowed values for this attribute are @default | preserve@.
--
-- input is a complete document tree or a document fragment
-- result is the semantically equivalent formatted tree.
--
--
-- see also : 'removeDocWhiteSpace'
indentDoc :: ArrowXml a => a XmlTree XmlTree
indentDoc = fromLA $
( ( isRoot `guards` indentRoot )
`orElse`
(root [] [this] >>> indentRoot >>> getChildren)
)
-- ------------------------------------------------------------
indentRoot :: LA XmlTree XmlTree
indentRoot = processChildren indentRootChildren
where
indentRootChildren
= removeText >>> indentChild >>> insertNL
where
removeText = none `when` isText
insertNL = this <+> txt "\n"
indentChild = ( replaceChildren
( getChildren
>>.
indentTrees (insertIndentation 2) False 1
)
`whenNot` isDTD
)
-- ------------------------------------------------------------
--
-- copied from EditFilter and rewritten for arrows
-- to remove dependency to the filter module
indentTrees :: (Int -> LA XmlTree XmlTree) -> Bool -> Int -> XmlTrees -> XmlTrees
indentTrees _ _ _ []
= []
indentTrees indentFilter preserveSpace level ts
= runLAs lsf ls
++
indentRest rs
where
runLAs f l
= runLA (constL l >>> f) undefined
(ls, rs)
= break XN.isElem ts
isSignificant :: Bool
isSignificant
= preserveSpace
||
(not . null . runLAs isSignificantPart) ls
isSignificantPart :: LA XmlTree XmlTree
isSignificantPart
= catA
[ isText `guards` neg isWhiteSpace
, isCdata
, isCharRef
, isEntityRef
]
lsf :: LA XmlTree XmlTree
lsf
| isSignificant
= this
| otherwise
= (none `when` isWhiteSpace)
>>>
(indentFilter level <+> this)
indentRest :: XmlTrees -> XmlTrees
indentRest []
| isSignificant
= []
| otherwise
= runLA (indentFilter (level - 1)) undefined
indentRest (t':ts')
= runLA ( ( indentElem
>>>
lsf
)
`when` isElem
) t'
++
( if null ts'
then indentRest
else indentTrees indentFilter preserveSpace level
) ts'
where
indentElem
= replaceChildren ( getChildren
>>.
indentChildren
)
xmlSpaceAttrValue :: String
xmlSpaceAttrValue
= concat . runLA (getAttrValue "xml:space") $ t'
preserveSpace' :: Bool
preserveSpace'
= ( fromMaybe preserveSpace
.
lookup xmlSpaceAttrValue
) [ ("preserve", True)
, ("default", False)
]
indentChildren :: XmlTrees -> XmlTrees
indentChildren cs'
| all (maybe False (all isXmlSpaceChar) . XN.getText) cs'
= []
| otherwise
= indentTrees indentFilter preserveSpace' (level + 1) cs'
-- filter for indenting elements
insertIndentation :: Int -> Int -> LA a XmlTree
insertIndentation indentWidth level
= txt ('\n' : replicate (level * indentWidth) ' ')
-- filter for removing all whitespace
insertNothing :: Int -> LA a XmlTree
insertNothing _ = none
-- ------------------------------------------------------------
-- |
-- converts a CDATA section into normal text nodes
transfCdata :: ArrowXml a => a XmlTree XmlTree
transfCdata = fromLA $
(getCdata >>> mkText) `when` isCdata
-- |
-- converts CDATA sections in whole document tree into normal text nodes
transfAllCdata :: ArrowXml a => a XmlTree XmlTree
transfAllCdata = fromLA $ editNTreeA [isCdata :-> (getCdata >>> mkText)]
-- |
-- converts a character reference to normal text
transfCharRef :: ArrowXml a => a XmlTree XmlTree
transfCharRef = fromLA $
( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText )
`when`
isCharRef
-- |
-- recursively converts all character references to normal text
transfAllCharRef :: ArrowXml a => a XmlTree XmlTree
transfAllCharRef = fromLA $ editNTreeA [isCharRef :-> (getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText)]
-- ------------------------------------------------------------
rememberDTDAttrl :: ArrowList a => a XmlTree XmlTree
rememberDTDAttrl
= fromLA $
( ( addDTDAttrl $< ( getChildren >>> isDTDDoctype >>> getDTDAttrl ) )
`orElse`
this
)
where
addDTDAttrl al
= seqA . map (uncurry addAttr) . map (first (dtdPrefix ++)) $ al
addDefaultDTDecl :: ArrowList a => a XmlTree XmlTree
addDefaultDTDecl
= fromLA $
( addDTD $< listA (getAttrl >>> (getName &&& xshow getChildren) >>> hasDtdPrefix) )
where
hasDtdPrefix
= isA (fst >>> (dtdPrefix `isPrefixOf`))
>>>
arr (first (drop (length dtdPrefix)))
addDTD []
= this
addDTD al
= replaceChildren
( mkDTDDoctype al none
<+>
txt "\n"
<+>
( getChildren >>> (none `when` isDTDDoctype) ) -- remove old DTD decl
)
-- ------------------------------------------------------------
hasXmlPi :: ArrowXml a => a XmlTree XmlTree
hasXmlPi
= fromLA
( getChildren
>>>
isPi
>>>
hasName t_xml
)
-- | add an \ processing instruction
-- if it's not already there
addXmlPi :: ArrowXml a => a XmlTree XmlTree
addXmlPi
= fromLA
( insertChildrenAt 0 ( ( mkPi (mkName t_xml) none
>>>
addAttr a_version "1.0"
)
<+>
txt "\n"
)
`whenNot`
hasXmlPi
)
-- | add an encoding spec to the \ processing instruction
addXmlPiEncoding :: ArrowXml a => String -> a XmlTree XmlTree
addXmlPiEncoding enc
= fromLA $
processChildren ( addAttr a_encoding enc
`when`
( isPi >>> hasName t_xml )
)
-- | add an XHTML strict doctype declaration to a document
addXHtmlDoctypeStrict
, addXHtmlDoctypeTransitional
, addXHtmlDoctypeFrameset :: ArrowXml a => a XmlTree XmlTree
-- | add an XHTML strict doctype declaration to a document
addXHtmlDoctypeStrict
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
-- | add an XHTML transitional doctype declaration to a document
addXHtmlDoctypeTransitional
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
-- | add an XHTML frameset doctype declaration to a document
addXHtmlDoctypeFrameset
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"
-- | add a doctype declaration to a document
--
-- The arguments are the root element name, the PUBLIC id and the SYSTEM id
addDoctypeDecl :: ArrowXml a => String -> String -> String -> a XmlTree XmlTree
addDoctypeDecl rootElem public system
= fromLA $
replaceChildren
( mkDTDDoctype ( ( if null public then id else ( (k_public, public) : ) )
.
( if null system then id else ( (k_system, system) : ) )
$ [ (a_name, rootElem) ]
) none
<+>
txt "\n"
<+>
getChildren
)
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/GeneralEntitySubstitution.hs 0000644 0000000 0000000 00000031547 12752557014 022434 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.GeneralEntitySubstitution
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
general entity substitution
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.GeneralEntitySubstitution
( processGeneralEntities )
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.ParserInterface
( parseXmlEntityValueAsAttrValue
, parseXmlEntityValueAsContent
)
import Text.XML.HXT.Arrow.Edit
( transfCharRef
)
import Text.XML.HXT.Arrow.DocumentInput
( getXmlEntityContents
)
import qualified Data.Map as M
( Map
, empty
, lookup
, insert
)
-- ------------------------------------------------------------
data GEContext
= ReferenceInContent
| ReferenceInAttributeValue
| ReferenceInEntityValue
-- or OccursInAttributeValue -- not used during substitution but during validation
-- or ReferenceInDTD -- not used: syntax check detects errors
type GESubstArrow = GEContext -> RecList -> GEArrow XmlTree XmlTree
type GEArrow b c = IOStateArrow GEEnv b c
type RecList = [String]
-- ------------------------------------------------------------
newtype GEEnv = GEEnv (M.Map String GESubstArrow)
emptyGeEnv :: GEEnv
emptyGeEnv = GEEnv M.empty
lookupGeEnv :: String -> GEEnv -> Maybe GESubstArrow
lookupGeEnv k (GEEnv env)
= M.lookup k env
addGeEntry :: String -> GESubstArrow -> GEEnv -> GEEnv
addGeEntry k a (GEEnv env)
= GEEnv $ M.insert k a env
-- ------------------------------------------------------------
-- |
-- substitution of general entities
--
-- input: a complete document tree including root node
processGeneralEntities :: IOStateArrow s XmlTree XmlTree
processGeneralEntities
= ( traceMsg 1 "processGeneralEntities: collect and substitute general entities"
>>>
withOtherUserState emptyGeEnv (processChildren (processGeneralEntity ReferenceInContent []))
>>>
setDocumentStatusFromSystemState "in general entity processing"
>>>
traceTree
>>>
traceSource
)
`when`
documentStatusOk
processGeneralEntity :: GESubstArrow
processGeneralEntity context recl
= choiceA [ isElem :-> ( processAttrl (processChildren substEntitiesInAttrValue)
>>>
processChildren (processGeneralEntity context recl)
)
, isEntityRef :-> substEntityRef
, isDTDDoctype :-> processChildren (processGeneralEntity context recl)
, isDTDEntity :-> addEntityDecl
, isDTDAttlist :-> substEntitiesInAttrDefaultValue
, this :-> this
]
where
addEntityDecl :: GEArrow XmlTree XmlTree
addEntityDecl
= perform ( choiceA [ isIntern :-> addInternalEntity -- don't change sequence of cases
, isExtern :-> addExternalEntity
, isUnparsed :-> addUnparsedEntity
]
)
where
isIntern = none `when` hasDTDAttr k_system
isExtern = none `when` hasDTDAttr k_ndata
isUnparsed = this
addInternalEntity :: GEArrow XmlTree b
addInternalEntity
= insertInternal $<<
( ( getDTDAttrValue a_name
>>>
traceValue 2 (("processGeneralEntity: general entity definition for " ++) . show)
)
&&&
xshow (getChildren >>> isText)
)
where
insertInternal entity contents
= insertEntity (substInternal contents) entity
>>>
none
addExternalEntity :: GEArrow XmlTree b
addExternalEntity
= insertExternal $<<
( ( getDTDAttrValue a_name
>>>
traceValue 2 (("processGeneralEntity: external entity definition for " ++) . show)
)
&&&
getDTDAttrValue a_url -- the absolute URL, not the relative in attr: k_system
)
where
insertExternal entity uri
= insertEntity (substExternalParsed1Time uri) entity
>>>
none
addUnparsedEntity :: GEArrow XmlTree b
addUnparsedEntity
= getDTDAttrValue a_name
>>>
traceValue 2 (("processGeneralEntity: unparsed entity definition for " ++) . show)
>>>
applyA (arr (insertEntity substUnparsed))
>>>
none
insertEntity :: (String -> GESubstArrow) -> String -> GEArrow b b
insertEntity fct entity
= ( getUserState
>>>
applyA (arr checkDefined)
)
`guards`
addEntity fct entity
where
checkDefined geEnv
= maybe ok alreadyDefined . lookupGeEnv entity $ geEnv
where
ok = this
alreadyDefined _
= issueWarn ("entity " ++ show entity ++ " already defined, repeated definition ignored")
>>>
none
addEntity :: (String -> GESubstArrow) -> String -> GEArrow b b
addEntity fct entity
= changeUserState ins
where
ins _ geEnv = addGeEntry entity (fct entity) geEnv
substEntitiesInAttrDefaultValue :: GEArrow XmlTree XmlTree
substEntitiesInAttrDefaultValue
= applyA ( xshow ( getDTDAttrValue a_default -- parse the default value
>>> -- substitute entities
mkText -- and convert value into a string
>>>
parseXmlEntityValueAsAttrValue "default value of attribute"
>>>
filterErrorMsg
>>>
substEntitiesInAttrValue
)
>>> arr (setDTDAttrValue a_default)
)
`when` hasDTDAttr a_default
substEntitiesInAttrValue :: GEArrow XmlTree XmlTree
substEntitiesInAttrValue
= ( processGeneralEntity ReferenceInAttributeValue recl
`when`
isEntityRef
)
>>>
changeText normalizeWhiteSpace
>>>
transfCharRef
where
normalizeWhiteSpace = map ( \c -> if c `elem` "\n\t\r" then ' ' else c )
substEntityRef :: GEArrow XmlTree XmlTree
substEntityRef
= applyA ( ( ( getEntityRef -- get the entity name and the env
>>> -- and compute the arrow to be applied
traceValue 2 (("processGeneralEntity: entity reference for entity " ++) . show)
>>>
traceMsg 3 ("recursion list = " ++ show recl)
)
&&&
getUserState
) >>>
arr2 substA
)
where
substA :: String -> GEEnv -> GEArrow XmlTree XmlTree
substA entity geEnv
= maybe entityNotFound entityFound . lookupGeEnv entity $ geEnv
where
errMsg msg
= issueErr msg
entityNotFound
= errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, no definition found, (forward reference?)")
entityFound fct
| entity `elem` recl
= errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, cyclic definition")
| otherwise
= fct context recl
substExternalParsed1Time :: String -> String -> GESubstArrow
substExternalParsed1Time uri entity cx rl
= perform ( traceMsg 2 ("substExternalParsed1Time: read and parse external parsed entity " ++ show entity)
>>>
runInLocalURIContext ( root [sattr a_source uri] [] -- uri must be an absolute uri
>>> -- abs uri is computed during parameter entity handling
getXmlEntityContents
>>>
processExternalEntityContents
)
>>>
applyA ( arr $ \ s -> addEntity (substExternalParsed s) entity )
)
>>>
processGeneralEntity cx rl
where
processExternalEntityContents :: IOStateArrow s XmlTree String
processExternalEntityContents
= ( ( ( documentStatusOk -- reading entity succeeded
>>> -- with content stored in a text node
(getChildren >>> isText)
)
`guards`
this
)
`orElse`
issueErr ("illegal value for external parsed entity " ++ show entity)
)
>>>
xshow (getChildren >>> isText)
substExternalParsed :: String -> String -> GESubstArrow
substExternalParsed s entity ReferenceInContent rl = includedIfValidating s rl entity
substExternalParsed _ entity ReferenceInAttributeValue _
= forbidden entity "external parsed general" "in attribute value"
substExternalParsed _ _ ReferenceInEntityValue _
= bypassed
substInternal :: String -> String -> GESubstArrow
substInternal s entity ReferenceInContent rl = included s rl entity
substInternal s entity ReferenceInAttributeValue rl = includedInLiteral s rl entity
substInternal _ _ ReferenceInEntityValue _ = bypassed
substUnparsed :: String -> GESubstArrow
substUnparsed entity ReferenceInContent _ = forbidden entity "unparsed" "content"
substUnparsed entity ReferenceInAttributeValue _ = forbidden entity "unparsed" "attribute value"
substUnparsed entity ReferenceInEntityValue _ = forbidden entity "unparsed" "entity value"
-- XML 1.0 chapter 4.4.2
included :: String -> RecList -> String -> GEArrow XmlTree XmlTree
included s rl entity
= traceMsg 3 ("substituting general entity " ++ show entity ++ " with value " ++ show s)
>>>
txt s
>>>
parseXmlEntityValueAsContent ("substituting general entity " ++ show entity ++ " in contents")
>>>
filterErrorMsg
>>>
processGeneralEntity context (entity : rl)
-- XML 1.0 chapter 4.4.3
includedIfValidating :: String -> RecList -> String -> GEArrow XmlTree XmlTree
includedIfValidating
= included
-- XML 1.0 chapter 4.4.4
forbidden :: String -> String -> String -> GEArrow XmlTree XmlTree
forbidden entity msg cx
= issueErr ("reference of " ++ msg ++ show entity ++ " forbidden in " ++ cx)
-- XML 1.0 chapter 4.4.5
includedInLiteral :: String -> RecList -> String -> GEArrow XmlTree XmlTree
includedInLiteral s rl entity
= txt s
>>>
parseXmlEntityValueAsAttrValue ("substituting general entity " ++ show entity ++ " in attribute value")
>>>
filterErrorMsg
>>>
processGeneralEntity context (entity : rl)
-- XML 1.0 chapter 4.4.7
bypassed :: GEArrow XmlTree XmlTree
bypassed
= this
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/Namespace.hs 0000644 0000000 0000000 00000040525 12752557014 017115 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.Namespace
Copyright : Copyright (C) 2005-2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
namespace specific arrows
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.Namespace
( attachNsEnv
, cleanupNamespaces
, collectNamespaceDecl
, collectPrefixUriPairs
, isNamespaceDeclAttr
, getNamespaceDecl
, processWithNsEnv
, processWithNsEnvWithoutAttrl
, propagateNamespaces
, uniqueNamespaces
, uniqueNamespacesFromDeclAndQNames
, validateNamespaces
)
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Control.Arrow.ListArrow
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Data.Maybe ( isNothing
, fromJust
)
import Data.List ( nub )
-- ------------------------------------------------------------
-- | test whether an attribute node contains an XML Namespace declaration
isNamespaceDeclAttr :: ArrowXml a => a XmlTree XmlTree
isNamespaceDeclAttr
= fromLA $
(getAttrName >>> isA isNameSpaceName) `guards` this
{-# INLINE isNamespaceDeclAttr #-}
-- | get the namespace prefix and the namespace URI out of
-- an attribute tree with a namespace declaration (see 'isNamespaceDeclAttr')
-- for all other nodes this arrow fails
getNamespaceDecl :: ArrowXml a => a XmlTree (String, String)
getNamespaceDecl
= fromLA $
isNamespaceDeclAttr
>>>
( ( getAttrName
>>>
arr getNsPrefix
)
&&& xshow getChildren
)
where
getNsPrefix = drop 6 . qualifiedName -- drop "xmlns:"
-- ------------------------------------------------------------
-- | collect all namespace declarations contained in a document
--
-- apply 'getNamespaceDecl' to a whole XmlTree
collectNamespaceDecl :: LA XmlTree (String, String)
collectNamespaceDecl = multi getAttrl >>> getNamespaceDecl
-- | collect all (namePrefix, namespaceUri) pairs from a tree
--
-- all qualified names are inspected, whether a namespace uri is defined,
-- for these uris the prefix and uri is returned. This arrow is useful for
-- namespace cleanup, e.g. for documents generated with XSLT. It can be used
-- together with 'collectNamespaceDecl' to 'cleanupNamespaces'
collectPrefixUriPairs :: LA XmlTree (String, String)
collectPrefixUriPairs
= multi (isElem <+> getAttrl <+> isPi)
>>>
getQName
>>>
arrL getPrefixUri
where
getPrefixUri :: QName -> [(String, String)]
getPrefixUri n
| null uri = []
| px == a_xmlns
||
px == a_xml = [] -- these ones are reserved an predefined
| otherwise = [(namePrefix n, uri)]
where
uri = namespaceUri n
px = namePrefix n
-- ------------------------------------------------------------
-- | generate unique namespaces and add all namespace declarations to all top nodes containing a namespace declaration
-- Usually the top node containing namespace declarations is the root node, but this isn't mandatory.
--
-- Calls 'cleanupNamespaces' with 'collectNamespaceDecl'
uniqueNamespaces :: ArrowXml a => a XmlTree XmlTree
uniqueNamespaces = fromLA $
cleanupNamespaces' collectNamespaceDecl
-- | generate unique namespaces and add all namespace declarations for all prefix-uri pairs in all qualified names
--
-- useful for cleanup of namespaces in generated documents.
-- Calls 'cleanupNamespaces' with @ collectNamespaceDecl \<+> collectPrefixUriPairs @
uniqueNamespacesFromDeclAndQNames :: ArrowXml a => a XmlTree XmlTree
uniqueNamespacesFromDeclAndQNames = fromLA $
cleanupNamespaces' ( collectNamespaceDecl
<+>
collectPrefixUriPairs
)
cleanupNamespaces' :: LA XmlTree (String, String) -> LA XmlTree XmlTree
cleanupNamespaces' collectNamespaces = processTopDownUntil
( hasNamespaceDecl `guards` cleanupNamespaces collectNamespaces )
where
hasNamespaceDecl = isElem
>>>
getAttrl
>>>
isNamespaceDeclAttr
-- | does the real work for namespace cleanup.
--
-- The parameter is used for collecting namespace uris and prefixes from the input tree
cleanupNamespaces :: LA XmlTree (String, String) -> LA XmlTree XmlTree
cleanupNamespaces collectNamespaces
= renameNamespaces $< (listA collectNamespaces >>^ (toNsEnv >>> nub))
where
renameNamespaces :: NsEnv -> LA XmlTree XmlTree
renameNamespaces env
= processBottomUp
( processAttrl
( ( none `when` isNamespaceDeclAttr ) -- remove all namespace declarations
>>>
changeQName renamePrefix -- update namespace prefix of attribute names, if namespace uri is set
)
>>>
changeQName renamePrefix -- update namespace prefix of element names
)
>>>
attachEnv env1 -- add all namespaces as attributes to the root node attribute list
where
renamePrefix :: QName -> QName
renamePrefix n
| isNullXName uri = n
| isNothing newPx = n
| otherwise = setNamePrefix' (fromJust newPx) n
where
uri = namespaceUri' n
newPx = lookup uri revEnv1
revEnv1 = map (\ (x, y) -> (y, x)) env1
env1 :: NsEnv
env1 = newEnv [] uris
uris :: [XName]
uris = nub . map snd $ env
genPrefixes :: [XName]
genPrefixes = map (newXName . ("ns" ++) . show) [(0::Int)..]
newEnv :: NsEnv -> [XName] -> NsEnv
newEnv env' []
= env'
newEnv env' (uri:rest)
= newEnv env'' rest
where
env'' = (prefix, uri) : env'
prefix
= head (filter notAlreadyUsed $ preferedPrefixes ++ genPrefixes)
preferedPrefixes
= map fst . filter ((==uri).snd) $ env
notAlreadyUsed s
= isNothing . lookup s $ env'
-- ------------------------------------------------------------
-- | auxiliary arrow for processing with a namespace environment
--
-- process a document tree with an arrow, containing always the
-- valid namespace environment as extra parameter.
-- The namespace environment is implemented as a 'Data.AssocList.AssocList'.
-- Processing of attributes can be controlled by a boolean parameter
processWithNsEnv1 :: ArrowXml a => Bool -> (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree
processWithNsEnv1 withAttr f env
= ifA isElem -- the test is just an optimization
( processWithExtendedEnv $< arr (extendEnv env) ) -- only element nodes contain namespace declarations
( processWithExtendedEnv env )
where
processWithExtendedEnv env'
= f env' -- apply the env filter
>>>
( ( if withAttr
then processAttrl (f env') -- apply the env to all attributes
else this
)
>>>
processChildren (processWithNsEnv f env') -- apply the env recursively to all children
)
`when` isElem -- attrl and children only need processing for elem nodes
extendEnv :: NsEnv -> XmlTree -> NsEnv
extendEnv env' t'
= addEntries (toNsEnv newDecls) env'
where
newDecls = runLA ( getAttrl >>> getNamespaceDecl ) t'
-- ------------------------------------------------------------
-- | process a document tree with an arrow, containing always the
-- valid namespace environment as extra parameter.
--
-- The namespace environment is implemented as a 'Data.AssocList.AssocList'
processWithNsEnv :: ArrowXml a => (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree
processWithNsEnv = processWithNsEnv1 True
-- | process all element nodes of a document tree with an arrow, containing always the
-- valid namespace environment as extra parameter. Attribute lists are not processed.
--
-- See also: 'processWithNsEnv'
processWithNsEnvWithoutAttrl :: ArrowXml a => (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree
processWithNsEnvWithoutAttrl = processWithNsEnv1 False
-- -----------------------------------------------------------------------------
-- | attach all valid namespace declarations to the attribute list of element nodes.
--
-- This arrow is useful for document processing, that requires access to all namespace
-- declarations at any element node, but which cannot be done with a simple 'processWithNsEnv'.
attachNsEnv :: ArrowXml a => NsEnv -> a XmlTree XmlTree
attachNsEnv initialEnv
= fromLA $ processWithNsEnvWithoutAttrl attachEnv initialEnv
where
attachEnv :: NsEnv -> LA XmlTree XmlTree
attachEnv env
= ( processAttrl (none `when` isNamespaceDeclAttr)
>>>
addAttrl (catA nsAttrl)
)
`when` isElem
where
nsAttrl :: [LA XmlTree XmlTree]
nsAttrl = map nsDeclToAttr env
nsDeclToAttr :: (XName, XName) -> LA XmlTree XmlTree
nsDeclToAttr (n, uri)
= mkAttr qn (txt (unXN uri))
where
qn :: QName
qn | isNullXName n = newQName xmlnsXName nullXName xmlnsNamespaceXName
| otherwise = newQName n xmlnsXName xmlnsNamespaceXName
-- -----------------------------------------------------------------------------
-- |
-- propagate all namespace declarations \"xmlns:ns=...\" to all element and attribute nodes of a document.
--
-- This arrow does not check for illegal use of namespaces.
-- The real work is done by 'propagateNamespaceEnv'.
--
-- The arrow may be applied repeatedly if neccessary.
propagateNamespaces :: ArrowXml a => a XmlTree XmlTree
propagateNamespaces = fromLA $
propagateNamespaceEnv [ (xmlXName, xmlNamespaceXName)
, (xmlnsXName, xmlnsNamespaceXName)
]
-- |
-- attaches the namespace info given by the namespace table
-- to a tag node and its attributes and children.
propagateNamespaceEnv :: NsEnv -> LA XmlTree XmlTree
propagateNamespaceEnv
= processWithNsEnv addNamespaceUri
where
addNamespaceUri :: NsEnv -> LA XmlTree XmlTree
addNamespaceUri env'
= choiceA [ isElem :-> changeElemName (setNamespace env')
, isAttr :-> attachNamespaceUriToAttr env'
, isPi :-> changePiName (setNamespace env')
, this :-> this
]
attachNamespaceUriToAttr :: NsEnv -> LA XmlTree XmlTree
attachNamespaceUriToAttr attrEnv
= ( ( getQName >>> isA (not . null . namePrefix) )
`guards`
changeAttrName (setNamespace attrEnv)
)
`orElse`
( changeAttrName (const xmlnsQN)
`when`
hasName a_xmlns
)
-- -----------------------------------------------------------------------------
-- |
-- validate the namespace constraints in a whole tree.
--
-- Result is the list of errors concerning namespaces.
-- Predicates 'isWellformedQName', 'isWellformedQualifiedName', 'isDeclaredNamespace'
-- and 'isWellformedNSDecl' are applied to the appropriate elements and attributes.
validateNamespaces :: ArrowXml a => a XmlTree XmlTree
validateNamespaces = fromLA validateNamespaces1
validateNamespaces1 :: LA XmlTree XmlTree
validateNamespaces1
= choiceA [ isRoot :-> ( getChildren >>> validateNamespaces1 ) -- root is correct by definition
, this :-> multi validate1Namespaces
]
-- |
-- a single node for namespace constrains.
validate1Namespaces :: LA XmlTree XmlTree
validate1Namespaces
= choiceA
[ isElem :-> catA [ ( getQName >>> isA ( not . isWellformedQName )
)
`guards` nsError (\ n -> "element name " ++ show n ++ " is not a wellformed qualified name" )
, ( getQName >>> isA ( not . isDeclaredNamespace )
)
`guards` nsError (\ n -> "namespace for prefix in element name " ++ show n ++ " is undefined" )
, doubleOcc $< ( (getAttrl >>> getUniversalName) >>. doubles )
, getAttrl >>> validate1Namespaces
]
, isAttr :-> catA [ ( getQName >>> isA ( not . isWellformedQName )
)
`guards` nsError (\ n -> "attribute name " ++ show n ++ " is not a wellformed qualified name" )
, ( getQName >>> isA ( not . isDeclaredNamespace )
)
`guards` nsError (\ n -> "namespace for prefix in attribute name " ++ show n ++ " is undefined" )
, ( hasNamePrefix a_xmlns >>> xshow getChildren >>> isA null
)
`guards` nsError (\ n -> "namespace value of namespace declaration for " ++ show n ++ " has no value" )
, ( getQName >>> isA (not . isWellformedNSDecl )
)
`guards` nsError (\ n -> "illegal namespace declaration for name " ++ show n ++ " starting with reserved prefix " ++ show "xml" )
]
, isDTD :-> catA [ isDTDDoctype <+> isDTDAttlist <+> isDTDElement <+> isDTDName
>>>
getDTDAttrValue a_name
>>>
( isA (not . isWellformedQualifiedName)
`guards`
nsErr (\ n -> "a DTD part contains a not wellformed qualified Name: " ++ show n)
)
, isDTDAttlist
>>>
getDTDAttrValue a_value
>>>
( isA (not . isWellformedQualifiedName)
`guards`
nsErr (\ n -> "an ATTLIST declaration contains as attribute name a not wellformed qualified Name: " ++ show n)
)
, isDTDEntity <+> isDTDPEntity <+> isDTDNotation
>>>
getDTDAttrValue a_name
>>>
( isA (not . isNCName)
`guards`
nsErr (\ n -> "an entity or notation declaration contains a not wellformed NCName: " ++ show n)
)
]
, isPi :-> catA [ getName
>>>
( isA (not . isNCName)
`guards`
nsErr (\ n -> "a PI contains a not wellformed NCName: " ++ show n)
)
]
]
where
nsError :: (QName -> String) -> LA XmlTree XmlTree
nsError msg
= getQName >>> nsErr msg
nsErr :: (a -> String) -> LA a XmlTree
nsErr msg = arr msg >>> mkError c_err
doubleOcc :: String -> LA XmlTree XmlTree
doubleOcc an
= nsError (\ n -> "multiple occurences of universal name for attributes of tag " ++ show n ++ " : " ++ show an )
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/ParserInterface.hs 0000644 0000000 0000000 00000005552 12752557014 020277 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.ParserInterface
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
interface to the HXT XML and DTD parsers
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.ParserInterface
( module Text.XML.HXT.Arrow.ParserInterface )
where
import Control.Arrow.ArrowList
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import qualified Text.XML.HXT.Parser.HtmlParsec as HP
import qualified Text.XML.HXT.Parser.XmlParsec as XP
import qualified Text.XML.HXT.Parser.XmlDTDParser as DP
-- ------------------------------------------------------------
parseXmlDoc :: ArrowXml a => a (String, String) XmlTree
parseXmlDoc = arr2L XP.parseXmlDocument
parseXmlDTDPart :: ArrowXml a => a (String, XmlTree) XmlTree
parseXmlDTDPart = arr2L XP.parseXmlDTDPart
xreadCont :: ArrowXml a => a String XmlTree
xreadCont = arrL XP.xread
xreadDoc :: ArrowXml a => a String XmlTree
xreadDoc = arrL XP.xreadDoc
parseXmlEntityEncodingSpec
, parseXmlDocEncodingSpec
, removeEncodingSpec :: ArrowXml a => a XmlTree XmlTree
parseXmlDocEncodingSpec = arrL XP.parseXmlDocEncodingSpec
parseXmlEntityEncodingSpec = arrL XP.parseXmlEntityEncodingSpec
removeEncodingSpec = arrL XP.removeEncodingSpec
parseXmlDTDdeclPart :: ArrowXml a => a XmlTree XmlTree
parseXmlDTDdeclPart = arrL DP.parseXmlDTDdeclPart
parseXmlDTDdecl :: ArrowXml a => a XmlTree XmlTree
parseXmlDTDdecl = arrL DP.parseXmlDTDdecl
parseXmlDTDEntityValue :: ArrowXml a => a XmlTree XmlTree
parseXmlDTDEntityValue = arrL DP.parseXmlDTDEntityValue
parseXmlEntityValueAsContent :: ArrowXml a => String -> a XmlTree XmlTree
parseXmlEntityValueAsContent = arrL . XP.parseXmlEntityValueAsContent
parseXmlEntityValueAsAttrValue :: ArrowXml a => String -> a XmlTree XmlTree
parseXmlEntityValueAsAttrValue = arrL . XP.parseXmlEntityValueAsAttrValue
-- ------------------------------------------------------------
parseHtmlDoc :: ArrowList a => a (String, String) XmlTree
parseHtmlDoc = arr2L HP.parseHtmlDocument
hread :: ArrowList a => a String XmlTree
hread = arrL HP.parseHtmlContent
hreadDoc :: ArrowList a => a String XmlTree
hreadDoc = arrL $ HP.parseHtmlDocument "string"
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/Pickle.hs 0000644 0000000 0000000 00000021720 13625174751 016427 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.Pickle
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Pickler functions for converting between user defined data types
and XmlTree data. Usefull for persistent storage and retreival
of arbitray data as XML documents
This module is an adaptation of the pickler combinators
developed by Andrew Kennedy
( https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2004\/01\/picklercombinators.pdf )
The difference to Kennedys approach is that the target is not
a list of Chars but a list of XmlTrees. The basic picklers will
convert data into XML text nodes. New are the picklers for
creating elements and attributes.
One extension was neccessary: The unpickling may fail.
Therefore the unpickler has a Maybe result type.
Failure is used to unpickle optional elements
(Maybe data) and lists of arbitray length
There is an example program demonstrating the use
of the picklers for a none trivial data structure.
(see \"examples\/arrows\/pickle\" directory)
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.Pickle
( xpickleDocument -- from this module Text.XML.HXT.Arrow.Pickle
, xunpickleDocument
, xpickleWriteDTD
, xpickleDTD
, checkPickler
, xpickleVal
, xunpickleVal
, thePicklerDTD
, a_addDTD
-- from Text.XML.HXT.Arrow.Pickle.Xml
, pickleDoc
, unpickleDoc
, unpickleDoc'
, showPickled
, PU(..)
, XmlPickler(..)
, xp4Tuple
, xp5Tuple
, xp6Tuple
, xp7Tuple
, xp8Tuple
, xp9Tuple
, xp10Tuple
, xp11Tuple
, xp12Tuple
, xp13Tuple
, xp14Tuple
, xp15Tuple
, xp16Tuple
, xp17Tuple
, xp18Tuple
, xp19Tuple
, xp20Tuple
, xp21Tuple
, xp22Tuple
, xp23Tuple
, xp24Tuple
, xpAddFixedAttr
, xpAddNSDecl
, xpAlt
, xpAttr
, xpAttrFixed
, xpAttrImplied
, xpAttrNS
, xpCheckEmpty
, xpCheckEmptyAttributes
, xpCheckEmptyContents
, xpTextAttr
, xpChoice
, xpDefault
, xpElem
, xpElemNS
, xpElemWithAttrValue
, xpFilterAttr
, xpFilterCont
, xpInt
, xpLift
, xpLiftEither
, xpLiftMaybe
, xpList
, xpList1
, xpMap
, xpOption
, xpPair
, xpPrim
, xpSeq
, xpSeq'
, xpText
, xpText0
, xpTextDT
, xpText0DT
, xpTree
, xpTrees
, xpTriple
, xpUnit
, xpWrap
, xpWrapEither
, xpWrapMaybe
, xpXmlText
, xpZero
-- from Text.XML.HXT.Arrow.Pickle.Schema
, Schema
, Schemas
, DataTypeDescr
)
where
import Control.Arrow.ListArrows
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.ReadDocument
import Text.XML.HXT.Arrow.WriteDocument
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.Pickle.Xml
import Text.XML.HXT.Arrow.Pickle.Schema
import Text.XML.HXT.Arrow.Pickle.DTD
-- ------------------------------------------------------------
-- the arrow interface for pickling and unpickling
-- | store an arbitray value in a persistent XML document
--
-- The pickler converts a value into an XML tree, this is written out with
-- 'Text.XML.HXT.Arrow.writeDocument'. The option list is passed to 'Text.XML.HXT.Arrow.writeDocument'
--
-- An option evaluated by this arrow is 'a_addDTD'.
-- If 'a_addDTD' is set ('v_1'), the pickler DTD is added as an inline DTD into the document.
xpickleDocument :: PU a -> SysConfigList -> String -> IOStateArrow s a XmlTree
xpickleDocument xp config dest
= localSysEnv
$
configSysVars config
>>>
xpickleVal xp
>>>
traceMsg 1 "xpickleVal applied"
>>>
ifA ( getSysAttr a_addDTD >>> isA (== v_1) )
( replaceChildren ( (constA undefined >>> xpickleDTD xp >>> getChildren)
<+>
getChildren
)
)
this
>>>
writeDocument [] dest
-- | Option for generating and adding DTD when document is pickled
a_addDTD :: String
a_addDTD = "addDTD"
-- | read an arbitray value from an XML document
--
-- The document is read with 'Text.XML.HXT.Arrow.readDocument'. Options are passed
-- to 'Text.XML.HXT.Arrow.readDocument'. The conversion from XmlTree is done with the
-- pickler.
--
-- @ xpickleDocument xp al dest >>> xunpickleDocument xp al' dest @ is the identity arrow
-- when applied with the appropriate options. When during pickling indentation is switched on,
-- the whitespace must be removed during unpickling.
xunpickleDocument :: PU a -> SysConfigList -> String -> IOStateArrow s b a
xunpickleDocument xp conf src
= readDocument conf src
>>>
traceMsg 1 ("xunpickleVal for " ++ show src ++ " started")
>>>
xunpickleVal xp
>>>
traceMsg 1 ("xunpickleVal for " ++ show src ++ " finished")
-- | Write out the DTD generated out of a pickler. Calls 'xpicklerDTD'
xpickleWriteDTD :: PU b -> SysConfigList -> String -> IOStateArrow s b XmlTree
xpickleWriteDTD xp config dest
= xpickleDTD xp
>>>
writeDocument config dest
-- | The arrow for generating the DTD out of a pickler
--
-- A DTD is generated from a pickler and check for consistency.
-- Errors concerning the DTD are issued.
xpickleDTD :: PU b -> IOStateArrow s b XmlTree
xpickleDTD xp = root [] [ constL (thePicklerDTD xp)
>>>
filterErrorMsg
]
-- | An arrow for checking picklers
--
-- A value is transformed into an XML document by a given pickler,
-- the associated DTD is extracted from the pickler and checked,
-- the document including the DTD is tranlated into a string,
-- this string is read and validated against the included DTD,
-- and unpickled.
-- The last step is the equality with the input.
--
-- If the check succeeds, the arrow works like this, else it fails.
checkPickler :: Eq a => PU a -> IOStateArrow s a a
checkPickler xp = ( ( ( ( xpickleVal xp
>>>
replaceChildren ( (constA undefined >>> xpickleDTD xp >>> getChildren)
<+>
getChildren
)
>>>
writeDocumentToString []
>>>
readFromString [withValidate True]
>>>
xunpickleVal xp
)
&&&
this
)
>>> isA (uncurry (==))
)
`guards` this
)
`orElse` issueErr "pickle/unpickle combinators failed"
-- | The arrow version of the pickler function
xpickleVal :: ArrowXml a => PU b -> a b XmlTree
xpickleVal xp = arr (pickleDoc xp)
-- | The arrow version of the unpickler function
{- old version, runs outside IO
xunpickleVal :: ArrowXml a => PU b -> a XmlTree b
xunpickleVal xp = ( processChildren (none `whenNot` isElem) -- remove all stuff surrounding the root element
`when`
isRoot
)
>>>
arrL (maybeToList . unpickleDoc xp)
-- -}
xunpickleVal :: PU b -> IOStateArrow s XmlTree b
xunpickleVal xp = ( processChildren (none `whenNot` isElem) -- remove all stuff surrounding the root element
`when`
isRoot
)
>>>
arr (unpickleDoc' xp)
>>>
( ( (issueFatal $< arr ("document unpickling failed\n" ++))
>>>
none
)
|||
this
)
-- | Compute the associated DTD of a pickler
thePicklerDTD :: PU b -> XmlTrees
thePicklerDTD = dtdDescrToXml . dtdDescr . theSchema
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/Pickle/DTD.hs 0000644 0000000 0000000 00000026672 12752557014 017052 0 ustar 00 0000000 0000000 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.Pickle.DTD
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Version : $Id$
Functions for converting a pickler schema
into a DTD
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.Pickle.DTD
where
import Data.Maybe
import qualified Text.XML.HXT.DOM.XmlNode as XN
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.Pickle.Schema
import Text.XML.HXT.XMLSchema.DataTypeLibW3CNames
-- ------------------------------------------------------------
data DTDdescr = DTDdescr Name Schemas [(Name,Schemas)]
instance Show DTDdescr where
show (DTDdescr n es as)
= "root element: " ++ n ++ "\n"
++
"elements:\n"
++
concatMap ((++ "\n") .show) es
++
"attributes:\n"
++
concatMap ((++ "\n") . showAttr) as
where
showAttr (n1, sc) = n1 ++ ": " ++ show sc
-- ------------------------------------------------------------
-- | convert a DTD descr into XmlTrees
dtdDescrToXml :: DTDdescr -> XmlTrees
dtdDescrToXml (DTDdescr rt es as)
= checkErr (null rt) "no unique root element found in pickler DTD, add an \"xpElem\" pickler"
++
concatMap (checkErr True . ("no element decl found in: " ++) . show) (filter (not . isScElem) es)
++
concatMap (uncurry checkContentModell . \ (Element n sc) -> (n,sc)) es1
++
concatMap (uncurry checkAttrModell) as
++
[ XN.mkDTDElem DOCTYPE docAttrs ( concatMap elemDTD es1
++
concatMap (uncurry attrDTDs) as
) ]
where
es1 = filter isScElem es
docAttrs = [(a_name, if null rt then "no-unique-root-element-found" else rt)]
elemDTD (Element n sc)
| lookup1 a_type al == "unknown"
= cl
| otherwise
= [ XN.mkDTDElem ELEMENT ((a_name, n) : al) cl ]
where
(al, cl) = scContToXml sc
elemDTD _
= error "illegal case in elemDTD"
attrDTDs en = concatMap (attrDTD en)
attrDTD en (Attribute an sc)
= [ XN.mkDTDElem ATTLIST ((a_name, en) : (a_value, an) : al) cl ]
where
(al, cl) = scAttrToXml sc
attrDTD _ _ = error "illegal case in attrDTD"
checkAttrModell :: Name -> Schemas -> XmlTrees
checkAttrModell n = concatMap (checkAM n)
checkAM :: Name -> Schema -> XmlTrees
checkAM en (Attribute an sc) = checkAMC en an sc
checkAM _ _ = []
checkAMC :: Name -> Name -> Schema -> XmlTrees
checkAMC _en _an (CharData _) = []
checkAMC en an sc
| isScCharData sc = []
| isScList sc
&&
(sc_1 sc == scNmtoken)
= []
| isScOpt sc = checkAMC en an (sc_1 sc)
| otherwise = foundErr
( "weird attribute type found for attribute "
++ show an
++ " for element "
++ show en
++ "\n\t(internal structure: " ++ show sc ++ ")"
++ "\n\thint: create an element instead of an attribute for "
++ show an
)
-- checkContentModell1 n sc = foundErr (n ++ " : " ++ show sc) ++ checkContentModell n sc
checkContentModell :: Name -> Schema -> XmlTrees
checkContentModell _ Any
= []
checkContentModell _ (ElemRef _)
= []
checkContentModell _ (CharData _)
= []
checkContentModell _ (Seq [])
= []
checkContentModell n (Seq scs)
= checkErr pcDataInCM
( "PCDATA found in a sequence spec in the content modell for "
++ show n
++ "\n\thint: create an element for this data"
)
++
checkErr somethingElseInCM
( "something weired found in a sequence spec in the content modell for "
++ show n
)
++
concatMap (checkContentModell n) scs
where
pcDataInCM = any isScCharData scs
somethingElseInCM = any (\ sc -> not (isScSARE sc) && not (isScCharData sc)) scs
checkContentModell n (Alt scs)
= checkErr mixedCM
( "PCDATA mixed up with illegal content spec in mixed contents for "
++ show n
++ "\n\thint: create an element for this data"
)
++
concatMap (checkContentModell n) scs
where
mixedCM
| any isScCharData scs
= any (not . isScElemRef) . filter (not . isScCharData) $ scs
| otherwise
= False
checkContentModell _ (Rep _ _ (ElemRef _))
= []
checkContentModell n (Rep _ _ sc@(Seq _))
= checkContentModell n sc
checkContentModell n (Rep _ _ sc@(Alt _))
= checkContentModell n sc
checkContentModell n (Rep _ _ _)
= foundErr
( "illegal content spec found for "
++ show n
)
checkContentModell _ _
= []
scContToXml :: Schema -> (Attributes, XmlTrees)
scContToXml Any = ( [(a_type, v_any)], [] )
scContToXml (CharData _) = ( [(a_type, v_pcdata)], [] )
scContToXml (Seq []) = ( [(a_type, v_empty)], [] )
scContToXml sc@(ElemRef _) = scContToXml (Seq [sc])
scContToXml sc@(Seq _) = ( [(a_type, v_children)]
, scCont [] sc
)
scContToXml sc@(Alt sc1)
| isMixed sc1 = ( [(a_type, v_mixed)]
, scCont [ (a_modifier, "*") ] sc
)
| otherwise = ( [(a_type, v_children)]
, scCont [] sc
)
where
isMixed = not . null . filter isScCharData
scContToXml sc@(Rep _ _ _) = ( [(a_type, v_children)]
, scCont [] sc
)
scContToXml _sc = ( [(a_type, v_any)] -- default: everything is allowed
, []
)
scWrap :: Schema -> Schema
scWrap sc@(Alt _) = sc
scWrap sc@(Seq _) = sc
scWrap sc@(Rep _ _ _) = sc
scWrap sc = Seq [sc]
scCont :: Attributes -> Schema -> XmlTrees
scCont al (Seq scs) = scConts ((a_kind, v_seq ) : al) scs
scCont al (Alt scs) = scConts ((a_kind, v_choice) : al) scs
scCont al (Rep 0 (-1) sc) = scCont ((a_modifier, "*") : al) (scWrap sc)
scCont al (Rep 1 (-1) sc) = scCont ((a_modifier, "+") : al) (scWrap sc)
scCont al (Rep 0 1 sc) = scCont ((a_modifier, "?") : al) (scWrap sc)
scCont al (ElemRef n) = [XN.mkDTDElem NAME ((a_name, n) : al) []]
scCont _ (CharData _) = [XN.mkDTDElem NAME [(a_name, "#PCDATA")] []]
scCont _ _sc = [XN.mkDTDElem NAME [(a_name, "bad-content-spec")] []] -- error case
scConts :: Attributes -> Schemas -> XmlTrees
scConts al scs = [XN.mkDTDElem CONTENT al (concatMap (scCont []) scs)]
scAttrToXml :: Schema -> (Attributes, XmlTrees)
scAttrToXml sc
| isScFixed sc = ( [ (a_kind, k_fixed)
, (a_type, k_cdata)
, (a_default, (xsdParam xsd_enumeration sc))
]
, [])
| isScEnum sc = ( [ (a_kind, k_required)
, (a_type, k_enumeration)
]
, map (\ n -> XN.mkDTDElem NAME [(a_name, n)] []) enums
)
| isScCharData sc = ( [ (a_kind, k_required)
, (a_type, d_type)
]
, [])
| isScOpt sc = (addEntry a_kind k_implied al, cl)
| isScList sc = (addEntry a_type k_nmtokens al, cl)
| otherwise = ( [ (a_kind, k_fixed)
, (a_default, "bad-attribute-type: " ++ show sc)
]
, [] )
where
(al, cl) = scAttrToXml (sc_1 sc)
d_type
| sc == scNmtoken = k_nmtoken
| otherwise = k_cdata
enums = words . xsdParam xsd_enumeration $ sc
checkErr :: Bool -> String -> XmlTrees
checkErr True s = [XN.mkError c_err s]
checkErr _ _ = []
foundErr :: String -> XmlTrees
foundErr = checkErr True
-- ------------------------------------------------------------
-- | convert a pickler schema into a DTD descr
dtdDescr :: Schema -> DTDdescr
dtdDescr sc
= DTDdescr rt es1 as
where
es = elementDeclarations sc
es1 = map remAttrDec es
as = filter (not. null . snd) . concatMap attrDec $ es
rt = fromMaybe "" . elemName $ sc
elementDeclarations :: Schema -> Schemas
elementDeclarations sc = elemRefs . elementDecs [] $ [sc]
elementDecs :: Schemas -> Schemas -> Schemas
elementDecs es []
= es
elementDecs es (s:ss)
= elementDecs (elemDecs s) ss
where
elemDecs (Seq scs) = elementDecs es scs
elemDecs (Alt scs) = elementDecs es scs
elemDecs (Rep _ _ sc) = elemDecs sc
elemDecs e@(Element n sc)
| n `elem` elemNames es = es
| otherwise = elementDecs (e:es) [sc]
elemDecs _ = es
elemNames :: Schemas -> [Name]
elemNames = concatMap (maybeToList . elemName)
elemName :: Schema -> Maybe Name
elemName (Element n _) = Just n
elemName _ = Nothing
elemRefs :: Schemas -> Schemas
elemRefs = map elemRef
where
elemRef (Element n sc) = Element n (pruneElem sc)
elemRef sc = sc
pruneElem (Element n _) = ElemRef n
pruneElem (Seq scs) = Seq (map pruneElem scs)
pruneElem (Alt scs) = Alt (map pruneElem scs)
pruneElem (Rep l u sc) = Rep l u (pruneElem sc)
pruneElem sc = sc
attrDec :: Schema -> [(Name, Schemas)]
attrDec (Element n sc)
= [(n, attrDecs sc)]
where
attrDecs a@(Attribute _ _) = [a]
attrDecs (Seq scs) = concatMap attrDecs scs
attrDecs _ = []
attrDec _ = []
remAttrDec :: Schema -> Schema
remAttrDec (Element n sc)
= Element n (remA sc)
where
remA (Attribute _ _) = scEmpty
remA (Seq scs) = scSeqs . map remA $ scs
remA sc1 = sc1
remAttrDec _
= error "illegal case in remAttrDec"
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/Pickle/Schema.hs 0000644 0000000 0000000 00000016152 12752557014 017627 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.Pickle.Schema
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Version : $Id$
Datatypes and functions for building a content model
for XML picklers. A schema is part of every pickler
and can be used to derive a corrensponding DTD (or Relax NG schema).
This schema further enables checking the picklers.
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.Pickle.Schema
where
import Text.XML.HXT.DOM.TypeDefs
import Text.XML.HXT.XMLSchema.DataTypeLibW3CNames
import Data.List
( sort )
-- ------------------------------------------------------------
-- | The datatype for modelling the structure of an
data Schema = Any
| Seq { sc_l :: [Schema]
}
| Alt { sc_l :: [Schema]
}
| Rep { sc_lb :: Int
, sc_ub :: Int
, sc_1 :: Schema
}
| Element { sc_n :: Name
, sc_1 :: Schema
}
| Attribute { sc_n :: Name
, sc_1 :: Schema
}
| ElemRef { sc_n :: Name
}
| CharData { sc_dt :: DataTypeDescr
}
deriving (Eq, Show)
type Name = String
type Schemas = [Schema]
data DataTypeDescr = DTDescr { dtLib :: String
, dtName :: String
, dtParams :: Attributes
}
deriving (Show)
instance Eq DataTypeDescr where
x1 == x2 = dtLib x1 == dtLib x2
&&
dtName x1 == dtName x2
&&
sort (dtParams x1) == sort (dtParams x2)
-- ------------------------------------------------------------
-- | test: is schema a simple XML Schema datatype
isScXsd :: (String -> Bool) -> Schema -> Bool
isScXsd p (CharData (DTDescr lib n _ps))
= lib == w3cNS
&&
p n
isScXsd _ _ = False
-- | test: is type a fixed value attribute type
isScFixed :: Schema -> Bool
isScFixed sc = isScXsd (== xsd_string) sc
&&
((== 1) . length . words . xsdParam xsd_enumeration) sc
isScEnum :: Schema -> Bool
isScEnum sc = isScXsd (== xsd_string) sc
&&
(not . null . xsdParam xsd_enumeration) sc
isScElem :: Schema -> Bool
isScElem (Element _ _) = True
isScElem _ = False
isScAttr :: Schema -> Bool
isScAttr (Attribute _ _)= True
isScAttr _ = False
isScElemRef :: Schema -> Bool
isScElemRef (ElemRef _) = True
isScElemRef _ = False
isScCharData :: Schema -> Bool
isScCharData (CharData _)= True
isScCharData _ = False
isScSARE :: Schema -> Bool
isScSARE (Seq _) = True
isScSARE (Alt _) = True
isScSARE (Rep _ _ _) = True
isScSARE (ElemRef _) = True
isScSARE _ = False
isScList :: Schema -> Bool
isScList (Rep 0 (-1) _) = True
isScList _ = False
isScOpt :: Schema -> Bool
isScOpt (Rep 0 1 _) = True
isScOpt _ = False
-- | access an attribute of a descr of an atomic type
xsdParam :: String -> Schema -> String
xsdParam n (CharData dtd)
= lookup1 n (dtParams dtd)
xsdParam _ _ = ""
-- ------------------------------------------------------------
-- smart constructors for Schema datatype
-- ------------------------------------------------------------
--
-- predefined xsd data types for representation of DTD types
scDT :: String -> String -> Attributes -> Schema
scDT l n rl = CharData $ DTDescr l n rl
scDTxsd :: String -> Attributes -> Schema
scDTxsd = scDT w3cNS
scString :: Schema
scString = scDTxsd xsd_string []
scString1 :: Schema
scString1 = scDTxsd xsd_string [(xsd_minLength, "1")]
scFixed :: String -> Schema
scFixed v = scDTxsd xsd_string [(xsd_enumeration, v)]
scEnum :: [String] -> Schema
scEnum vs = scFixed (unwords vs)
scNmtoken :: Schema
scNmtoken = scDTxsd xsd_NCName []
scNmtokens :: Schema
scNmtokens = scList scNmtoken
-- ------------------------------------------------------------
scEmpty :: Schema
scEmpty = Seq []
scSeq :: Schema -> Schema -> Schema
scSeq (Seq []) sc2 = sc2
scSeq sc1 (Seq []) = sc1
scSeq (Seq scs1) (Seq scs2) = Seq (scs1 ++ scs2) -- prevent nested Seq expr
scSeq (Seq scs1) sc2 = Seq (scs1 ++ [sc2])
scSeq sc1 (Seq scs2) = Seq (sc1 : scs2)
scSeq sc1 sc2 = Seq [sc1,sc2]
scSeqs :: [Schema] -> Schema
scSeqs = foldl scSeq scEmpty
scNull :: Schema
scNull = Alt []
scAlt :: Schema -> Schema -> Schema
scAlt (Alt []) sc2 = sc2
scAlt sc1 (Alt []) = sc1
scAlt (Alt scs1) (Alt scs2) = Alt (scs1 ++ scs2) -- prevent nested Alt expr
scAlt (Alt scs1) sc2 = Alt (scs1 ++ [sc2])
scAlt sc1 (Alt scs2) = Alt (sc1 : scs2)
scAlt sc1 sc2 = Alt [sc1,sc2]
scAlts :: [Schema] -> Schema
scAlts = foldl scAlt scNull
scOption :: Schema -> Schema
scOption (Seq []) = scEmpty
scOption (Attribute n sc2) = Attribute n (scOption sc2)
scOption sc1
| sc1 == scString1 = scString
| otherwise = scOpt sc1
scList :: Schema -> Schema
scList = scRep 0 (-1)
scList1 :: Schema -> Schema
scList1 = scRep 1 (-1)
scOpt :: Schema -> Schema
scOpt = scRep 0 1
scRep :: Int -> Int -> Schema -> Schema
scRep l u sc1 = Rep l u sc1
scElem :: String -> Schema -> Schema
scElem n sc1 = Element n sc1
scAttr :: String -> Schema -> Schema
scAttr n sc1 = Attribute n sc1
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/Pickle/Xml.hs 0000644 0000000 0000000 00000175115 14025460147 017167 0 ustar 00 0000000 0000000 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeSynonymInstances #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.Pickle.Xml
Copyright : Copyright (C) 2005-2021 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
Pickler functions for converting between user defined data types
and XmlTree data. Usefull for persistent storage and retreival
of arbitray data as XML documents.
This module is an adaptation of the pickler combinators
developed by Andrew Kennedy
( https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2004\/01\/picklercombinators.pdf )
The difference to Kennedys approach is that the target is not
a list of Chars but a list of XmlTrees. The basic picklers will
convert data into XML text nodes. New are the picklers for
creating elements and attributes.
One extension was neccessary: The unpickling may fail.
Old: Therefore the unpickler has a Maybe result type.
Failure is used to unpickle optional elements
(Maybe data) and lists of arbitray length.
Since hxt-9.2.0: The unpicklers are implemented as
a parser monad with an Either err val result type.
This enables appropriate error messages , when unpickling
XML stuff, that is not generated with the picklers and which contains
some elements and/or attributes that are not handled when unpickling.
There is an example program demonstrating the use
of the picklers for a none trivial data structure.
(see \"examples\/arrows\/pickle\" directory in the hxt distribution)
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.Pickle.Xml
where
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative (Applicative (..))
#endif
import Control.Arrow.ArrowList
import Control.Arrow.ListArrows
import Control.Monad ()
#if MIN_VERSION_mtl(2,2,0)
import Control.Monad.Except (MonadError (..))
#else
import Control.Monad.Error (MonadError (..))
#endif
import Control.Monad.State (MonadState (..), gets,
modify)
import Data.Char (isDigit)
import Data.List (foldl')
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe (fromJust, fromMaybe)
import Text.XML.HXT.Arrow.Edit (xshowEscapeXml)
import Text.XML.HXT.Arrow.Pickle.Schema
import Text.XML.HXT.Arrow.ReadDocument (xread)
import Text.XML.HXT.Arrow.WriteDocument (writeDocumentToString)
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.DOM.Interface
import qualified Text.XML.HXT.DOM.ShowXml as XN
import qualified Text.XML.HXT.DOM.XmlNode as XN
{- just for embedded test cases, prefix with -- to activate
import Text.XML.HXT.Arrow.XmlArrow
import qualified Control.Arrow.ListArrows as X
-- -}
{- debug code
import qualified Debug.Trace as T
-- -}
-- ------------------------------------------------------------
data St = St { attributes :: [XmlTree]
, contents :: [XmlTree]
, nesting :: Int -- the remaining 3 fields are used only for unpickling
, pname :: QName -- to generate appropriate error messages
, pelem :: Bool
} deriving (Show)
data PU a = PU { appPickle :: Pickler a -- (a, St) -> St
, appUnPickle :: Unpickler a
, theSchema :: Schema
}
-- --------------------
--
-- The pickler
type Pickler a = a -> St -> St
-- --------------------
--
-- The unpickler monad, a combination of state and error monad
newtype Unpickler a = UP { runUP :: St -> (UnpickleVal a, St) }
type UnpickleVal a = Either UnpickleErr a
type UnpickleErr = (String, St)
instance Functor Unpickler where
fmap f u = UP $ \ st ->
let (r, st') = runUP u st in (fmap f r, st')
instance Applicative Unpickler where
pure a = UP $ \ st -> (Right a, st)
uf <*> ua = UP $ \ st ->
let (f, st') = runUP uf st in
case f of
Left err -> (Left err, st')
Right f' -> runUP (fmap f' ua) st'
instance Monad Unpickler where
return = pure
u >>= f = UP $ \ st ->
let (r, st') = runUP u st in
case r of
Left err -> (Left err, st')
Right v -> runUP (f v) st'
instance MonadState St Unpickler where
get = UP $ \ st -> (Right st, st)
put st = UP $ \ _ -> (Right (), st)
instance MonadError UnpickleErr Unpickler where
throwError err
= UP $ \ st -> (Left err, st)
-- redundant, not (yet) used
catchError u handler
= UP $ \ st ->
let (r, st') = runUP u st in
case r of
Left err -> runUP (handler err) st -- not st', state will be reset in error case
_ -> (r, st')
throwMsg :: String -> Unpickler a
throwMsg msg = UP $ \ st -> (Left (msg, st), st)
-- | Choice combinator for unpickling
--
-- first 2 arguments are applied sequentially, but if the 1. one fails the
-- 3. arg is applied
mchoice :: Unpickler a -> (a -> Unpickler b) -> Unpickler b -> Unpickler b
mchoice u f v = UP $ \ st ->
let (r, st') = runUP u st in
case r of
Right x
-> runUP (f x) st' -- success
Left e@(_msg, st'')
-> if nesting st'' == nesting st -- true: failure in parsing curr contents
then runUP v st -- try the alternative unpickler
else (Left e, st') -- false: failure in unpickling a subtree of
-- the current contents, so the whole unpickler
-- must fail
-- | Lift a Maybe value into the Unpickler monad.
--
-- The 1. arg is the attached error message
liftMaybe :: String -> Maybe a -> Unpickler a
liftMaybe e v = case v of
Nothing -> throwMsg e
Just x -> return x
-- | Lift an Either value into the Unpickler monad
liftUnpickleVal :: UnpickleVal a -> Unpickler a
liftUnpickleVal v = UP $ \ st -> (v, st)
-- --------------------
getCont :: Unpickler XmlTree
getCont = do cs <- gets contents
case cs of
[] -> throwMsg "no more contents to be read"
(x : xs) -> do modify (\ s -> s {contents = xs})
return x
getAtt :: QName -> Unpickler XmlTree
getAtt qn = do as <- gets attributes
case findAtt as of
Nothing -> throwMsg $ "no attribute value found for " ++ show qn
Just (a, as') -> do modify (\ s -> s {attributes = as'})
return $ nonEmptyVal a
where
findAtt = findElem (maybe False (== qn) . XN.getAttrName)
nonEmptyVal a'
| null (XN.getChildren a') = XN.setChildren [et] a'
| otherwise = a'
where
et = XN.mkText ""
getNSAtt :: String -> Unpickler ()
getNSAtt ns = do as <- gets attributes
case findNS as of
Nothing -> throwMsg $
"no namespace declaration found for namespace " ++ show ns
Just (_a, as') -> do modify (\ s -> s {attributes = as'})
return ()
where
isNS t = (fromMaybe False . fmap isNameSpaceName . XN.getAttrName $ t)
&&
XN.xshow (XN.getChildren t) == ns
findNS = findElem isNS
-- --------------------
emptySt :: St
emptySt = St { attributes = []
, contents = []
, nesting = 0
, pname = mkName "/"
, pelem = True
}
putAtt :: QName -> [XmlTree] -> St -> St
putAtt qn v s = s {attributes = x : attributes s}
where
x = XN.mkAttr qn v
{-# INLINE putAtt #-}
putCont :: XmlTree -> St -> St
putCont x s = s {contents = x : contents s}
{-# INLINE putCont #-}
-- --------------------
--
-- generally useful function for splitting a value from a list
findElem :: (a -> Bool) -> [a] -> Maybe (a, [a])
findElem p = find' id
where
find' _ [] = Nothing
find' prefix (x : xs)
| p x = Just (x, prefix xs)
| otherwise = find' (prefix . (x:)) xs
-- ------------------------------------------------------------
--
-- | Format the context of an error message.
formatSt :: St -> String
formatSt st = fcx ++
fa (attributes st) ++
fc (contents st)
where
fcx = "\n" ++ "context: " ++
( if pelem st
then "element"
else "attribute"
) ++
" " ++ show (pname st)
fc [] = ""
fc cs = "\n" ++ "contents: " ++ formatXML cs
fa [] = ""
fa as = "\n" ++ "attributes: " ++ formatXML as
formatXML = format 80 . showXML
showXML = concat . runLA ( xshowEscapeXml unlistA )
format n s = let s' = take (n + 1) s in
if length s' <= n then s' else take n s ++ "..."
-- ------------------------------------------------------------
-- | conversion of an arbitrary value into an XML document tree.
--
-- The pickler, first parameter, controls the conversion process.
-- Result is a complete document tree including a root node
pickleDoc :: PU a -> a -> XmlTree
pickleDoc p v = XN.mkRoot (attributes st) (contents st)
where
st = appPickle p v emptySt
-- | Conversion of an XML document tree into an arbitrary data type
--
-- The inverse of 'pickleDoc'.
-- This law should hold for all picklers: @ unpickle px . pickle px $ v == Just v @.
-- Not every possible combination of picklers does make sense.
-- For reconverting a value from an XML tree, is becomes neccessary,
-- to introduce \"enough\" markup for unpickling the value
unpickleDoc :: PU a -> XmlTree -> Maybe a
unpickleDoc p = either (const Nothing) Just
. unpickleDoc' p
-- | Like unpickleDoc but with a (sometimes) useful error message, when unpickling failed.
unpickleDoc' :: PU a -> XmlTree -> Either String a
unpickleDoc' p t
| XN.isRoot t = mapErr $
unpickleElem' p 0 t
| otherwise = unpickleDoc' p (XN.mkRoot [] [t])
where
mapErr = either ( Left .
\ (msg, st) -> msg ++ formatSt st
) Right
-- | The main entry for unpickling, called by unpickleDoc
unpickleElem' :: PU a -> Int -> XmlTree -> UnpickleVal a
unpickleElem' p l t
= -- T.trace ("unpickleElem': " ++ show t) $
( fst . runUP (appUnPickle p) )
$ St { attributes = fromMaybe [] .
XN.getAttrl $ t
, contents = XN.getChildren t
, nesting = l
, pname = fromJust .
XN.getName $ t
, pelem = XN.isElem t
}
-- ------------------------------------------------------------
-- | Pickles a value, then writes the document to a string.
showPickled :: (XmlPickler a) => SysConfigList -> a -> String
showPickled a = concat . (pickleDoc xpickle >>> runLA (writeDocumentToString a))
-- ------------------------------------------------------------
-- | The zero pickler
--
-- Encodes nothing, fails always during unpickling
xpZero :: String -> PU a
xpZero err = PU { appPickle = const id
, appUnPickle = throwMsg err
, theSchema = scNull
}
-- | unit pickler
xpUnit :: PU ()
xpUnit = xpLift ()
-- | Check EOF pickler.
--
-- When pickling, this behaves like the unit pickler.
-- The unpickler fails, when there is some unprocessed XML contents left.
xpCheckEmptyContents :: PU a -> PU a
xpCheckEmptyContents pa = PU { appPickle = appPickle pa
, appUnPickle = do res <- appUnPickle pa
cs <- gets contents
if null cs
then return res
else contentsLeft
, theSchema = scNull
}
where
contentsLeft = throwMsg
"xpCheckEmptyContents: unprocessed XML content detected"
-- | Like xpCheckEmptyContents, but checks the attribute list
xpCheckEmptyAttributes :: PU a -> PU a
xpCheckEmptyAttributes pa
= PU { appPickle = appPickle pa
, appUnPickle = do res <- appUnPickle pa
as <- gets attributes
if null as
then return res
else attributesLeft
, theSchema = scNull
}
where
attributesLeft = throwMsg
"xpCheckEmptyAttributes: unprocessed XML attribute(s) detected"
-- | Composition of xpCheckEmptyContents and xpCheckAttributes
xpCheckEmpty :: PU a -> PU a
xpCheckEmpty = xpCheckEmptyAttributes . xpCheckEmptyContents
xpLift :: a -> PU a
xpLift x = PU { appPickle = const id
, appUnPickle = return x
, theSchema = scEmpty
}
-- | Lift a Maybe value to a pickler.
--
-- @Nothing@ is mapped to the zero pickler, @Just x@ is pickled with @xpLift x@.
xpLiftMaybe :: Maybe a -> PU a
xpLiftMaybe v = (xpLiftMaybe'' v) { theSchema = scOption scEmpty }
where
xpLiftMaybe'' Nothing = xpZero "xpLiftMaybe: got Nothing"
xpLiftMaybe'' (Just x) = xpLift x
xpLiftEither :: Either String a -> PU a
xpLiftEither v = (xpLiftEither'' v) { theSchema = scOption scEmpty }
where
xpLiftEither'' (Left err) = xpZero err
xpLiftEither'' (Right x) = xpLift x
-- | Combine two picklers sequentially.
--
-- If the first fails during
-- unpickling, the whole unpickler fails
xpSeq :: (b -> a) -> PU a -> (a -> PU b) -> PU b
xpSeq f pa k
= PU { appPickle = ( \ b ->
let a = f b in
appPickle pa a . appPickle (k a) b
)
, appUnPickle = appUnPickle pa >>= (appUnPickle . k)
, theSchema = undefined
}
-- | First apply a fixed pickler/unpickler, then a 2. one
--
-- If the first fails during unpickling, the whole pickler fails.
-- This can be used to check some properties of the input, e.g. whether
-- a given fixed attribute or a namespace declaration exists
-- ('xpAddFixedAttr', 'xpAddNSDecl')
-- or to filter the input, e.g. to ignore some elements or attributes
-- ('xpFilterCont', 'xpFilterAttr').
--
-- When pickling, this can be used to insert some fixed XML pieces,
-- e.g. namespace declarations,
-- class attributes or other stuff.
xpSeq' :: PU () -> PU a -> PU a
xpSeq' pa = xpWrap ( snd
, \ y -> ((), y)
) .
xpPair pa
-- | combine two picklers with a choice
--
-- Run two picklers in sequence like with xpSeq.
-- If during unpickling the first one fails,
-- an alternative pickler (first argument) is applied.
-- This pickler is only used as combinator for unpickling.
xpChoice :: PU b -> PU a -> (a -> PU b) -> Unpickler b
xpChoice pb pa k = mchoice (appUnPickle pa) (appUnPickle . k) (appUnPickle pb)
-- | map value into another domain and apply pickler there
--
-- One of the most often used picklers.
xpWrap :: (a -> b, b -> a) -> PU a -> PU b
xpWrap (i, j) pa = (xpSeq j pa (xpLift . i)) { theSchema = theSchema pa }
-- | like 'xpWrap', but if the inverse mapping is undefined, the unpickler fails
--
-- Map a value into another domain. If the inverse mapping is
-- undefined (Nothing), the unpickler fails
--
-- Deprecated: Use xpWrapEither, this gives better error messages
xpWrapMaybe :: (a -> Maybe b, b -> a) -> PU a -> PU b
xpWrapMaybe (i, j) pa = (xpSeq j pa (xpLiftMaybe . i)) { theSchema = theSchema pa }
-- | like 'xpWrap', but if the inverse mapping is undefined, the unpickler fails
--
-- Map a value into another domain. If the inverse mapping is
-- undefined, the unpickler fails with an error message in the Left component
xpWrapEither :: (a -> Either String b, b -> a) -> PU a -> PU b
xpWrapEither (i, j) pa = (xpSeq j pa (xpLiftEither . i)) { theSchema = theSchema pa }
-- ------------------------------------------------------------
-- | pickle a pair of values sequentially
--
-- Used for pairs or together with wrap for pickling
-- algebraic data types with two components
xpPair :: PU a -> PU b -> PU (a, b)
xpPair pa pb
= ( xpSeq fst pa (\ a ->
xpSeq snd pb (\ b ->
xpLift (a,b)))
) { theSchema = scSeq (theSchema pa) (theSchema pb) }
-- | Like 'xpPair' but for triples
xpTriple :: PU a -> PU b -> PU c -> PU (a, b, c)
xpTriple pa pb pc
= xpWrap (toTriple, fromTriple) (xpPair pa (xpPair pb pc))
where
toTriple ~(a, ~(b, c)) = (a, b, c )
fromTriple ~(a, b, c ) = (a, (b, c))
-- | Like 'xpPair' and 'xpTriple' but for 4-tuples
xp4Tuple :: PU a -> PU b -> PU c -> PU d -> PU (a, b, c, d)
xp4Tuple pa pb pc pd
= xpWrap (toQuad, fromQuad) (xpPair pa (xpPair pb (xpPair pc pd)))
where
toQuad ~(a, ~(b, ~(c, d))) = (a, b, c, d )
fromQuad ~(a, b, c, d ) = (a, (b, (c, d)))
-- | Like 'xpPair' and 'xpTriple' but for 5-tuples
xp5Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU (a, b, c, d, e)
xp5Tuple pa pb pc pd pe
= xpWrap (toQuint, fromQuint) (xpPair pa (xpPair pb (xpPair pc (xpPair pd pe))))
where
toQuint ~(a, ~(b, ~(c, ~(d, e)))) = (a, b, c, d, e )
fromQuint ~(a, b, c, d, e ) = (a, (b, (c, (d, e))))
-- | Like 'xpPair' and 'xpTriple' but for 6-tuples
xp6Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU (a, b, c, d, e, f)
xp6Tuple pa pb pc pd pe pf
= xpWrap (toSix, fromSix) (xpPair pa (xpPair pb (xpPair pc (xpPair pd (xpPair pe pf)))))
where
toSix ~(a, ~(b, ~(c, ~(d, ~(e, f))))) = (a, b, c, d, e, f )
fromSix ~(a, b, c, d, e, f) = (a, (b, (c, (d, (e, f)))))
-- ------------------------------------------------------------
-- | Like 'xpPair' and 'xpTriple' but for 7-tuples
--
-- Thanks to Tony Morris for doing xp7Tuple, ..., xp24Tuple.
xp7Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU (a, b, c, d, e, f, g)
xp7Tuple a b c d e f g
= xpWrap ( \ (a, (b, c, d, e, f, g)) -> (a, b, c, d, e, f, g)
, \ (a, b, c, d, e, f, g) -> (a, (b, c, d, e, f, g))
)
(xpPair a (xp6Tuple b c d e f g))
xp8Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU (a, b, c, d, e, f, g, h)
xp8Tuple a b c d e f g h
= xpWrap ( \ ((a, b), (c, d, e, f, g, h)) -> (a, b, c, d, e, f, g, h)
, \ (a, b, c, d, e, f, g, h) -> ((a, b), (c, d, e, f, g, h))
)
(xpPair (xpPair a b) (xp6Tuple c d e f g h))
xp9Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU (a, b, c, d, e, f, g, h, i)
xp9Tuple a b c d e f g h i
= xpWrap ( \ ((a, b, c), (d, e, f, g, h, i)) -> (a, b, c, d, e, f, g, h, i)
, \ (a, b, c, d, e, f, g, h, i) -> ((a, b, c), (d, e, f, g, h, i))
)
(xpPair (xpTriple a b c) (xp6Tuple d e f g h i))
xp10Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU (a, b, c, d, e, f, g, h, i, j)
xp10Tuple a b c d e f g h i j
= xpWrap ( \ ((a, b, c, d), (e, f, g, h, i, j)) -> (a, b, c, d, e, f, g, h, i, j)
, \ (a, b, c, d, e, f, g, h, i, j) -> ((a, b, c, d), (e, f, g, h, i, j))
)
(xpPair (xp4Tuple a b c d) (xp6Tuple e f g h i j))
xp11Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU (a, b, c, d, e, f, g, h, i, j, k)
xp11Tuple a b c d e f g h i j k
= xpWrap ( \ ((a, b, c, d, e), (f, g, h, i, j, k)) -> (a, b, c, d, e, f, g, h, i, j, k)
, \ (a, b, c, d, e, f, g, h, i, j, k) -> ((a, b, c, d, e), (f, g, h, i, j, k))
)
(xpPair (xp5Tuple a b c d e) (xp6Tuple f g h i j k))
xp12Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU (a, b, c, d, e, f, g, h, i, j, k, l)
xp12Tuple a b c d e f g h i j k l
= xpWrap ( \ ((a, b, c, d, e, f), (g, h, i, j, k, l)) -> (a, b, c, d, e, f, g, h, i, j, k, l)
, \ (a, b, c, d, e, f, g, h, i, j, k, l) -> ((a, b, c, d, e, f), (g, h, i, j, k, l))
)
(xpPair (xp6Tuple a b c d e f) (xp6Tuple g h i j k l))
xp13Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m)
xp13Tuple a b c d e f g h i j k l m
= xpWrap ( \ (a, (b, c, d, e, f, g), (h, i, j, k, l, m)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m) -> (a, (b, c, d, e, f, g), (h, i, j, k, l, m))
)
(xpTriple a (xp6Tuple b c d e f g) (xp6Tuple h i j k l m))
xp14Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n)
xp14Tuple a b c d e f g h i j k l m n
= xpWrap ( \ ((a, b), (c, d, e, f, g, h), (i, j, k, l, m, n)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n) -> ((a, b), (c, d, e, f, g, h), (i, j, k, l, m, n))
)
(xpTriple (xpPair a b) (xp6Tuple c d e f g h) (xp6Tuple i j k l m n))
xp15Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
xp15Tuple a b c d e f g h i j k l m n o
= xpWrap ( \ ((a, b, c), (d, e, f, g, h, i), (j, k, l, m, n, o)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) -> ((a, b, c), (d, e, f, g, h, i), (j, k, l, m, n, o))
)
(xpTriple (xpTriple a b c) (xp6Tuple d e f g h i) (xp6Tuple j k l m n o))
xp16Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p)
xp16Tuple a b c d e f g h i j k l m n o p
= xpWrap ( \ ((a, b, c, d), (e, f, g, h, i, j), (k, l, m, n, o, p)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) -> ((a, b, c, d), (e, f, g, h, i, j), (k, l, m, n, o, p))
)
(xpTriple (xp4Tuple a b c d) (xp6Tuple e f g h i j) (xp6Tuple k l m n o p))
xp17Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU q -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q)
xp17Tuple a b c d e f g h i j k l m n o p q
= xpWrap ( \ ((a, b, c, d, e), (f, g, h, i, j, k), (l, m, n, o, p, q)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q) -> ((a, b, c, d, e), (f, g, h, i, j, k), (l, m, n, o, p, q))
)
(xpTriple (xp5Tuple a b c d e) (xp6Tuple f g h i j k) (xp6Tuple l m n o p q))
xp18Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU q -> PU r -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r)
xp18Tuple a b c d e f g h i j k l m n o p q r
= xpWrap ( \ ((a, b, c, d, e, f), (g, h, i, j, k, l), (m, n, o, p, q, r)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r) -> ((a, b, c, d, e, f), (g, h, i, j, k, l), (m, n, o, p, q, r))
)
(xpTriple (xp6Tuple a b c d e f) (xp6Tuple g h i j k l) (xp6Tuple m n o p q r))
xp19Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU q -> PU r -> PU s -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s)
xp19Tuple a b c d e f g h i j k l m n o p q r s
= xpWrap ( \ (a, (b, c, d, e, f, g), (h, i, j, k, l, m), (n, o, p, q, r, s)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s) -> (a, (b, c, d, e, f, g), (h, i, j, k, l, m), (n, o, p, q, r, s))
)
(xp4Tuple a (xp6Tuple b c d e f g) (xp6Tuple h i j k l m) (xp6Tuple n o p q r s))
xp20Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU q -> PU r -> PU s -> PU t ->
PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t)
xp20Tuple a b c d e f g h i j k l m n o p q r s t
= xpWrap ( \ ((a, b), (c, d, e, f, g, h), (i, j, k, l, m, n), (o, p, q, r, s, t)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) -> ((a, b), (c, d, e, f, g, h), (i, j, k, l, m, n), (o, p, q, r, s, t))
)
(xp4Tuple (xpPair a b) (xp6Tuple c d e f g h) (xp6Tuple i j k l m n) (xp6Tuple o p q r s t))
xp21Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU q -> PU r -> PU s -> PU t ->
PU u -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u)
xp21Tuple a b c d e f g h i j k l m n o p q r s t u
= xpWrap ( \ ((a, b, c), (d, e, f, g, h, i), (j, k, l, m, n, o), (p, q, r, s, t, u)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u) -> ((a, b, c), (d, e, f, g, h, i), (j, k, l, m, n, o), (p, q, r, s, t, u))
)
(xp4Tuple (xpTriple a b c) (xp6Tuple d e f g h i) (xp6Tuple j k l m n o) (xp6Tuple p q r s t u))
xp22Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU q -> PU r -> PU s -> PU t ->
PU u -> PU v -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v)
xp22Tuple a b c d e f g h i j k l m n o p q r s t u v
= xpWrap ( \ ((a, b, c, d), (e, f, g, h, i, j), (k, l, m, n, o, p), (q, r, s, t, u, v)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v) -> ((a, b, c, d), (e, f, g, h, i, j), (k, l, m, n, o, p), (q, r, s, t, u, v))
)
(xp4Tuple (xp4Tuple a b c d) (xp6Tuple e f g h i j) (xp6Tuple k l m n o p) (xp6Tuple q r s t u v))
xp23Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU q -> PU r -> PU s -> PU t ->
PU u -> PU v -> PU w -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w)
xp23Tuple a b c d e f g h i j k l m n o p q r s t u v w
= xpWrap ( \ ((a, b, c, d, e), (f, g, h, i, j, k), (l, m, n, o, p, q), (r, s, t, u, v, w)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w) -> ((a, b, c, d, e), (f, g, h, i, j, k), (l, m, n, o, p, q), (r, s, t, u, v, w))
)
(xp4Tuple (xp5Tuple a b c d e) (xp6Tuple f g h i j k) (xp6Tuple l m n o p q) (xp6Tuple r s t u v w))
-- | Hopefully no one needs a xp25Tuple
xp24Tuple :: PU a -> PU b -> PU c -> PU d -> PU e ->
PU f -> PU g -> PU h -> PU i -> PU j ->
PU k -> PU l -> PU m -> PU n -> PU o ->
PU p -> PU q -> PU r -> PU s -> PU t ->
PU u -> PU v -> PU w -> PU x -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x)
xp24Tuple a b c d e f g h i j k l m n o p q r s t u v w x
= xpWrap ( \ ((a, b, c, d, e, f), (g, h, i, j, k, l), (m, n, o, p, q, r), (s, t, u, v, w, x)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x)
, \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x) -> ((a, b, c, d, e, f), (g, h, i, j, k, l), (m, n, o, p, q, r), (s, t, u, v, w, x))
)
(xp4Tuple (xp6Tuple a b c d e f) (xp6Tuple g h i j k l) (xp6Tuple m n o p q r) (xp6Tuple s t u v w x))
-- ------------------------------------------------------------
-- | Pickle a string into an XML text node
--
-- One of the most often used primitive picklers. Attention:
-- For pickling empty strings use 'xpText0'. If the text has a more
-- specific datatype than xsd:string, use 'xpTextDT'
xpText :: PU String
xpText = xpTextDT scString1
{-# INLINE xpText #-}
-- | Pickle a string into an XML text node
--
-- Text pickler with a description of the structure of the text
-- by a schema. A schema for a data type can be defined by 'Text.XML.HXT.Arrow.Pickle.Schema.scDT'.
-- In 'Text.XML.HXT.Arrow.Pickle.Schema' there are some more functions for creating
-- simple datatype descriptions.
xpTextDT :: Schema -> PU String
xpTextDT sc = PU { appPickle = putCont . XN.mkText
, appUnPickle = do t <- getCont
liftMaybe "xpText: XML text expected" $ XN.getText t
, theSchema = sc
}
-- | Pickle a possibly empty string into an XML node.
--
-- Must be used in all places, where empty strings are legal values.
-- If the content of an element can be an empty string, this string disapears
-- during storing the DOM into a document and reparse the document.
-- So the empty text node becomes nothing, and the pickler must deliver an empty string,
-- if there is no text node in the document.
xpText0 :: PU String
xpText0 = xpText0DT scString1
{-# INLINE xpText0 #-}
-- | Pickle a possibly empty string with a datatype description into an XML node.
--
-- Like 'xpText0' but with extra Parameter for datatype description as in 'xpTextDT'.
xpText0DT :: Schema -> PU String
xpText0DT sc = xpWrap (fromMaybe "", emptyToNothing) $
xpOption $
xpTextDT sc
where
emptyToNothing "" = Nothing
emptyToNothing x = Just x
-- | Pickle an arbitrary value by applyling show during pickling
-- and read during unpickling.
--
-- Real pickling is then done with 'xpText'.
-- One of the most often used pimitive picklers. Applicable for all
-- types which are instances of @Read@ and @Show@
xpPrim :: (Read a, Show a) => PU a
xpPrim = xpWrapEither (readMaybe, show) xpText
where
readMaybe :: Read a => String -> Either String a
readMaybe str = val (reads str)
where
val [(x,"")] = Right x
val _ = Left $ "xpPrim: reading string " ++ show str ++ " failed"
-- | Pickle an Int
xpInt :: PU Int
xpInt = xpWrapEither (readMaybe, show) xpText
where
readMaybe xs@(_:_)
| all isDigit xs = Right . foldl' (\ r c -> 10 * r + (fromEnum c - fromEnum '0')) 0 $ xs
readMaybe ('-' : xs) = fmap (0 -) . readMaybe $ xs
readMaybe ('+' : xs) = readMaybe $ xs
readMaybe xs = Left $ "xpInt: reading an Int from string " ++ show xs ++ " failed"
-- ------------------------------------------------------------
-- | Pickle an XmlTree by just adding it
--
-- Usefull for components of type XmlTree in other data structures
xpTree :: PU XmlTree
xpTree = PU { appPickle = putCont
, appUnPickle = getCont
, theSchema = Any
}
-- | Pickle a whole list of XmlTrees by just adding the list, unpickle is done by taking all element contents.
--
-- This pickler should always be combined with 'xpElem' for taking the whole contents of an element.
xpTrees :: PU [XmlTree]
xpTrees = (xpList xpTree) { theSchema = Any }
-- | Pickle a string representing XML contents by inserting the tree representation into the XML document.
--
-- Unpickling is done by converting the contents with
-- 'Text.XML.HXT.Arrow.Edit.xshowEscapeXml' into a string,
-- this function will escape all XML special chars, such that pickling the value back becomes save.
-- Pickling is done with 'Text.XML.HXT.Arrow.ReadDocument.xread'
xpXmlText :: PU String
xpXmlText = xpWrap ( showXML, readXML ) $ xpTrees
where
showXML = concat . runLA ( xshowEscapeXml unlistA )
readXML = runLA xread
-- ------------------------------------------------------------
-- | Encoding of optional data by ignoring the Nothing case during pickling
-- and relying on failure during unpickling to recompute the Nothing case
--
-- The default pickler for Maybe types
xpOption :: PU a -> PU (Maybe a)
xpOption pa = PU { appPickle = ( \ a ->
case a of
Nothing -> id
Just x -> appPickle pa x
)
, appUnPickle = xpChoice (xpLift Nothing) pa (xpLift . Just)
, theSchema = scOption (theSchema pa)
}
-- | Optional conversion with default value
--
-- The default value is not encoded in the XML document,
-- during unpickling the default value is inserted if the pickler fails
xpDefault :: (Eq a) => a -> PU a -> PU a
xpDefault df = xpWrap ( fromMaybe df
, \ x -> if x == df then Nothing else Just x
) .
xpOption
-- ------------------------------------------------------------
-- | Encoding of list values by pickling all list elements sequentially.
--
-- Unpickler relies on failure for detecting the end of the list.
-- The standard pickler for lists. Can also be used in combination with 'xpWrap'
-- for constructing set and map picklers
xpList :: PU a -> PU [a]
xpList pa = PU { appPickle = ( \ a ->
case a of
[] -> id
_:_ -> appPickle pc a
)
, appUnPickle = xpChoice
(xpLift [])
pa
(\ x -> xpSeq id (xpList pa) (\xs -> xpLift (x:xs)))
, theSchema = scList (theSchema pa)
}
where
pc = xpSeq head pa (\ x ->
xpSeq tail (xpList pa) (\ xs ->
xpLift (x:xs) ))
-- | Encoding of a none empty list of values
--
-- Attention: when calling this pickler with an empty list,
-- an internal error \"head of empty list is raised\".
xpList1 :: PU a -> PU [a]
xpList1 pa = ( xpWrap (\ (x, xs) -> x : xs
,\ x -> (head x, tail x)
) $
xpPair pa (xpList pa)
) { theSchema = scList1 (theSchema pa) }
-- ------------------------------------------------------------
-- | Standard pickler for maps
--
-- This pickler converts a map into a list of pairs.
-- All key value pairs are mapped to an element with name (1.arg),
-- the key is encoded as an attribute named by the 2. argument,
-- the 3. arg is the pickler for the keys, the last one for the values
xpMap :: Ord k => String -> String -> PU k -> PU v -> PU (Map k v)
xpMap en an xpk xpv
= xpWrap ( M.fromList
, M.toList
) $
xpList $
xpElem en $
xpPair ( xpAttr an $ xpk ) xpv
-- ------------------------------------------------------------
-- | Pickler for sum data types.
--
-- Every constructor is mapped to an index into the list of picklers.
-- The index is used only during pickling, not during unpickling, there the 1. match is taken
xpAlt :: (a -> Int) -> [PU a] -> PU a
xpAlt tag ps = PU { appPickle = \ a ->
appPickle (ps !! tag a) a
, appUnPickle = case ps of
[] -> throwMsg "xpAlt: no matching unpickler found for a sum datatype"
pa:ps1 -> xpChoice (xpAlt tag ps1) pa xpLift
, theSchema = scAlts (map theSchema ps)
}
-- ------------------------------------------------------------
-- | Pickler for wrapping\/unwrapping data into an XML element
--
-- Extra parameter is the element name given as a QName. THE pickler for constructing
-- nested structures
--
-- Example:
--
-- > xpElemQN (mkName "number") $ xpickle
--
-- will map an (42::Int) onto
--
-- > 42
xpElemQN :: QName -> PU a -> PU a
xpElemQN qn pa = PU { appPickle = ( \ a ->
let st' = appPickle pa a emptySt in
putCont (XN.mkElement qn (attributes st') (contents st'))
)
, appUnPickle = upElem
, theSchema = scElem (qualifiedName qn) (theSchema pa)
}
where
upElem = do t <- getCont
n <- liftMaybe "xpElem: XML element expected" $ XN.getElemName t
if n /= qn
then throwMsg ("xpElem: got element name " ++ show n ++ ", but expected " ++ show qn)
else do l <- gets nesting
liftUnpickleVal $ unpickleElem' (xpCheckEmpty pa) (l + 1) t
-- | convenient Pickler for xpElemQN
--
-- > xpElem n = xpElemQN (mkName n)
xpElem :: String -> PU a -> PU a
xpElem = xpElemQN . mkName
-- | convenient Pickler for xpElemQN
-- for pickling elements with respect to namespaces
--
-- > xpElemNS ns px lp = xpElemQN (mkQName px lp ns)
xpElemNS :: String -> String -> String -> PU a -> PU a
xpElemNS ns px lp
= xpElemQN $ mkQName px lp ns
-- ------------------------------------------------------------
-- | Pickler for wrapping\/unwrapping data into an XML element with an attribute with given value
--
-- To make XML structures flexible but limit the number of different elements, it's sometimes
-- useful to use a kind of generic element with a key value structure
--
-- Example:
--
-- > value1
-- > value2
-- > value3
--
-- the Haskell datatype may look like this
--
-- > type T = T { key1 :: Int ; key2 :: String ; key3 :: Double }
--
-- Then the picker for that type looks like this
--
-- > xpT :: PU T
-- > xpT = xpWrap ( uncurry3 T, \ t -> (key1 t, key2 t, key3 t) ) $
-- > xpTriple (xpElemWithAttrValue "attr" "name" "key1" $ xpickle)
-- > (xpElemWithAttrValue "attr" "name" "key2" $ xpText0)
-- > (xpElemWithAttrValue "attr" "name" "key3" $ xpickle)
xpElemWithAttrValue :: String -> String -> String -> PU a -> PU a
xpElemWithAttrValue name an av pa
= xpElem name $
xpAddFixedAttr an av $
pa
-- ------------------------------------------------------------
-- | Pickler for storing\/retreiving data into\/from an attribute value
--
-- The attribute is inserted in the surrounding element constructed by the 'xpElem' pickler
xpAttrQN :: QName -> PU a -> PU a
xpAttrQN qn pa = PU { appPickle = ( \ a ->
let st' = appPickle pa a emptySt in
putAtt qn (contents st')
)
, appUnPickle = upAttr
, theSchema = scAttr (qualifiedName qn) (theSchema pa)
}
where
upAttr = do a <- getAtt qn
l <- gets nesting
liftUnpickleVal $ unpickleElem' (xpCheckEmptyContents pa) l a
-- | convenient Pickler for xpAttrQN
--
-- > xpAttr n = xpAttrQN (mkName n)
xpAttr :: String -> PU a -> PU a
xpAttr = xpAttrQN . mkName
-- | convenient Pickler for xpAttrQN
--
-- > xpAttr ns px lp = xpAttrQN (mkQName px lp ns)
xpAttrNS :: String -> String -> String -> PU a -> PU a
xpAttrNS ns px lp
= xpAttrQN (mkQName px lp ns)
-- | A text attribute.
xpTextAttr :: String -> PU String
xpTextAttr = flip xpAttr xpText
-- | Add an optional attribute for an optional value (Maybe a).
xpAttrImplied :: String -> PU a -> PU (Maybe a)
xpAttrImplied name pa
= xpOption $ xpAttr name pa
xpAttrFixed :: String -> String -> PU ()
xpAttrFixed name val
= ( xpWrapEither ( \ v ->
if v == val
then Right ()
else Left ( "xpAttrFixed: value "
++ show val
++ " expected, but got "
++ show v
)
, const val
) $
xpAttr name xpText
) { theSchema = scAttr name (scFixed val) }
-- | Add/Check an attribute with a fixed value.
--
xpAddFixedAttr :: String -> String -> PU a -> PU a
xpAddFixedAttr name val
= xpSeq' $ xpAttrFixed name val
-- | Add a namespace declaration.
--
-- When generating XML the namespace decl is added,
-- when reading a document, the unpickler checks
-- whether there is a namespace declaration for the given
-- namespace URI (2. arg)
xpAddNSDecl :: String -> String -> PU a -> PU a
xpAddNSDecl name val
= xpSeq' $ xpAttrNSDecl name' val
where
name'
| null name = "xmlns"
| otherwise = "xmlns:" ++ name
xpAttrNSDecl :: String -> String -> PU ()
xpAttrNSDecl name ns
= PU { appPickle = const $ putAtt (mkName name) [XN.mkText ns]
, appUnPickle = getNSAtt ns
, theSchema = scAttr name (scFixed ns)
}
-- ------------------------------------------------------------
xpIgnoreCont :: LA XmlTree XmlTree -> PU ()
xpIgnoreCont = xpIgnoreInput $ \ mf s -> s {contents = mf $ contents s}
xpIgnoreAttr :: LA XmlTree XmlTree -> PU ()
xpIgnoreAttr = xpIgnoreInput $ \ mf s -> s {attributes = mf $ attributes s}
-- | When unpickling, filter the contents of the element currently processed,
-- before applying the pickler argument
--
-- Maybe useful to ignore some stuff in the input, or to do some cleanup before unpickling.
xpFilterCont :: LA XmlTree XmlTree -> PU a -> PU a
xpFilterCont f = xpSeq' $ xpIgnoreCont f
-- | Same as 'xpFilterCont' but for the attribute list of the element currently processed.
--
-- Maybe useful to ignore some stuff in the input, e.g. class attributes, or to do some cleanup before unpickling.
xpFilterAttr :: LA XmlTree XmlTree -> PU a -> PU a
xpFilterAttr f = xpSeq' $ xpIgnoreAttr f
xpIgnoreInput :: (([XmlTree] -> [XmlTree]) -> St -> St) -> LA XmlTree XmlTree -> PU ()
xpIgnoreInput m f
= PU { appPickle = const id
, appUnPickle = do modify (m filterCont)
return ()
, theSchema = scNull
}
where
filterCont = runLA (unlistA >>> f)
-- ------------------------------------------------------------
-- | The class for overloading 'xpickle', the default pickler
class XmlPickler a where
xpickle :: PU a
instance XmlPickler Int where
xpickle = xpPrim
instance XmlPickler Integer where
xpickle = xpPrim
{-
no instance of XmlPickler Char
because then every text would be encoded
char by char, because of the instance for lists
instance XmlPickler Char where
xpickle = xpPrim
-}
instance XmlPickler () where
xpickle = xpUnit
instance (XmlPickler a, XmlPickler b) => XmlPickler (a,b) where
xpickle = xpPair xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c) => XmlPickler (a,b,c) where
xpickle = xpTriple xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d) => XmlPickler (a,b,c,d) where
xpickle = xp4Tuple xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e) => XmlPickler (a,b,c,d,e) where
xpickle = xp5Tuple xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f) => XmlPickler (a, b, c, d, e, f) where
xpickle = xp6Tuple xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g) => XmlPickler (a, b, c, d, e, f, g) where
xpickle = xp7Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h) => XmlPickler (a, b, c, d, e, f, g, h) where
xpickle = xp8Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i) => XmlPickler (a, b, c, d, e, f, g, h, i) where
xpickle = xp9Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j) => XmlPickler (a, b, c, d, e, f, g, h, i, j) where
xpickle = xp10Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k) where
xpickle = xp11Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l) where
xpickle = xp12Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m) where
xpickle = xp13Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n) where
xpickle = xp14Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) where
xpickle = xp15Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) where
xpickle = xp16Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p, XmlPickler q) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q) where
xpickle = xp17Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p, XmlPickler q, XmlPickler r) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r) where
xpickle = xp18Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p, XmlPickler q, XmlPickler r, XmlPickler s) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s) where
xpickle = xp19Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p, XmlPickler q, XmlPickler r, XmlPickler s, XmlPickler t) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) where
xpickle = xp20Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p, XmlPickler q, XmlPickler r, XmlPickler s, XmlPickler t, XmlPickler u) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u) where
xpickle = xp21Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p, XmlPickler q, XmlPickler r, XmlPickler s, XmlPickler t, XmlPickler u, XmlPickler v) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v) where
xpickle = xp22Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p, XmlPickler q, XmlPickler r, XmlPickler s, XmlPickler t, XmlPickler u, XmlPickler v, XmlPickler w) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w) where
xpickle = xp23Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e, XmlPickler f, XmlPickler g, XmlPickler h, XmlPickler i, XmlPickler j, XmlPickler k, XmlPickler l, XmlPickler m, XmlPickler n, XmlPickler o, XmlPickler p, XmlPickler q, XmlPickler r, XmlPickler s, XmlPickler t, XmlPickler u, XmlPickler v, XmlPickler w, XmlPickler x) => XmlPickler (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x) where
xpickle = xp24Tuple xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle xpickle
instance XmlPickler a => XmlPickler [a] where
xpickle = xpList xpickle
instance XmlPickler a => XmlPickler (Maybe a) where
xpickle = xpOption xpickle
-- | Pickler for an arbitrary datum of type 'Either'.
instance (XmlPickler l, XmlPickler r) => XmlPickler (Either l r) where
xpickle = pick xpickle xpickle
where
pick :: PU l -> PU r -> PU (Either l r)
pick lPickler rPickler =
xpAlt (const 0 `either` const 1)
[ xpWrap ( Left -- Construct.
, \ (Left l ) -> l -- Deconstruct.
) lPickler
, xpWrap ( Right -- Construct.
, \ (Right r) -> r -- Deconstruct.
) rPickler
]
-- ------------------------------------------------------------
{-
-- Thanks to treeowl:
-- This script was used to generate the tuple instances:
import Data.List (intercalate)
-- | Generates XmlPickler instances for tuples of size 4 <= n <= 24
mkInstance :: Int -> String
mkInstance n =
"instance (" ++ constrainsts ++ ") => XmlPickler (" ++ tuple ++ ") where\n" ++
" xpickle = xp" ++ show n ++ "Tuple " ++ xpickleStrings
where
xpickleStrings = intercalate " " (replicate n "xpickle")
tuple = intercalate ", " letters
letters = map (:[]) $ take n ['a'..'z']
constrainsts = intercalate ", " $ map oneConstr letters
oneConstr a = "XmlPickler " ++ a
mkInstances :: String
mkInstances = intercalate "\n\n" $ mkInstance <$> [6..24]
-}
-- ------------------------------------------------------------
{- begin embeded test cases
-- ------------------------------------------------------------
--
-- a somewhat complex data structure
-- for representing programs of a simple
-- imperative language
type Program = Stmt
type StmtList = [Stmt]
data Stmt
= Assign Ident Expr
| Stmts StmtList
| If Expr Stmt (Maybe Stmt)
| While Expr Stmt
deriving (Eq, Show)
type Ident = String
data Expr
= IntConst Int
| BoolConst Bool
| Var Ident
| UnExpr UnOp Expr
| BinExpr Op Expr Expr
deriving (Eq, Show)
data Op
= Add | Sub | Mul | Div | Mod | Eq | Neq
deriving (Eq, Ord, Enum, Show)
data UnOp
= UPlus | UMinus | Neg
deriving (Eq, Ord, Read, Show)
-- ------------------------------------------------------------
--
-- the pickler definition for the data types
-- the main pickler
xpProgram :: PU Program
xpProgram = xpElem "program" $
xpAddNSDecl "" "program42" $
xpickle
xpMissingRootElement :: PU Program
xpMissingRootElement = xpickle
instance XmlPickler UnOp where
xpickle = xpPrim
instance XmlPickler Op where
xpickle = xpWrap (toEnum, fromEnum) xpPrim
instance XmlPickler Expr where
xpickle = xpAlt tag ps
where
tag (IntConst _ ) = 0
tag (BoolConst _ ) = 1
tag (Var _ ) = 2
tag (UnExpr _ _ ) = 3
tag (BinExpr _ _ _ ) = 4
ps = [ xpWrap ( IntConst
, \ (IntConst i ) -> i
) $
( xpElem "int" $
xpAttr "value" $
xpickle
)
, xpWrap ( BoolConst
, \ (BoolConst b) -> b
) $
( xpElem "bool" $
xpAttr "value" $
xpWrap (toEnum, fromEnum) xpickle
)
, xpWrap ( Var
, \ (Var n) -> n
) $
( xpElem "var" $
xpAttr "name" $
xpText
)
, xpWrap ( uncurry UnExpr
, \ (UnExpr op e) -> (op, e)
) $
( xpElem "unex" $
xpPair (xpAttr "op" xpickle)
xpickle
)
, xpWrap ( uncurry3 $ BinExpr
, \ (BinExpr op e1 e2) -> (op, e1, e2)
) $
( xpElem "binex" $
xpTriple (xpAttr "op" xpickle)
xpickle
xpickle
)
]
instance XmlPickler Stmt where
xpickle = xpAlt tag ps
where
tag ( Assign _ _ ) = 0
tag ( Stmts _ ) = 1
tag ( If _ _ _ ) = 2
tag ( While _ _ ) = 3
ps = [ xpWrap ( uncurry Assign
, \ (Assign n v) -> (n, v)
) $
( xpElem "assign" $
xpFilterCont (neg $ hasName "comment" <+> isText) $ -- test case test7: remove uninteresting stuff
xpPair (xpAttr "name" xpText)
xpickle
)
, xpWrap ( Stmts
, \ (Stmts sl) -> sl
) $
( xpElem "block" $
xpList xpickle
)
, xpWrap ( uncurry3 If
, \ (If c t e) -> (c, t, e)
) $
( xpElem "if" $
xpTriple xpickle
xpickle
xpickle
)
, xpWrap ( uncurry While
, \ (While c b) -> (c, b)
) $
( xpElem "while" $
xpPair xpickle
xpickle
)
]
-- ------------------------------------------------------------
--
-- example programs
progs :: [Program]
progs = [p0, p1, p2]
p0, p1, p2 :: Program
p0 = Stmts [] -- the empty program
p1 = Stmts
[ Assign i ( UnExpr UMinus ( IntConst (-22) ) )
, Assign j ( IntConst 20 )
, While
( BinExpr Neq ( Var i ) ( IntConst 0 ) )
( Stmts
[ Assign i ( BinExpr Sub ( Var i ) ( IntConst 1 ) )
, Assign j ( BinExpr Add ( Var j ) ( IntConst 1 ) )
, If ( IntConst 0 ) (Stmts []) Nothing
]
)
]
where
i = "i"
j = "j"
p2 = Stmts
[ Assign x (IntConst 6)
, Assign y (IntConst 7)
, Assign p (IntConst 0)
, While
( BinExpr Neq (Var x) (IntConst 0) )
( If ( BinExpr Neq ( BinExpr Mod (Var x) (IntConst 2) ) (IntConst 0) )
( Stmts
[ Assign x ( BinExpr Sub (Var x) (IntConst 1) )
, Assign p ( BinExpr Add (Var p) (Var y) )
]
)
( Just ( Stmts
[ Assign x ( BinExpr Div (Var x) (IntConst 2) )
, Assign y ( BinExpr Mul (Var y) (IntConst 2) )
]
)
)
)
]
where
x = "x"
y = "y"
p = "p"
-- ------------------------------------------------------------
test0 = putStrLn . head . runLA
( xshow (arr (pickleDoc xpProgram)
>>> getChildren
)
)
test0' f = runLA
( xshow (arr (pickleDoc xpProgram)
>>> getChildren
)
>>>
root [] [xread]
>>>
f
)
test1' f = runLA
( xshow (arr (pickleDoc xpProgram)
>>> getChildren
)
>>>
root [] [xread]
>>>
f
>>>
arr (unpickleDoc' xpProgram)
)
test1 = test0' (processTopDown (setQName (mkName "real") `X.when` hasName "int"))
test2 = test1' this
test3 = test1' (processTopDown (setQName (mkName "real") `X.when` hasName "int"))
test4 = test1' (processTopDown (setQName (mkName "xxx") `X.when` hasName "program"))
test5 = test1' (processTopDown (setQName (mkName "xxx") `X.when` hasName "assign"))
test6 = test1' (processTopDownWithAttrl (txt "xxx" `X.when` hasText (== "UMinus")))
test7 = test1' (processTopDown (insertComment `X.when` hasName "assign"))
where insertComment = replaceChildren (getChildren <+> eelem "comment" <+> txt "zzz")
-- ------------------------------------------------------------
-- end embeded test cases -}
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/ProcessDocument.hs 0000644 0000000 0000000 00000027410 12752557014 020334 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.ProcessDocument
Copyright : Copyright (C) 2011 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Compound arrows for reading, parsing, validating and writing XML documents
All arrows use IO and a global state for options, errorhandling, ...
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.ProcessDocument
( parseXmlDocument
, parseXmlDocumentWithExpat
, parseHtmlDocument
, validateDocument
, propagateAndValidateNamespaces
, andValidateNamespaces
, getDocumentContents
)
where
import Control.Arrow
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowList
import Control.Arrow.ArrowTree
import Control.Arrow.ListArrow (fromLA)
import Control.Arrow.NTreeEdit
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.ParserInterface (parseHtmlDoc,
parseXmlDoc)
import Text.XML.HXT.Arrow.Edit (substAllXHTMLEntityRefs,
transfAllCharRef)
import Text.XML.HXT.Arrow.GeneralEntitySubstitution (processGeneralEntities)
import Text.XML.HXT.Arrow.DTDProcessing (processDTD)
import Text.XML.HXT.Arrow.DocumentInput (getXmlContents)
import Text.XML.HXT.Arrow.Namespace (propagateNamespaces, validateNamespaces)
import Text.XML.HXT.DTDValidation.Validation (generalEntitiesDefined,
getDTDSubset,
transform,
validate)
-- ------------------------------------------------------------
{- |
XML parser
Input tree must be a root tree with a text tree as child containing the document to be parsed.
The parser generates from the input string a tree of a wellformed XML document,
processes the DTD (parameter substitution, conditional DTD parts, ...) and
substitutes all general entity references. Next step is character reference substitution.
Last step is the document validation.
Validation can be controlled by an extra parameter.
Example:
> parseXmlDocument True -- parse and validate document
>
> parseXmlDocument False -- only parse document, don't validate
This parser is useful for applications processing correct XML documents.
-}
parseXmlDocument :: Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree
parseXmlDocument validateD substDTD substHTML validateRX
= ( replaceChildren ( ( getAttrValue a_source
&&&
xshow getChildren
)
>>>
parseXmlDoc
>>>
filterErrorMsg
)
>>>
setDocumentStatusFromSystemState "parse XML document"
>>>
( ifA (fromLA getDTDSubset)
( processDTDandEntities
>>>
( if validate' -- validation only possible if there is a DTD
then validateDocument
else this
)
)
( if validate' -- validation only consists of checking
-- for undefined entity refs
-- predefined XML entity refs are substituted
-- in the XML parser into char refs
-- so there is no need for an entity substitution
then traceMsg 2 "checkUndefinedEntityRefs: looking for undefined entity refs"
>>>
perform checkUndefinedEntityRefs
>>>
traceMsg 2 "checkUndefinedEntityRefs: looking for undefined entity refs done"
>>>
setDocumentStatusFromSystemState "decoding document"
else this
)
)
)
`when` documentStatusOk
where
validate'
= validateD && not validateRX
processDTDandEntities
= ( if validateD || substDTD
then processDTD
else this
)
>>>
( if substDTD
then ( processGeneralEntities -- DTD contains general entity definitions
`when`
fromLA generalEntitiesDefined
)
else if substHTML
then substAllXHTMLEntityRefs
else this
)
>>>
transfAllCharRef
checkUndefinedEntityRefs :: IOStateArrow s XmlTree XmlTree
checkUndefinedEntityRefs
= deep isEntityRef
>>>
getEntityRef
>>>
arr (\ en -> "general entity reference \"&" ++ en ++ ";\" is undefined")
>>>
mkError c_err
>>>
filterErrorMsg
-- ------------------------------------------------------------
parseXmlDocumentWithExpat :: IOStateArrow s XmlTree XmlTree
parseXmlDocumentWithExpat
= ( withoutUserState $< getSysVar theExpatParser
)
`when` documentStatusOk
-- ------------------------------------------------------------
{- |
HTML parser
Input tree must be a root tree with a text tree as child containing the document to be parsed.
The parser tries to parse everything as HTML, if the HTML document is not wellformed XML or if
errors occur, warnings are generated. The warnings can be issued, or suppressed.
Example: @ parseHtmlDocument True @ : parse document and issue warnings
This parser is useful for applications like web crawlers, where the pages may contain
arbitray errors, but the application is only interested in parts of the document, e.g. the plain text.
-}
parseHtmlDocument :: IOStateArrow s XmlTree XmlTree
parseHtmlDocument
= ( perform ( getAttrValue a_source
>>>
traceValue 1 (("parseHtmlDoc: parse HTML document " ++) . show)
)
>>>
( parseHtml $< getSysVar (theTagSoup .&&&. theExpat) )
>>>
( removeWarnings $< getSysVar (theWarnings .&&&. theTagSoup) )
>>>
setDocumentStatusFromSystemState "parse HTML document"
>>>
traceTree
>>>
traceSource
>>>
perform ( getAttrValue a_source
>>>
traceValue 1 (\ src -> "parse HTML document " ++ show src ++ " finished")
)
)
`when` documentStatusOk
where
parseHtml (withTagSoup', withExpat')
| withExpat' = withoutUserState $< getSysVar theExpatParser
| withTagSoup' = withoutUserState $< getSysVar theTagSoupParser
| otherwise = traceMsg 1 ("parse document with parsec HTML parser")
>>>
replaceChildren
( ( getAttrValue a_source -- get source name
&&&
xshow getChildren
) -- get string to be parsed
>>>
parseHtmlDoc -- run parser, entity substituion is done in parser
)
removeWarnings (warnings, withTagSoup')
| warnings = processTopDownWithAttrl -- remove warnings inserted by parser and entity subst
filterErrorMsg
| withTagSoup' = this -- warnings are not generated in tagsoup
| otherwise = fromLA $
editNTreeA [isError :-> none] -- remove all warnings from document
-- ------------------------------------------------------------
{- | Document validation
Input must be a complete document tree. The document
is validated with respect to the DTD spec.
Only useful for XML documents containing a DTD.
If the document is valid, it is transformed with respect to the DTD,
normalization of attribute values, adding default values, sorting attributes by name,...
If no error was found, result is the normalized tree,
else the error status is set in the list of attributes
of the root node \"\/\" and the document content is removed from the tree.
-}
validateDocument :: IOStateArrow s XmlTree XmlTree
validateDocument
= ( traceMsg 1 "validating document"
>>>
perform ( validateDoc
>>>
filterErrorMsg
)
>>>
setDocumentStatusFromSystemState "document validation"
>>>
traceMsg 1 "document validated, transforming doc with respect to DTD"
>>>
transformDoc
>>>
traceMsg 1 "document transformed"
>>>
traceSource
>>>
traceTree
)
`when`
documentStatusOk
-- ------------------------------------------------------------
{- | Namespace propagation
Input must be a complete document tree. The namespace declarations
are evaluated and all element and attribute names are processed by
splitting the name into prefix, local part and namespace URI.
Naames are checked with respect to the XML namespace definition
If no error was found, result is the unchanged input tree,
else the error status is set in the list of attributes
of the root node \"\/\" and the document content is removed from the tree.
-}
propagateAndValidateNamespaces :: IOStateArrow s XmlTree XmlTree
propagateAndValidateNamespaces
= ( traceMsg 1 "propagating namespaces"
>>>
propagateNamespaces
>>>
traceDoc "propagating namespaces done"
>>>
andValidateNamespaces
)
`when`
documentStatusOk
andValidateNamespaces :: IOStateArrow s XmlTree XmlTree
andValidateNamespaces
= ( traceMsg 1 "validating namespaces"
>>>
( setDocumentStatusFromSystemState "namespace propagation"
`when`
( validateNamespaces >>> perform filterErrorMsg )
)
>>>
traceMsg 1 "namespace validation finished"
)
`when`
documentStatusOk
-- ------------------------------------------------------------
{- |
creates a new document root, adds all options
as attributes to the document root and calls 'getXmlContents'.
If the document name is the empty string, the document will be read
from standard input.
For supported protocols see 'Text.XML.HXT.Arrow.DocumentInput.getXmlContents'
-}
getDocumentContents :: String -> IOStateArrow s b XmlTree
getDocumentContents src
= root [] []
>>>
addAttr a_source src
>>>
traceMsg 1 ("readDocument: start processing document " ++ show src)
>>>
getXmlContents
-- ------------------------------------------------------------
validateDoc :: ArrowList a => a XmlTree XmlTree
validateDoc = fromLA ( validate
`when`
getDTDSubset -- validate only when DTD decl is present
)
transformDoc :: ArrowList a => a XmlTree XmlTree
transformDoc = fromLA transform
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/ReadDocument.hs 0000644 0000000 0000000 00000051106 12752557014 017570 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.ReadDocument
Copyright : Copyright (C) 2005-2013 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
Compound arrows for reading an XML\/HTML document or an XML\/HTML string
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.ReadDocument
( readDocument
, readFromDocument
, readString
, readFromString
, hread
, hreadDoc
, xread
, xreadDoc
)
where
import Control.Arrow.ListArrows
import Data.Maybe ( fromMaybe )
import qualified Data.Map as M
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.Edit ( canonicalizeAllNodes
, canonicalizeForXPath
, canonicalizeContents
, rememberDTDAttrl
, removeDocWhiteSpace
)
import qualified Text.XML.HXT.Arrow.ParserInterface as PI
import Text.XML.HXT.Arrow.ProcessDocument ( getDocumentContents
, parseXmlDocument
, parseXmlDocumentWithExpat
, parseHtmlDocument
, propagateAndValidateNamespaces
, andValidateNamespaces
)
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs
-- ------------------------------------------------------------
--
{- |
the main document input filter
this filter can be configured by a list of configuration options,
a value of type 'Text.XML.HXT.XmlState.TypeDefs.SysConfig'
for all available options see module 'Text.XML.HXT.Arrow.XmlState.SystemConfig'
- @withValidate yes\/no@ :
switch on\/off DTD validation. Only for XML parsed documents, not for HTML parsing.
- @withSubstDTDEntities yes\/no@ :
switch on\/off entity substitution for general entities defined in DTD validation.
Default is @yes@.
Switching this option and the validation off can lead to faster parsing, in that case
reading the DTD documents is not longer necessary.
Only used with XML parsed documents, not with HTML parsing.
- @withSubstHTMLEntities yes\/no@ :
switch on\/off entity substitution for general entities defined in HTML validation.
Default is @no@.
Switching this option on and the validation and substDTDEntities off can lead to faster parsing,
in that case
reading the DTD documents is not longer necessary, HTML general entities are still substituted.
Only used with XML parsed documents, not with HTML parsing.
- @withParseHTML yes\/no@ :
switch on HTML parsing.
- @withParseByMimeType yes\/no@ :
select XML\/HTML parser by document mime type.
text\/xml and text\/xhtml are parsed as XML, text\/html as HTML.
- @withCheckNamespaces yes\/no@ :
Switch on\/off namespace propagation and checking
- @withInputEncoding \@ :
Set default encoding.
- @withTagSoup@ :
use light weight and lazy parser based on tagsoup lib.
This is only available when package hxt-tagsoup is installed and
the source contains an @import Text.XML.HXT.TagSoup@.
- @withRelaxNG \@ :
validate document with Relax NG, the parameter is for the schema URI.
This implies using XML parser, no validation against DTD, and canonicalisation.
- @withCurl [\...]@ :
Use the libCurl binding for HTTP access.
This is only available when package hxt-curl is installed and
the source contains an @import Text.XML.HXT.Curl@.
- @withHTTP [\...]@ :
Use the Haskell HTTP package for HTTP access.
This is only available when package hxt-http is installed and
the source contains an @import Text.XML.HXT.HTTP@.
examples:
> readDocument [] "test.xml"
reads and validates a document \"test.xml\", no namespace propagation, only canonicalization is performed
> ...
> import Text.XML.HXT.Curl
> ...
>
> readDocument [ withValidate no
> , withInputEncoding isoLatin1
> , withParseByMimeType yes
> , withCurl []
> ] "http://localhost/test.php"
reads document \"test.php\", parses it as HTML or XML depending on the mimetype given from the server, but without validation, default encoding 'isoLatin1'.
HTTP access is done via libCurl.
> readDocument [ withParseHTML yes
> , withInputEncoding isoLatin1
> ] ""
reads a HTML document from standard input, no validation is done when parsing HTML, default encoding is 'isoLatin1',
> readDocument [ withInputEncoding isoLatin1
> , withValidate no
> , withMimeTypeFile "/etc/mime.types"
> , withStrictInput yes
> ] "test.svg"
reads an SVG document from \"test.svg\", sets the mime type by looking in the system mimetype config file,
default encoding is 'isoLatin1',
> ...
> import Text.XML.HXT.Curl
> import Text.XML.HXT.TagSoup
> ...
>
> readDocument [ withParseHTML yes
> , withTagSoup
> , withProxy "www-cache:3128"
> , withCurl []
> , withWarnings no
> ] "http://www.haskell.org/"
reads Haskell homepage with HTML parser, ignoring any warnings
(at the time of writing, there were some HTML errors),
with http access via libCurl interface
and proxy \"www-cache\" at port 3128,
parsing is done with tagsoup HTML parser.
This requires packages \"hxt-curl\" and \"hxt-tagsoup\" to be installed
> readDocument [ withValidate yes
> , withCheckNamespaces yes
> , withRemoveWS yes
> , withTrace 2
> , withHTTP []
> ] "http://www.w3c.org/"
read w3c home page (xhtml), validate and check namespaces, remove whitespace between tags,
trace activities with level 2.
HTTP access is done with Haskell HTTP package
> readDocument [ withValidate no
> , withSubstDTDEntities no
> ...
> ] "http://www.w3c.org/"
read w3c home page (xhtml), but without accessing the DTD given in that document.
Only the predefined XML general entity refs are substituted.
> readDocument [ withValidate no
> , withSubstDTDEntities no
> , withSubstHTMLEntities yes
> ...
> ] "http://www.w3c.org/"
same as above, but with substituion of all general entity refs defined in XHTML.
for minimal complete examples see 'Text.XML.HXT.Arrow.WriteDocument.writeDocument'
and 'runX', the main starting point for running an XML arrow.
-}
readDocument :: SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument config src
= localSysEnv
$
readDocument' config src
readDocument' :: SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument' config src
= configSysVars config
>>>
readD $< getSysVar theWithCache
where
readD True
= constA undefined -- just for generalizing the signature to: IOStateArrow s b XmlTree
>>> -- instead of IOStateArrow s XmlTree XmlTree
(withoutUserState $< (getSysVar theCacheRead >>^ ($ src)))
readD False
= readDocument'' src
readDocument'' :: String -> IOStateArrow s b XmlTree
readDocument'' src
= getDocumentContents src
>>>
( processDoc
$<<
( getMimeType
&&&
getSysVar (theParseByMimeType .&&&.
theParseHTML .&&&.
theAcceptedMimeTypes .&&&.
theRelaxValidate .&&&.
theXmlSchemaValidate
)
)
)
>>>
traceMsg 1 ("readDocument: " ++ show src ++ " processed")
>>>
traceSource
>>>
traceTree
where
processNoneEmptyDoc p
= ifA (fromLA hasEmptyBody)
(replaceChildren none)
p
where
hasEmptyBody
= hasAttrValue transferStatus (/= "200") -- test on empty response body for not o.k. responses
`guards` -- e.g. 3xx status values
( neg getChildren
<+>
( getChildren >>> isWhiteSpace )
)
getMimeType
= getAttrValue transferMimeType >>^ stringToLower
applyMimeTypeHandler mt
= withoutUserState (applyMTH $< getSysVar theMimeTypeHandlers)
where
applyMTH mtTable
= fromMaybe none $
fmap (\ f -> processNoneEmptyDoc
(traceMimeStart >>> f >>> traceMimeEnd)
) $
M.lookup mt mtTable
traceMimeStart
= traceMsg 2 $
"readDocument: calling user defined document parser"
traceMimeEnd
= traceMsg 2 $
"readDocument: user defined document parser finished"
processDoc mimeType options
= traceMsg 1 (unwords [ "readDocument:", show src
, "(mime type:", show mimeType, ") will be processed"
]
)
>>>
( applyMimeTypeHandler mimeType -- try user defined document handlers
`orElse`
processDoc' mimeType options
)
processDoc' mimeType ( parseByMimeType
, ( parseHtml
, ( acceptedMimeTypes
, ( validateWithRelax
, validateWithXmlSchema
))))
= ( if isAcceptedMimeType acceptedMimeTypes mimeType
then ( processNoneEmptyDoc
( ( parse $< getSysVar (theValidate .&&&.
theSubstDTDEntities .&&&.
theSubstHTMLEntities .&&&.
theIgnoreNoneXmlContents .&&&.
theTagSoup .&&&.
theExpat
)
)
>>>
( if isXmlOrHtml
then ( ( checknamespaces $< getSysVar (theCheckNamespaces .&&&.
theTagSoup
)
)
>>>
rememberDTDAttrl
>>>
( canonicalize $< getSysVar (thePreserveComment .&&&.
theCanonicalize .&&&.
theTagSoup
)
)
>>>
( whitespace $< getSysVar (theRemoveWS .&&&.
theTagSoup
)
)
>>>
relaxOrXmlSchema
)
else this
)
)
)
else ( traceMsg 1 (unwords [ "readDocument:", show src
, "mime type:", show mimeType, "not accepted"])
>>>
replaceChildren none -- remove contents of not accepted mimetype
)
)
where
isAcceptedMimeType :: [String] -> String -> Bool
isAcceptedMimeType mts mt
| null mts
||
null mt = True
| otherwise = foldr (matchMt mt') False $ mts'
where
mt' = parseMt mt
mts' = map parseMt
$
mts
parseMt = break (== '/')
>>>
second (drop 1)
matchMt (ma,mi) (mas,mis) r = ( (ma == mas || mas == "*")
&&
(mi == mis || mis == "*")
)
|| r
parse ( validate
, ( substDTD
, ( substHTML
, ( removeNoneXml
, ( withTagSoup'
, withExpat'
)))))
| not isXmlOrHtml = if removeNoneXml
then replaceChildren none -- don't parse, if mime type is not XML nor HTML
else this -- but remove contents when option is set
| isHtml
||
withTagSoup' = configSysVar (setS theLowerCaseNames isHtml)
>>>
parseHtmlDocument -- parse as HTML or with tagsoup XML
| isXml = if withExpat'
then parseXmlDocumentWithExpat
else parseXmlDocument
validate
substDTD
substHTML
validateWithRelax
-- parse as XML
| otherwise = this -- suppress warning
checknamespaces (withNamespaces, withTagSoup')
| withNamespaces
&&
withTagSoup' = andValidateNamespaces -- propagation is done in tagsoup
| withNamespaces
||
validateWithRelax
||
validateWithXmlSchema
= propagateAndValidateNamespaces -- RelaxNG and XML Schema require correct namespaces
| otherwise = this
canonicalize (preserveCmt, (canonicalize', withTagSoup'))
| withTagSoup' = this -- tagsoup already removes redundant stuff
| validateWithRelax
||
validateWithXmlSchema = canonicalizeAllNodes -- no comments in schema validation
| canonicalize'
&&
preserveCmt = canonicalizeForXPath
| canonicalize' = canonicalizeAllNodes
| otherwise = this
relaxOrXmlSchema
| validateWithXmlSchema = withoutUserState $< getSysVar theXmlSchemaValidator
| validateWithRelax = withoutUserState $< getSysVar theRelaxValidator
| otherwise = this
whitespace (removeWS, withTagSoup')
| ( removeWS
||
validateWithXmlSchema -- XML Schema does not like WS
)
&&
not withTagSoup' = removeDocWhiteSpace -- tagsoup already removes whitespace
| otherwise = this
isHtml = ( not parseByMimeType && parseHtml ) -- force HTML
||
( parseByMimeType && isHtmlMimeType mimeType )
isXml = ( not parseByMimeType && not parseHtml )
||
( parseByMimeType
&&
( isXmlMimeType mimeType
||
null mimeType
) -- mime type is XML or not known
)
isXmlOrHtml = isHtml || isXml
-- ------------------------------------------------------------
-- |
-- the arrow version of 'readDocument', the arrow input is the source URI
readFromDocument :: SysConfigList -> IOStateArrow s String XmlTree
readFromDocument config
= applyA ( arr $ readDocument config )
-- ------------------------------------------------------------
-- |
-- read a document that is stored in a normal Haskell String
--
-- the same function as readDocument, but the parameter forms the input.
-- All options available for 'readDocument' are applicable for readString,
-- except input encoding options.
--
-- Encoding: No decoding is done, the String argument is taken as Unicode string
-- All decoding must be done before calling readString, even if the
-- XML document contains an encoding spec.
readString :: SysConfigList -> String -> IOStateArrow s b XmlTree
readString config content
= readDocument config (stringProtocol ++ content)
-- ------------------------------------------------------------
-- |
-- the arrow version of 'readString', the arrow input is the source URI
readFromString :: SysConfigList -> IOStateArrow s String XmlTree
readFromString config
= applyA ( arr $ readString config )
-- ------------------------------------------------------------
-- |
-- parse a string as HTML content, substitute all HTML entity refs and canonicalize tree.
-- (substitute char refs, ...). Errors are ignored.
--
-- This arrow delegates all work to the parseHtmlContent parser in module HtmlParser.
--
-- This is a simpler version of 'readFromString' without any options,
-- but it does not run in the IO monad.
hread :: ArrowXml a => a String XmlTree
hread
= fromLA $
PI.hread -- substHtmlEntityRefs is done in parser
>>> -- as well as subst HTML char refs
editNTreeA [isError :-> none] -- ignores all errors
>>>
canonicalizeContents -- combine text nodes, substitute char refs
-- comments are not removed
-- | like hread, but accepts a whole document, not a HTML content
hreadDoc :: ArrowXml a => a String XmlTree
hreadDoc
= fromLA $
root [] [PI.hreadDoc] -- substHtmlEntityRefs is done in parser
>>> -- as well as subst HTML char refs
editNTreeA [isError :-> none] -- ignores all errors
>>>
canonicalizeForXPath -- remove DTD spec and text in content of root node
-- and do a canonicalizeContents
>>>
getChildren
-- ------------------------------------------------------------
-- |
-- parse a string as XML CONTENT, (no xml decl or doctype decls are allowed),
-- substitute all predefined XML entity refs and canonicalize tree
-- This xread arrow delegates all work to the xread parser function in module XmlParsec
xread :: ArrowXml a => a String XmlTree
xread = PI.xreadCont
-- |
-- a more general version of xread which
-- parses a whole document including a prolog
-- (xml decl, doctype decl) and processing
-- instructions. Doctype decls remain uninterpreted,
-- but are in the list of results trees.
xreadDoc :: ArrowXml a => a String XmlTree
xreadDoc = PI.xreadDoc
{- -- the old version, where the parser does not subst char refs and cdata
xread = root [] [parseXmlContent] -- substXmlEntityRefs is done in parser
>>>
canonicalizeContents
>>>
getChildren
-- -}
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/WriteDocument.hs 0000644 0000000 0000000 00000022732 12752557014 020012 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.WriteDocument
Copyright : Copyright (C) 2005-9 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Compound arrow for writing XML documents
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.WriteDocument
( writeDocument
, writeDocument'
, writeDocumentToString
, prepareContents
)
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow
( initialSysState
)
import Text.XML.HXT.Arrow.Edit ( haskellRepOfXmlDoc
, indentDoc
, addDefaultDTDecl
, preventEmptyElements
, removeDocWhiteSpace
, treeRepOfXmlDoc
)
import Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument
, encodeDocument
, encodeDocument'
)
-- ------------------------------------------------------------
--
{- |
the main filter for writing documents
this filter can be configured by an option list like 'Text.XML.HXT.Arrow.ReadDocument.readDocument'
usage: @ writeDocument optionList destination @
if @ destination @ is the empty string or \"-\", stdout is used as output device
for available options see 'Text.XML.HXT.Arrow.XmlState.SystemConfig'
- @withOutputXML@ :
(default) issue XML: quote special XML chars \>,\<,\",\',& where neccessary
add XML processing instruction
and encode document with respect to output encoding,
- @withOutputHTML@ :
issue HTML: translate all special XML chars and all HTML chars with a corresponding entity reference
into entity references. Do not generate empty elements, e.g. @@ for HTML elements, that are allowed
to contain a none empty body. Result is for the example is @@.
The short form introduces trouble in various browsers.
- @withOutputXHTML@ :
same as @withOutputHTML@, but all none ASCII chars are substituted by char references.
- @withOutputPLAIN@ :
Do not substitute any chars. This is useful when generating something else than XML/HTML, e.g. Haskell source code.
- @withXmlPi yes/no@ :
Add a @@ processing instruction to the beginning of the document.
Default is yes.
- @withAddDefaultDTD@ :
if the document to be written was build by reading another document containing a Document Type Declaration,
this DTD is inserted into the output document (default: no insert)
- @withShowTree yes/no@ :
show DOM tree representation of document (for debugging)
- @withShowHaskell yes/no@ :
show Haskell representaion of document (for debugging)
a minimal main program for copying a document
has the following structure:
> module Main
> where
>
> import Text.XML.HXT.Core
>
> main :: IO ()
> main
> = do
> runX ( readDocument [] "hello.xml"
> >>>
> writeDocument [] "bye.xml"
> )
> return ()
an example for copying a document from the web to standard output with global trace level 1, input trace level 2,
output encoding isoLatin1,
and evaluation of
error code is:
> module Main
> where
>
> import Text.XML.HXT.Core
> import Text.XML.HXT.Curl
> -- or
> -- import Text.XML.HXT.HTTP
> import System.Exit
>
> main :: IO ()
> main
> = do
> [rc] <- runX
> ( configSysVars [ withTrace 1 -- set the defaults for all read-,
> , withCurl [] -- write- and other operations
> -- or withHTTP []
> ]
> >>>
> readDocument [ withTrace 2 -- use these additional
> , withParseHTML yes -- options only for this read
> ]
> "http://www.haskell.org/"
> >>>
> writeDocument [ withOutputEncoding isoLatin1
> ]
> "" -- output to stdout
> >>>
> getErrStatus
> )
> exitWith ( if rc >= c_err
> then ExitFailure 1
> else ExitSuccess
> )
-}
writeDocument :: SysConfigList -> String -> IOStateArrow s XmlTree XmlTree
writeDocument config dst
= localSysEnv
$
configSysVars config
>>>
perform ( (flip writeDocument') dst $< getSysVar theTextMode )
writeDocument' :: Bool -> String -> IOStateArrow s XmlTree XmlTree
writeDocument' textMode dst
= ( traceMsg 1 ("writeDocument: destination is " ++ show dst)
>>>
( (flip prepareContents) encodeDocument $< getSysVar idS )
>>>
traceDoc "document after encoding"
>>>
putXmlDocument textMode dst
>>>
traceMsg 1 "writeDocument: finished"
)
`when`
documentStatusOk
-- ------------------------------------------------------------
-- |
-- Convert a document into a string. Formating is done the same way
-- and with the same options as in 'writeDocument'. Default output encoding is
-- no encoding, that means the result is a normal unicode encode haskell string.
-- The default may be overwritten with the 'Text.XML.HXT.Arrow.XmlState.SystemConfig.withOutputEncoding' option.
-- The XML PI can be suppressed by the 'Text.XML.HXT.XmlKeywords.a_no_xml_pi' option.
--
-- This arrow fails, when the encoding scheme is not supported.
-- The arrow is pure, it does not run in the IO monad.
-- The XML PI is suppressed, if not explicitly turned on with an
-- option @ (a_no_xml_pi, v_0) @
writeDocumentToString :: ArrowXml a => SysConfigList -> a XmlTree String
writeDocumentToString config
= prepareContents ( foldr (>>>) id (withOutputEncoding unicodeString :
withXmlPi no :
config
)
$ initialSysState
) encodeDocument'
>>>
xshow getChildren
-- ------------------------------------------------------------
-- |
-- indent and format output
prepareContents :: ArrowXml a => XIOSysState -> (Bool -> Bool -> String -> a XmlTree XmlTree) -> a XmlTree XmlTree
prepareContents config encodeDoc
= indent
>>>
addDtd
>>>
format
where
indent' = getS theIndent config
removeWS' = getS theRemoveWS config
showTree' = getS theShowTree config
showHaskell' = getS theShowHaskell config
outHtml' = getS theOutputFmt config == HTMLoutput
outXhtml' = getS theOutputFmt config == XHTMLoutput
outXml' = getS theOutputFmt config == XMLoutput
noPi' = not $ getS theXmlPi config
noEEsFor' = getS theNoEmptyElemFor config
addDDTD' = getS theAddDefaultDTD config
outEnc' = getS theOutputEncoding config
addDtd
| addDDTD' = addDefaultDTDecl
| otherwise = this
indent
| indent' = indentDoc -- document indentation
| removeWS' = removeDocWhiteSpace -- remove all whitespace between tags
| otherwise = this
format
| showTree' = treeRepOfXmlDoc
| showHaskell' = haskellRepOfXmlDoc
| outHtml' = preventEmptyElements noEEsFor' True
>>>
encodeDoc -- convert doc into text with respect to output encoding with ASCII as default
False noPi' ( if null outEnc' then usAscii else outEnc' )
| outXhtml' = preventEmptyElements noEEsFor' True
>>>
encodeDoc -- convert doc into text with respect to output encoding
True noPi' outEnc'
| outXml' = ( if null noEEsFor'
then this
else preventEmptyElements noEEsFor' False
)
>>>
encodeDoc -- convert doc into text with respect to output encoding
True noPi' outEnc'
| otherwise = this
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlArrow.hs 0000644 0000000 0000000 00000067453 13625174751 017010 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlArrow
Copyright : Copyright (C) 2011 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Basic arrows for processing XML documents
All arrows use IO and a global state for options, errorhandling, ...
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlArrow
( module Text.XML.HXT.Arrow.XmlArrow )
where
import Control.Arrow -- classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Control.Arrow.ListArrow -- arrow types
import Control.Arrow.StateListArrow
import Control.Arrow.IOListArrow
import Control.Arrow.IOStateListArrow
import Data.Char.Properties.XMLCharProps ( isXmlSpaceChar )
import Data.Maybe
import Text.XML.HXT.DOM.Interface
import qualified Text.XML.HXT.DOM.XmlNode as XN
import qualified Text.XML.HXT.DOM.ShowXml as XS
-- ------------------------------------------------------------
{- | Arrows for processing 'Text.XML.HXT.DOM.TypeDefs.XmlTree's
These arrows can be grouped into predicates, selectors, constructors, and transformers.
All predicates (tests) act like 'Control.Arrow.ArrowIf.none' for failure and 'Control.Arrow.ArrowIf.this' for success.
A logical and can be formed by @ a1 >>> a2 @, a locical or by @ a1 \<+\> a2 @.
Selector arrows will fail, when applied to wrong input, e.g. selecting the text of a node with 'getText'
will fail when applied to a none text node.
Edit arrows will remain the input unchanged, when applied to wrong argument, e.g. editing the content of a text node
with 'changeText' applied to an element node will return the unchanged element node.
-}
infixl 7 +=
class (Arrow a, ArrowList a, ArrowTree a) => ArrowXml a where
-- discriminating predicates
-- | test for text nodes
isText :: a XmlTree XmlTree
isText = isA XN.isText
{-# INLINE isText #-}
isBlob :: a XmlTree XmlTree
isBlob = isA XN.isBlob
{-# INLINE isBlob #-}
-- | test for char reference, used during parsing
isCharRef :: a XmlTree XmlTree
isCharRef = isA XN.isCharRef
{-# INLINE isCharRef #-}
-- | test for entity reference, used during parsing
isEntityRef :: a XmlTree XmlTree
isEntityRef = isA XN.isEntityRef
{-# INLINE isEntityRef #-}
-- | test for comment
isCmt :: a XmlTree XmlTree
isCmt = isA XN.isCmt
{-# INLINE isCmt #-}
-- | test for CDATA section, used during parsing
isCdata :: a XmlTree XmlTree
isCdata = isA XN.isCdata
{-# INLINE isCdata #-}
-- | test for processing instruction
isPi :: a XmlTree XmlTree
isPi = isA XN.isPi
{-# INLINE isPi #-}
-- | test for processing instruction \
isXmlPi :: a XmlTree XmlTree
isXmlPi = isPi >>> hasName "xml"
-- | test for element
isElem :: a XmlTree XmlTree
isElem = isA XN.isElem
{-# INLINE isElem #-}
-- | test for DTD part, used during parsing
isDTD :: a XmlTree XmlTree
isDTD = isA XN.isDTD
{-# INLINE isDTD #-}
-- | test for attribute tree
isAttr :: a XmlTree XmlTree
isAttr = isA XN.isAttr
{-# INLINE isAttr #-}
-- | test for error message
isError :: a XmlTree XmlTree
isError = isA XN.isError
{-# INLINE isError #-}
-- | test for root node (element with name \"\/\")
isRoot :: a XmlTree XmlTree
isRoot = isA XN.isRoot
{-# INLINE isRoot #-}
-- | test for text nodes with text, for which a predicate holds
--
-- example: @hasText (all (\`elem\` \" \\t\\n\"))@ check for text nodes with only whitespace content
hasText :: (String -> Bool) -> a XmlTree XmlTree
hasText p = (isText >>> getText >>> isA p) `guards` this
-- | test for text nodes with only white space
--
-- implemented with 'hasTest'
isWhiteSpace :: a XmlTree XmlTree
isWhiteSpace = hasText (all isXmlSpaceChar)
{-# INLINE isWhiteSpace #-}
-- |
-- test whether a node (element, attribute, pi) has a name with a special property
hasNameWith :: (QName -> Bool) -> a XmlTree XmlTree
hasNameWith p = (getQName >>> isA p) `guards` this
{-# INLINE hasNameWith #-}
-- |
-- test whether a node (element, attribute, pi) has a specific qualified name
-- useful only after namespace propagation
hasQName :: QName -> a XmlTree XmlTree
hasQName n = (getQName >>> isA (== n)) `guards` this
{-# INLINE hasQName #-}
-- |
-- test whether a node has a specific name (prefix:localPart or localPart),
-- generally useful, even without namespace handling
hasName :: String -> a XmlTree XmlTree
hasName n = (getName >>> isA (== n)) `guards` this
{-# INLINE hasName #-}
-- |
-- test whether a node has a specific name as local part,
-- useful only after namespace propagation
hasLocalPart :: String -> a XmlTree XmlTree
hasLocalPart n = (getLocalPart >>> isA (== n)) `guards` this
{-# INLINE hasLocalPart #-}
-- |
-- test whether a node has a specific name prefix,
-- useful only after namespace propagation
hasNamePrefix :: String -> a XmlTree XmlTree
hasNamePrefix n = (getNamePrefix >>> isA (== n)) `guards` this
{-# INLINE hasNamePrefix #-}
-- |
-- test whether a node has a specific namespace URI
-- useful only after namespace propagation
hasNamespaceUri :: String -> a XmlTree XmlTree
hasNamespaceUri n = (getNamespaceUri >>> isA (== n)) `guards` this
{-# INLINE hasNamespaceUri #-}
-- |
-- test whether an element node has an attribute node with a specific name
hasAttr :: String -> a XmlTree XmlTree
hasAttr n = (getAttrl >>> hasName n) `guards` this
{-# INLINE hasAttr #-}
-- |
-- test whether an element node has an attribute node with a specific qualified name
hasQAttr :: QName -> a XmlTree XmlTree
hasQAttr n = (getAttrl >>> hasQName n) `guards` this
{-# INLINE hasQAttr #-}
-- |
-- test whether an element node has an attribute with a specific value
hasAttrValue :: String -> (String -> Bool) -> a XmlTree XmlTree
hasAttrValue n p = (getAttrl >>> hasName n >>> xshow getChildren >>> isA p) `guards` this
-- |
-- test whether an element node has an attribute with a qualified name and a specific value
hasQAttrValue :: QName -> (String -> Bool) -> a XmlTree XmlTree
hasQAttrValue n p = (getAttrl >>> hasQName n >>> xshow getChildren >>> isA p) `guards` this
-- constructor arrows ------------------------------------------------------------
-- | text node construction arrow
mkText :: a String XmlTree
mkText = arr XN.mkText
{-# INLINE mkText #-}
-- | blob node construction arrow
mkBlob :: a Blob XmlTree
mkBlob = arr XN.mkBlob
{-# INLINE mkBlob #-}
-- | char reference construction arrow, useful for document output
mkCharRef :: a Int XmlTree
mkCharRef = arr XN.mkCharRef
{-# INLINE mkCharRef #-}
-- | entity reference construction arrow, useful for document output
mkEntityRef :: a String XmlTree
mkEntityRef = arr XN.mkEntityRef
{-# INLINE mkEntityRef #-}
-- | comment node construction, useful for document output
mkCmt :: a String XmlTree
mkCmt = arr XN.mkCmt
{-# INLINE mkCmt #-}
-- | CDATA construction, useful for document output
mkCdata :: a String XmlTree
mkCdata = arr XN.mkCdata
{-# INLINE mkCdata #-}
-- | error node construction, useful only internally
mkError :: Int -> a String XmlTree
mkError level = arr (XN.mkError level)
-- | element construction:
-- | the attributes and the content of the element are computed by applying arrows
-- to the input
mkElement :: QName -> a n XmlTree -> a n XmlTree -> a n XmlTree
mkElement n af cf = (listA af &&& listA cf)
>>>
arr2 (\ al cl -> XN.mkElement n al cl)
-- | attribute node construction:
-- | the attribute value is computed by applying an arrow to the input
mkAttr :: QName -> a n XmlTree -> a n XmlTree
mkAttr qn f = listA f >>> arr (XN.mkAttr qn)
-- | processing instruction construction:
-- | the content of the processing instruction is computed by applying an arrow to the input
mkPi :: QName -> a n XmlTree -> a n XmlTree
mkPi qn f = listA f >>> arr (XN.mkPi qn)
-- convenient arrows for constructors --------------------------------------------------
-- | convenient arrow for element construction, more comfortable variant of 'mkElement'
--
-- example for simplifying 'mkElement' :
--
-- > mkElement qn (a1 <+> ... <+> ai) (c1 <+> ... <+> cj)
--
-- equals
--
-- > mkqelem qn [a1,...,ai] [c1,...,cj]
mkqelem :: QName -> [a n XmlTree] -> [a n XmlTree] -> a n XmlTree
mkqelem n afs cfs = mkElement n (catA afs) (catA cfs)
{-# INLINE mkqelem #-}
-- | convenient arrow for element construction with strings instead of qualified names as element names, see also 'mkElement' and 'mkelem'
mkelem :: String -> [a n XmlTree] -> [a n XmlTree] -> a n XmlTree
mkelem n afs cfs = mkElement (mkName n) (catA afs) (catA cfs)
{-# INLINE mkelem #-}
-- | convenient arrow for element construction with attributes but without content, simple variant of 'mkelem' and 'mkElement'
aelem :: String -> [a n XmlTree] -> a n XmlTree
aelem n afs = catA afs >. \ al -> XN.mkElement (mkName n) al []
{-# INLINE aelem #-}
-- | convenient arrow for simple element construction without attributes, simple variant of 'mkelem' and 'mkElement'
selem :: String -> [a n XmlTree] -> a n XmlTree
selem n cfs = catA cfs >. XN.mkElement (mkName n) []
{-# INLINE selem #-}
-- | convenient arrow for construction of empty elements without attributes, simple variant of 'mkelem' and 'mkElement'
eelem :: String -> a n XmlTree
eelem n = constA (XN.mkElement (mkName n) [] [])
{-# INLINE eelem #-}
-- | construction of an element node with name \"\/\" for document roots
root :: [a n XmlTree] -> [a n XmlTree] -> a n XmlTree
root = mkelem t_root
{-# INLINE root #-}
-- | alias for 'mkAttr'
qattr :: QName -> a n XmlTree -> a n XmlTree
qattr = mkAttr
{-# INLINE qattr #-}
-- | convenient arrow for attribute construction, simple variant of 'mkAttr'
attr :: String -> a n XmlTree -> a n XmlTree
attr = mkAttr . mkName
{-# INLINE attr #-}
-- constant arrows (ignoring the input) for tree construction ------------------------------
-- | constant arrow for text nodes
txt :: String -> a n XmlTree
txt = constA . XN.mkText
{-# INLINE txt #-}
-- | constant arrow for blob nodes
blb :: Blob -> a n XmlTree
blb = constA . XN.mkBlob
{-# INLINE blb #-}
-- | constant arrow for char reference nodes
charRef :: Int -> a n XmlTree
charRef = constA . XN.mkCharRef
{-# INLINE charRef #-}
-- | constant arrow for entity reference nodes
entityRef :: String -> a n XmlTree
entityRef = constA . XN.mkEntityRef
{-# INLINE entityRef #-}
-- | constant arrow for comment
cmt :: String -> a n XmlTree
cmt = constA . XN.mkCmt
{-# INLINE cmt #-}
-- | constant arrow for warning
warn :: String -> a n XmlTree
warn = constA . (XN.mkError c_warn)
{-# INLINE warn #-}
-- | constant arrow for errors
err :: String -> a n XmlTree
err = constA . (XN.mkError c_err)
{-# INLINE err #-}
-- | constant arrow for fatal errors
fatal :: String -> a n XmlTree
fatal = constA . (XN.mkError c_fatal)
{-# INLINE fatal #-}
-- | constant arrow for simple processing instructions, see 'mkPi'
spi :: String -> String -> a n XmlTree
spi piName piCont = constA (XN.mkPi (mkName piName) [XN.mkAttr (mkName a_value) [XN.mkText piCont]])
{-# INLINE spi #-}
-- | constant arrow for attribute nodes, attribute name is a qualified name and value is a text,
-- | see also 'mkAttr', 'qattr', 'attr'
sqattr :: QName -> String -> a n XmlTree
sqattr an av = constA (XN.mkAttr an [XN.mkText av])
{-# INLINE sqattr #-}
-- | constant arrow for attribute nodes, attribute name and value are
-- | given by parameters, see 'mkAttr'
sattr :: String -> String -> a n XmlTree
sattr an av = constA (XN.mkAttr (mkName an) [XN.mkText av])
{-# INLINE sattr #-}
-- selector arrows --------------------------------------------------
-- | select the text of a text node
getText :: a XmlTree String
getText = arrL (maybeToList . XN.getText)
{-# INLINE getText #-}
-- | select the value of a char reference
getCharRef :: a XmlTree Int
getCharRef = arrL (maybeToList . XN.getCharRef)
{-# INLINE getCharRef #-}
-- | select the name of a entity reference node
getEntityRef :: a XmlTree String
getEntityRef = arrL (maybeToList . XN.getEntityRef)
{-# INLINE getEntityRef #-}
-- | select the comment of a comment node
getCmt :: a XmlTree String
getCmt = arrL (maybeToList . XN.getCmt)
{-# INLINE getCmt #-}
-- | select the content of a CDATA node
getCdata :: a XmlTree String
getCdata = arrL (maybeToList . XN.getCdata)
{-# INLINE getCdata #-}
-- | select the name of a processing instruction
getPiName :: a XmlTree QName
getPiName = arrL (maybeToList . XN.getPiName)
{-# INLINE getPiName #-}
-- | select the content of a processing instruction
getPiContent :: a XmlTree XmlTree
getPiContent = arrL (fromMaybe [] . XN.getPiContent)
{-# INLINE getPiContent #-}
-- | select the name of an element node
getElemName :: a XmlTree QName
getElemName = arrL (maybeToList . XN.getElemName)
{-# INLINE getElemName #-}
-- | select the attribute list of an element node
getAttrl :: a XmlTree XmlTree
getAttrl = arrL (fromMaybe [] . XN.getAttrl)
{-# INLINE getAttrl #-}
-- | select the DTD type of a DTD node
getDTDPart :: a XmlTree DTDElem
getDTDPart = arrL (maybeToList . XN.getDTDPart)
{-# INLINE getDTDPart #-}
-- | select the DTD attributes of a DTD node
getDTDAttrl :: a XmlTree Attributes
getDTDAttrl = arrL (maybeToList . XN.getDTDAttrl)
{-# INLINE getDTDAttrl #-}
-- | select the name of an attribute
getAttrName :: a XmlTree QName
getAttrName = arrL (maybeToList . XN.getAttrName)
{-# INLINE getAttrName #-}
-- | select the error level (c_warn, c_err, c_fatal) from an error node
getErrorLevel :: a XmlTree Int
getErrorLevel = arrL (maybeToList . XN.getErrorLevel)
{-# INLINE getErrorLevel #-}
-- | select the error message from an error node
getErrorMsg :: a XmlTree String
getErrorMsg = arrL (maybeToList . XN.getErrorMsg)
{-# INLINE getErrorMsg #-}
-- | select the qualified name from an element, attribute or pi
getQName :: a XmlTree QName
getQName = arrL (maybeToList . XN.getName)
{-# INLINE getQName #-}
-- | select the prefix:localPart or localPart from an element, attribute or pi
getName :: a XmlTree String
getName = arrL (maybeToList . XN.getQualifiedName)
{-# INLINE getName #-}
-- | select the univeral name ({namespace URI} ++ localPart)
getUniversalName :: a XmlTree String
getUniversalName = arrL (maybeToList . XN.getUniversalName)
{-# INLINE getUniversalName #-}
-- | select the univeral name (namespace URI ++ localPart)
getUniversalUri :: a XmlTree String
getUniversalUri = arrL (maybeToList . XN.getUniversalUri)
{-# INLINE getUniversalUri #-}
-- | select the local part
getLocalPart :: a XmlTree String
getLocalPart = arrL (maybeToList . XN.getLocalPart)
{-# INLINE getLocalPart #-}
-- | select the name prefix
getNamePrefix :: a XmlTree String
getNamePrefix = arrL (maybeToList . XN.getNamePrefix)
{-# INLINE getNamePrefix #-}
-- | select the namespace URI
getNamespaceUri :: a XmlTree String
getNamespaceUri = arrL (maybeToList . XN.getNamespaceUri)
{-# INLINE getNamespaceUri #-}
-- | select the value of an attribute of an element node,
-- always succeeds with empty string as default value \"\"
getAttrValue :: String -> a XmlTree String
getAttrValue n = xshow (getAttrl >>> hasName n >>> getChildren)
-- | like 'getAttrValue', but fails if the attribute does not exist
getAttrValue0 :: String -> a XmlTree String
getAttrValue0 n = getAttrl >>> hasName n >>> xshow getChildren
-- | like 'getAttrValue', but select the value of an attribute given by a qualified name,
-- always succeeds with empty string as default value \"\"
getQAttrValue :: QName -> a XmlTree String
getQAttrValue n = xshow (getAttrl >>> hasQName n >>> getChildren)
-- | like 'getQAttrValue', but fails if attribute does not exist
getQAttrValue0 :: QName -> a XmlTree String
getQAttrValue0 n = getAttrl >>> hasQName n >>> xshow getChildren
-- edit arrows --------------------------------------------------
-- | edit the string of a text node
changeText :: (String -> String) -> a XmlTree XmlTree
changeText cf = arr (XN.changeText cf) `when` isText
-- | edit the blob of a blob node
changeBlob :: (Blob -> Blob) -> a XmlTree XmlTree
changeBlob cf = arr (XN.changeBlob cf) `when` isBlob
-- | edit the comment string of a comment node
changeCmt :: (String -> String) -> a XmlTree XmlTree
changeCmt cf = arr (XN.changeCmt cf) `when` isCmt
-- | edit an element-, attribute- or pi- name
changeQName :: (QName -> QName) -> a XmlTree XmlTree
changeQName cf = arr (XN.changeName cf) `when` getQName
-- | edit an element name
changeElemName :: (QName -> QName) -> a XmlTree XmlTree
changeElemName cf = arr (XN.changeElemName cf) `when` isElem
-- | edit an attribute name
changeAttrName :: (QName -> QName) -> a XmlTree XmlTree
changeAttrName cf = arr (XN.changeAttrName cf) `when` isAttr
-- | edit a pi name
changePiName :: (QName -> QName) -> a XmlTree XmlTree
changePiName cf = arr (XN.changePiName cf) `when` isPi
-- | edit an attribute value
changeAttrValue :: (String -> String) -> a XmlTree XmlTree
changeAttrValue cf = replaceChildren ( xshow getChildren
>>> arr cf
>>> mkText
)
`when` isAttr
-- | edit an attribute list of an element node
changeAttrl :: (XmlTrees -> XmlTrees -> XmlTrees) -> a XmlTree XmlTree -> a XmlTree XmlTree
changeAttrl cf f = ( ( listA f &&& this )
>>>
arr2 changeAL
)
`when`
( isElem <+> isPi )
where
changeAL as x = XN.changeAttrl (\ xs -> cf xs as) x
-- | replace an element, attribute or pi name
setQName :: QName -> a XmlTree XmlTree
setQName n = changeQName (const n)
{-# INLINE setQName #-}
-- | replace an element name
setElemName :: QName -> a XmlTree XmlTree
setElemName n = changeElemName (const n)
{-# INLINE setElemName #-}
-- | replace an attribute name
setAttrName :: QName -> a XmlTree XmlTree
setAttrName n = changeAttrName (const n)
{-# INLINE setAttrName #-}
-- | replace an element name
setPiName :: QName -> a XmlTree XmlTree
setPiName n = changePiName (const n)
{-# INLINE setPiName #-}
-- | replace an atribute list of an element node
setAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree
setAttrl = changeAttrl (const id) -- (\ x y -> y)
{-# INLINE setAttrl #-}
-- | add a list of attributes to an element
addAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree
addAttrl = changeAttrl (XN.mergeAttrl)
{-# INLINE addAttrl #-}
-- | add (or replace) an attribute
addAttr :: String -> String -> a XmlTree XmlTree
addAttr an av = addAttrl (sattr an av)
{-# INLINE addAttr #-}
-- | remove an attribute
removeAttr :: String -> a XmlTree XmlTree
removeAttr an = processAttrl (none `when` hasName an)
-- | remove an attribute with a qualified name
removeQAttr :: QName -> a XmlTree XmlTree
removeQAttr an = processAttrl (none `when` hasQName an)
-- | process the attributes of an element node with an arrow
processAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree
processAttrl f = setAttrl (getAttrl >>> f)
-- | process a whole tree inclusive attribute list of element nodes
-- see also: 'Control.Arrow.ArrowTree.processTopDown'
processTopDownWithAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree
processTopDownWithAttrl f = processTopDown ( f >>> ( processAttrl (processTopDown f) `when` isElem))
-- | convenient op for adding attributes or children to a node
--
-- usage: @ tf += cf @
--
-- the @tf@ arrow computes an element node, and all trees computed by @cf@ are
-- added to this node, if a tree is an attribute, it is inserted in the attribute list
-- else it is appended to the content list.
--
-- attention: do not build long content list this way because '+=' is implemented by ++
--
-- examples:
--
-- > eelem "a"
-- > += sattr "href" "page.html"
-- > += sattr "name" "here"
-- > += txt "look here"
--
-- is the same as
--
-- > mkelem [ sattr "href" "page.html"
-- > , sattr "name" "here"
-- > ]
-- > [ txt "look here" ]
--
-- and results in the XML fragment: \look here\<\/a\>
--
-- advantage of the '+=' operator is, that attributes and content can be added
-- any time step by step.
-- if @tf@ computes a whole list of trees, e.g. a list of \"td\" or \"tr\" elements,
-- the attributes or content is added to all trees. useful for adding \"class\" or \"style\" attributes
-- to table elements.
(+=) :: a b XmlTree -> a b XmlTree -> a b XmlTree
tf += cf = (tf &&& listA cf) >>> arr2 addChildren
where
addChildren :: XmlTree -> XmlTrees -> XmlTree
addChildren t cs
= foldl addChild t cs
addChild :: XmlTree -> XmlTree -> XmlTree
addChild t c
| not (XN.isElem t)
= t
| XN.isAttr c
= XN.changeAttrl (XN.addAttr c) t
| otherwise
= XN.changeChildren (++ [c]) t
-- | apply an arrow to the input and convert the resulting XML trees into a string representation
xshow :: a n XmlTree -> a n String
xshow f = f >. XS.xshow
{-# INLINE xshow #-}
-- | apply an arrow to the input and convert the resulting XML trees into a string representation
xshowBlob :: a n XmlTree -> a n Blob
xshowBlob f = f >. XS.xshowBlob
{-# INLINE xshowBlob #-}
{- | Document Type Definition arrows
These are separated, because they are not needed for document processing,
only when processing the DTD, e.g. for generating access funtions for the toolbox
from a DTD (se example DTDtoHaskell in the examples directory)
-}
class (ArrowXml a) => ArrowDTD a where
isDTDDoctype :: a XmlTree XmlTree
isDTDDoctype = isA (maybe False (== DOCTYPE ) . XN.getDTDPart)
isDTDElement :: a XmlTree XmlTree
isDTDElement = isA (maybe False (== ELEMENT ) . XN.getDTDPart)
isDTDContent :: a XmlTree XmlTree
isDTDContent = isA (maybe False (== CONTENT ) . XN.getDTDPart)
isDTDAttlist :: a XmlTree XmlTree
isDTDAttlist = isA (maybe False (== ATTLIST ) . XN.getDTDPart)
isDTDEntity :: a XmlTree XmlTree
isDTDEntity = isA (maybe False (== ENTITY ) . XN.getDTDPart)
isDTDPEntity :: a XmlTree XmlTree
isDTDPEntity = isA (maybe False (== PENTITY ) . XN.getDTDPart)
isDTDNotation :: a XmlTree XmlTree
isDTDNotation = isA (maybe False (== NOTATION) . XN.getDTDPart)
isDTDCondSect :: a XmlTree XmlTree
isDTDCondSect = isA (maybe False (== CONDSECT) . XN.getDTDPart)
isDTDName :: a XmlTree XmlTree
isDTDName = isA (maybe False (== NAME ) . XN.getDTDPart)
isDTDPERef :: a XmlTree XmlTree
isDTDPERef = isA (maybe False (== PEREF ) . XN.getDTDPart)
hasDTDAttr :: String -> a XmlTree XmlTree
hasDTDAttr n = isA (isJust . lookup n . fromMaybe [] . XN.getDTDAttrl)
getDTDAttrValue :: String -> a XmlTree String
getDTDAttrValue n = arrL (maybeToList . lookup n . fromMaybe [] . XN.getDTDAttrl)
setDTDAttrValue :: String -> String -> a XmlTree XmlTree
setDTDAttrValue n v = arr (XN.changeDTDAttrl (addEntry n v)) `when` isDTD
mkDTDElem :: DTDElem -> Attributes -> a n XmlTree -> a n XmlTree
mkDTDElem e al cf = listA cf >>> arr (XN.mkDTDElem e al)
mkDTDDoctype :: Attributes -> a n XmlTree -> a n XmlTree
mkDTDDoctype = mkDTDElem DOCTYPE
mkDTDElement :: Attributes -> a n XmlTree
mkDTDElement al = mkDTDElem ELEMENT al none
mkDTDEntity :: Attributes -> a n XmlTree
mkDTDEntity al = mkDTDElem ENTITY al none
mkDTDPEntity :: Attributes -> a n XmlTree
mkDTDPEntity al = mkDTDElem PENTITY al none
instance ArrowXml LA
instance ArrowXml (SLA s)
instance ArrowXml IOLA
instance ArrowXml (IOSLA s)
instance ArrowDTD LA
instance ArrowDTD (SLA s)
instance ArrowDTD IOLA
instance ArrowDTD (IOSLA s)
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlOptions.hs 0000644 0000000 0000000 00000033036 13001350442 017314 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlOptions
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
system configuration and common options options
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlOptions
where
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.XmlState.SystemConfig
import Data.Maybe
import System.Console.GetOpt
-- ------------------------------------------------------------
--
-- |
-- commonly useful options for XML input
--
-- can be used for option definition with haskell getopt
--
-- defines options: 'a_trace', 'a_proxy', 'a_use_curl', 'a_do_not_use_curl', 'a_options_curl', 'a_encoding',
-- 'a_issue_errors', 'a_do_not_issue_errors', 'a_parse_html', 'a_parse_by_mimetype', 'a_issue_warnings', 'a_do_not_issue_warnings',
-- 'a_parse_xml', 'a_validate', 'a_do_not_validate', 'a_canonicalize', 'a_do_not_canonicalize',
--- 'a_preserve_comment', 'a_do_not_preserve_comment', 'a_check_namespaces', 'a_do_not_check_namespaces',
-- 'a_remove_whitespace', 'a_do_not_remove_whitespace'
inputOptions :: [OptDescr SysConfig]
inputOptions
= [ Option "t" [a_trace] (OptArg trc "LEVEL") "trace level (0-4), default 1"
, Option "p" [a_proxy] (ReqArg withProxy "PROXY") "proxy for http access (e.g. \"www-cache:3128\")"
, Option "" [a_redirect] (NoArg (withRedirect True)) "automatically follow redirected URIs"
, Option "" [a_no_redirect] (NoArg (withRedirect False)) "switch off following redirected URIs"
, Option "" [a_default_baseuri] (ReqArg withDefaultBaseURI "URI") "default base URI, default: \"file:////\""
, Option "e" [a_encoding] (ReqArg withInputEncoding "CHARSET") ( "default document encoding (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ", ...)" )
, Option "" [a_mime_types] (ReqArg withMimeTypeFile "FILE") "set mime type configuration file, e.g. \"/etc/mime.types\""
, Option "" [a_issue_errors] (NoArg (withErrors True)) "issue all error messages on stderr (default)"
, Option "" [a_do_not_issue_errors] (NoArg (withErrors False)) "ignore all error messages"
, Option "" [a_ignore_encoding_errors] (NoArg (withEncodingErrors False)) "ignore encoding errors"
, Option "" [a_ignore_none_xml_contents] (NoArg (withIgnoreNoneXmlContents True)) "discards all contents of none XML/HTML documents, only the meta info remains in the doc tree"
, Option "" [a_accept_mimetypes] (ReqArg withMT "MIMETYPES") "only accept documents matching the given comma separated list of mimetype specs"
, Option "H" [a_parse_html] (NoArg (withParseHTML True)) "parse input as HTML, try to interprete everything as HTML, no validation"
, Option "M" [a_parse_by_mimetype] (NoArg (withParseByMimeType True)) "parse dependent on mime type: text/html as HTML, text/xml and text/xhtml and others as XML, else no parse"
, Option "" [a_parse_xml] (NoArg (withParseHTML False)) "parse input as XML, (default)"
, Option "" [a_strict_input] (NoArg (withStrictInput True)) "read input files strictly, this ensures closing the files correctly even if not read completely"
, Option "" [a_issue_warnings] (NoArg (withWarnings True)) "issue warnings, when parsing HTML (default)"
, Option "Q" [a_do_not_issue_warnings] (NoArg (withWarnings False)) "ignore warnings, when parsing HTML"
, Option "" [a_validate] (NoArg (withValidate True)) "document validation when parsing XML (default)"
, Option "w" [a_do_not_validate] (NoArg (withValidate False)) "only wellformed check, no validation"
, Option "" [a_subst_dtd_entities] (NoArg (withSubstDTDEntities True)) "entities defined in DTD are substituted when parsing XML (default)"
, Option "" [a_do_not_subst_dtd_entities] (NoArg (withSubstDTDEntities False)) "entities defined in DTD are NOT substituted when parsing XML"
, Option "" [a_subst_html_entities] (NoArg (withSubstHTMLEntities True)) "entities defined in XHTML are substituted when parsing XML, only in effect when prev. option is switched off"
, Option "" [a_do_not_subst_html_entities] (NoArg (withSubstHTMLEntities False)) "only entities predefined in XML are substituted when parsing XML (default)"
, Option "" [a_canonicalize] (NoArg (withCanonicalize True)) "canonicalize document, remove DTD, comment, transform CDATA, CharRef's, ... (default)"
, Option "c" [a_do_not_canonicalize] (NoArg (withCanonicalize False)) "do not canonicalize document, don't remove DTD, comment, don't transform CDATA, CharRef's, ..."
, Option "C" [a_preserve_comment] (NoArg (withPreserveComment True)) "don't remove comments during canonicalisation"
, Option "" [a_do_not_preserve_comment] (NoArg (withPreserveComment False)) "remove comments during canonicalisation (default)"
, Option "n" [a_check_namespaces] (NoArg (withCheckNamespaces True)) "tag tree with namespace information and check namespaces"
, Option "" [a_do_not_check_namespaces] (NoArg (withCheckNamespaces False)) "ignore namespaces (default)"
, Option "r" [a_remove_whitespace] (NoArg (withRemoveWS True)) "remove redundant whitespace, simplifies tree and processing"
, Option "" [a_do_not_remove_whitespace] (NoArg (withRemoveWS False)) "don't remove redundant whitespace (default)"
]
where
withMT = withAcceptedMimeTypes . words . map (\ x -> if x == ',' then ' ' else x)
trc = withTrace . max 0 . min 9 . (read :: String -> Int) . ('0':) . filter (`elem` "0123456789") . fromMaybe v_1
-- |
-- commonly useful options for XML output
--
-- defines options: 'a_indent', 'a_output_encoding', 'a_output_html' and others
outputOptions :: [OptDescr SysConfig]
outputOptions
= [ Option "f" [a_output_file] (ReqArg (withSysAttr a_output_file) "FILE") "output file for resulting document (default: stdout)"
, Option "i" [a_indent] (NoArg (withIndent True)) "indent XML output for readability"
, Option "o" [a_output_encoding] (ReqArg withOutputEncoding "CHARSET") ( "encoding of output (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ")" )
, Option "" [a_output_xml] (NoArg withOutputXML ) "output of none ASCII chars as HTMl entity references"
, Option "" [a_output_html] (NoArg withOutputHTML ) "output of none ASCII chars as HTMl entity references"
, Option "" [a_output_xhtml] (NoArg withOutputXHTML ) "output of HTML elements with empty content (script, ...) done in format instead of "
, Option "" [a_output_plain] (NoArg withOutputPLAIN ) "output of HTML elements with empty content (script, ...) done in format instead of "
, Option "" [a_no_xml_pi] (NoArg (withXmlPi False)) ("output without processing instruction, useful in combination with --" ++ show a_output_html)
, Option "" [a_no_empty_elem_for] (ReqArg (withNoEmptyElemFor . words') "NAMES") "output of empty elements done in format only for given list of element names"
, Option "" [a_add_default_dtd] (NoArg (withAddDefaultDTD True)) "add the document type declaration given in the input document"
, Option "" [a_text_mode] (NoArg (withTextMode True)) "output in text mode"
]
where
words'
= words
. map (\ c -> if c == ',' then ' ' else c)
-- |
-- commonly useful options
--
-- defines options: 'a_verbose', 'a_help'
generalOptions :: [OptDescr SysConfig]
generalOptions
= [ Option "v" [a_verbose] (NoArg (withSysAttr a_verbose v_1)) "verbose output"
, Option "h?" [a_help] (NoArg (withSysAttr a_help v_1)) "this message"
]
-- |
-- defines 'a_version' option
versionOptions :: [OptDescr SysConfig]
versionOptions
= [ Option "V" [a_version] (NoArg (withSysAttr a_version v_1)) "show program version"
]
-- |
-- debug output options
showOptions :: [OptDescr SysConfig]
showOptions
= [ Option "" [a_show_tree] (NoArg (withShowTree True)) "output tree representation instead of document source"
, Option "" [a_show_haskell] (NoArg (withShowHaskell True)) "output internal Haskell representation instead of document source"
]
-- ------------------------------------------------------------
a_accept_mimetypes,
a_add_default_dtd,
a_canonicalize,
a_check_namespaces,
a_collect_errors,
a_default_baseuri,
a_do_not_canonicalize,
a_do_not_check_namespaces,
a_do_not_issue_errors,
a_do_not_issue_warnings,
a_do_not_preserve_comment,
a_do_not_remove_whitespace,
a_do_not_subst_dtd_entities,
a_do_not_subst_html_entities,
a_do_not_validate,
a_error,
a_error_log,
a_help,
a_if_modified_since,
a_if_unmodified_since,
a_ignore_encoding_errors,
a_ignore_none_xml_contents,
a_indent,
a_issue_errors,
a_issue_warnings,
a_mime_types,
a_no_empty_elements,
a_no_empty_elem_for,
a_no_redirect,
a_no_xml_pi,
a_output_file,
a_output_xml,
a_output_html,
a_output_xhtml,
a_output_plain,
a_parse_by_mimetype,
a_parse_html,
a_parse_xml,
a_preserve_comment,
a_proxy,
a_redirect,
a_remove_whitespace,
a_show_haskell,
a_show_tree,
a_strict_input,
a_subst_dtd_entities,
a_subst_html_entities,
a_text_mode,
a_trace,
a_validate,
a_verbose :: String
a_accept_mimetypes = "accept-mimetypes"
a_add_default_dtd = "add-default-dtd"
a_canonicalize = "canonicalize"
a_check_namespaces = "check-namespaces"
a_collect_errors = "collect-errors"
a_default_baseuri = "default-base-URI"
a_do_not_canonicalize = "do-not-canonicalize"
a_do_not_check_namespaces = "do-not-check-namespaces"
a_do_not_issue_errors = "do-not-issue-errors"
a_do_not_issue_warnings = "do-not-issue-warnings"
a_do_not_preserve_comment = "do-not-preserve-comment"
a_do_not_remove_whitespace = "do-not-remove-whitespace"
a_do_not_subst_dtd_entities = "do-not-subst-dtd-entities"
a_do_not_subst_html_entities = "do-not-subst-html-entities"
a_do_not_validate = "do-not-validate"
a_error = "error"
a_error_log = "errorLog"
a_help = "help"
a_if_modified_since = "if-modified-since"
a_if_unmodified_since = "if-unmodified-since"
a_ignore_encoding_errors = "ignore-encoding-errors"
a_ignore_none_xml_contents = "ignore-none-xml-contents"
a_indent = "indent"
a_issue_warnings = "issue-warnings"
a_issue_errors = "issue-errors"
a_mime_types = "mimetypes"
a_no_empty_elements = "no-empty-elements"
a_no_empty_elem_for = "no-empty-elem-for"
a_no_redirect = "no-redirect"
a_no_xml_pi = "no-xml-pi"
a_output_file = "output-file"
a_output_html = "output-html"
a_output_xhtml = "output-xhtml"
a_output_xml = "output-xml"
a_output_plain = "output-plain"
a_parse_by_mimetype = "parse-by-mimetype"
a_parse_html = "parse-html"
a_parse_xml = "parse-xml"
a_preserve_comment = "preserve-comment"
a_proxy = "proxy"
a_redirect = "redirect"
a_remove_whitespace = "remove-whitespace"
a_show_haskell = "show-haskell"
a_show_tree = "show-tree"
a_strict_input = "strict-input"
a_subst_dtd_entities = "subst-dtd-entities"
a_subst_html_entities = "subst-html-entities"
a_text_mode = "text-mode"
a_trace = "trace"
a_validate = "validate"
a_verbose = "verbose"
-- ------------------------------------------------------------
-- |
-- select options from a predefined list of option descriptions
selectOptions :: [String] -> [OptDescr a] -> [OptDescr a]
selectOptions ol os
= concat . map (\ on -> filter (\ (Option _ ons _ _) -> on `elem` ons) os) $ ol
removeOptions :: [String] -> [OptDescr a] -> [OptDescr a]
removeOptions ol os
= filter (\ (Option _ ons _ _) -> not . any (`elem` ol) $ ons ) os
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlRegex.hs 0000644 0000000 0000000 00000032363 12752557014 016755 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlRegex
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Regular Expression Matcher working on lists of XmlTrees
It's intended to import this module with an explicit
import declaration for not spoiling the namespace
with these somewhat special arrows
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlRegex
( XmlRegex
, mkZero
, mkUnit
, mkPrim
, mkPrim'
, mkPrimA
, mkDot
, mkStar
, mkAlt
, mkAlts
, mkSeq
, mkSeqs
, mkRep
, mkRng
, mkOpt
, mkPerm
, mkPerms
, mkMerge
, nullable
, delta
, matchXmlRegex
, splitXmlRegex
, scanXmlRegex
, matchRegexA
, splitRegexA
, scanRegexA
)
where
import Control.Arrow.ListArrows
import Data.Maybe
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.ShowXml (xshow)
-- ------------------------------------------------------------
-- the exported regex arrows
-- | check whether a sequence of XmlTrees match an Xml regular expression
--
-- The arrow for 'matchXmlRegex'.
--
-- The expession is build up from simple arrows acting as predicate ('mkPrimA') for
-- an XmlTree and of the usual cobinators for sequence ('mkSeq'), repetition
-- ('mkStar', mkRep', 'mkRng') and choice ('mkAlt', 'mkOpt')
matchRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
matchRegexA re ts = ts >>. (\ s -> maybe [s] (const []) . matchXmlRegex re $ s)
-- | split the sequence of trees computed by the filter a into
--
-- The arrow for 'splitXmlRegex'.
--
-- a first part matching the regex and a rest,
-- if a prefix of the input sequence does not match the regex, the arrow fails
-- else the pair containing the result lists is returned
splitRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees)
splitRegexA re ts = ts >>. (maybeToList . splitXmlRegex re)
-- | scan the input sequence with a regex and give the result as a list of lists of trees back
-- the regex must at least match one input tree, so the empty sequence should not match the regex
--
-- The arrow for 'scanXmlRegex'.
scanRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
scanRegexA re ts = ts >>. (fromMaybe [] . scanXmlRegex re)
-- ------------------------------------------------------------
data XmlRegex = Zero String
| Unit
| Sym (XmlTree -> Bool) String -- optional external repr. of predicate
| Dot
| Star XmlRegex
| Alt XmlRegex XmlRegex
| Seq XmlRegex XmlRegex
| Rep Int XmlRegex -- 1 or more repetitions
| Rng Int Int XmlRegex -- n..m repetitions
| Perm XmlRegex XmlRegex
| Merge XmlRegex XmlRegex
-- ------------------------------------------------------------
{- just for documentation
class Inv a where
inv :: a -> Bool
instance Inv XmlRegex where
inv (Zero _) = True
inv Unit = True
inv (Sym p _) = p holds for some XmlTrees
inv Dot = True
inv (Star e) = inv e
inv (Alt e1 e2) = inv e1 &&
inv e2
inv (Seq e1 e2) = inv e1 &&
inv e2
inv (Rep i e) = i > 0 && inv e
inv (Rng i j e) = (i < j || (i == j && i > 1)) &&
inv e
inv (Perm e1 e2) = inv e1 &&
inv e2
-}
-- ------------------------------------------------------------
--
-- smart constructors
mkZero :: String -> XmlRegex
mkZero = Zero
mkUnit :: XmlRegex
mkUnit = Unit
mkPrim :: (XmlTree -> Bool) -> XmlRegex
mkPrim p = Sym p ""
mkPrim' :: (XmlTree -> Bool) -> String -> XmlRegex
mkPrim' = Sym
mkPrimA :: LA XmlTree XmlTree -> XmlRegex
mkPrimA a = mkPrim (not . null . runLA a)
mkDot :: XmlRegex
mkDot = Dot
mkStar :: XmlRegex -> XmlRegex
mkStar (Zero _) = mkUnit -- {}* == ()
mkStar e@Unit = e -- ()* == ()
mkStar e@(Star _e1) = e -- (r*)* == r*
mkStar (Rep 1 e1) = mkStar e1 -- (r+)* == r*
mkStar e@(Alt _ _) = Star (rmStar e) -- (a*|b)* == (a|b)*
mkStar e = Star e
rmStar :: XmlRegex -> XmlRegex
rmStar (Alt e1 e2) = mkAlt (rmStar e1) (rmStar e2)
rmStar (Star e1) = rmStar e1
rmStar (Rep 1 e1) = rmStar e1
rmStar e1 = e1
mkAlt :: XmlRegex -> XmlRegex -> XmlRegex
mkAlt e1 (Zero _) = e1 -- e1 u {} = e1
mkAlt (Zero _) e2 = e2 -- {} u e2 = e2
mkAlt e1@(Star Dot) _e2 = e1 -- A* u e1 = A*
mkAlt _e1 e2@(Star Dot) = e2 -- e1 u A* = A*
mkAlt (Sym p1 e1) (Sym p2 e2) = mkPrim' (\ x -> p1 x || p2 x) (e e1 e2) -- melting of predicates
where
e "" x2 = x2
e x1 "" = x1
e x1 x2 = x1 ++ "|" ++ x2
mkAlt e1 e2@(Sym _ _) = mkAlt e2 e1 -- symmetry: predicates always first
mkAlt e1@(Sym _ _) (Alt e2@(Sym _ _) e3)
= mkAlt (mkAlt e1 e2) e3 -- prepare melting of predicates
mkAlt (Alt e1 e2) e3 = mkAlt e1 (mkAlt e2 e3) -- associativity
mkAlt e1 e2 = Alt e1 e2
mkAlts :: [XmlRegex] -> XmlRegex
mkAlts = foldr mkAlt (mkZero "")
mkSeq :: XmlRegex -> XmlRegex -> XmlRegex
mkSeq e1@(Zero _) _e2 = e1
mkSeq _e1 e2@(Zero _) = e2
mkSeq Unit e2 = e2
mkSeq e1 Unit = e1
mkSeq (Seq e1 e2) e3 = mkSeq e1 (mkSeq e2 e3)
mkSeq e1 e2 = Seq e1 e2
mkSeqs :: [XmlRegex] -> XmlRegex
mkSeqs = foldr mkSeq mkUnit
mkRep :: Int -> XmlRegex -> XmlRegex
mkRep 0 e = mkStar e
mkRep _ e@(Zero _) = e
mkRep _ e@Unit = e
mkRep i e = Rep i e
mkRng :: Int -> Int -> XmlRegex -> XmlRegex
mkRng 0 0 _e = mkUnit
mkRng 1 1 e = e
mkRng lb ub _e
| lb > ub = Zero $
"illegal range " ++
show lb ++ ".." ++ show ub
mkRng _l _u e@(Zero _) = e
mkRng _l _u e@Unit = e
mkRng lb ub e = Rng lb ub e
mkOpt :: XmlRegex -> XmlRegex
mkOpt = mkRng 0 1
mkPerm :: XmlRegex -> XmlRegex -> XmlRegex
mkPerm e1@(Zero _) _ = e1
mkPerm _ e2@(Zero _) = e2
mkPerm Unit e2 = e2
mkPerm e1 Unit = e1
mkPerm e1 e2 = Perm e1 e2
mkPerms :: [XmlRegex] -> XmlRegex
mkPerms = foldr mkPerm mkUnit
mkMerge :: XmlRegex -> XmlRegex -> XmlRegex
mkMerge e1@(Zero _) _ = e1
mkMerge _ e2@(Zero _) = e2
mkMerge Unit e2 = e2
mkMerge e1 Unit = e1
mkMerge e1 e2 = Merge e1 e2
-- ------------------------------------------------------------
instance Show XmlRegex where
show (Zero s) = "{err:" ++ s ++ "}"
show Unit = "()"
show (Sym _p "") = ""
show (Sym _p r ) = r
show Dot = "."
show (Star e) = "(" ++ show e ++ ")*"
show (Alt e1 e2) = "(" ++ show e1 ++ "|" ++ show e2 ++ ")"
show (Seq e1 e2) = show e1 ++ show e2
show (Rep 1 e) = "(" ++ show e ++ ")+"
show (Rep i e) = "(" ++ show e ++ "){" ++ show i ++ ",}"
show (Rng 0 1 e) = "(" ++ show e ++ ")?"
show (Rng i j e) = "(" ++ show e ++ "){" ++ show i ++ "," ++ show j ++ "}"
show (Perm e1 e2) = "(" ++ show e1 ++ show e2 ++ "|" ++ show e2 ++ show e1 ++ ")"
show (Merge e1 e2) = "(" ++ show e1 ++ "&" ++ show e2 ++ ")"
-- ------------------------------------------------------------
unexpected :: XmlTree -> String -> String
unexpected t e = emsg e ++ (cut 80 . xshow) [t]
where
emsg "" = "unexpected: "
emsg s = "expected: " ++ s ++ ", but got: "
cut n s
| null rest = s'
| otherwise = s' ++ "..."
where
(s', rest) = splitAt n s
-- ------------------------------------------------------------
nullable :: XmlRegex -> Bool
nullable (Zero _) = False
nullable Unit = True
nullable (Sym _p _) = False -- assumption: p holds for at least one tree
nullable Dot = False
nullable (Star _) = True
nullable (Alt e1 e2) = nullable e1 ||
nullable e2
nullable (Seq e1 e2) = nullable e1 &&
nullable e2
nullable (Rep _i e) = nullable e
nullable (Rng i _ e) = i == 0 ||
nullable e
nullable (Perm e1 e2) = nullable e1 &&
nullable e2
nullable (Merge e1 e2) = nullable e1 &&
nullable e2
-- ------------------------------------------------------------
delta :: XmlRegex -> XmlTree -> XmlRegex
delta e@(Zero _) _ = e
delta Unit c = mkZero $ unexpected c ""
delta (Sym p e) c
| p c = mkUnit
| otherwise = mkZero $ unexpected c e
delta Dot _ = mkUnit
delta e@(Star e1) c = mkSeq (delta e1 c) e
delta (Alt e1 e2) c = mkAlt (delta e1 c) (delta e2 c)
delta (Seq e1 e2) c
| nullable e1 = mkAlt (mkSeq (delta e1 c) e2) (delta e2 c)
| otherwise = mkSeq (delta e1 c) e2
delta (Rep i e) c = mkSeq (delta e c) (mkRep (i-1) e)
delta (Rng i j e) c = mkSeq (delta e c) (mkRng ((i-1) `max` 0) (j-1) e)
delta (Perm e1 e2) c = case e1' of
(Zero _) -> mkPerm e1 (delta e2 c)
_ -> mkPerm e1' e2
where
e1' = delta e1 c
delta (Merge e1 e2) c = mkAlt (mkMerge (delta e1 c) e2)
(mkMerge e1 (delta e2 c))
-- ------------------------------------------------------------
delta' :: XmlRegex -> XmlTrees -> XmlRegex
delta' = foldl delta
-- | match a sequence of XML trees with a regular expression over trees
--
-- If the input matches, the result is Nothing, else Just an error message is returned
matchXmlRegex :: XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex e
= res . delta' e
where
res (Zero er) = Just er
res re
| nullable re = Nothing -- o.k.
| otherwise = Just $ "input does not match " ++ show e
-- ------------------------------------------------------------
-- | split a sequence of XML trees into a pair of a a matching prefix and a rest
--
-- If there is no matching prefix, Nothing is returned
splitXmlRegex :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex re = splitXmlRegex' re []
splitXmlRegex' :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' re res []
| nullable re = Just (reverse res, [])
| otherwise = Nothing
splitXmlRegex' (Zero _) _ _
= Nothing
splitXmlRegex' re res xs@(x:xs')
| isJust res' = res'
| nullable re = Just (reverse res, xs)
| otherwise = Nothing
where
re' = delta re x
res' = splitXmlRegex' re' (x:res) xs'
-- ------------------------------------------------------------
-- | scan a sequence of XML trees and split it into parts matching the given regex
--
-- If the parts cannot be split because of a missing match, or because of the
-- empty sequence as match, Nothing is returned
scanXmlRegex :: XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex re ts = scanXmlRegex' re (splitXmlRegex re ts)
scanXmlRegex' :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' _ Nothing = Nothing
scanXmlRegex' _ (Just (rs, [])) = Just [rs]
scanXmlRegex' _ (Just ([], _)) = Nothing -- re is nullable (the empty word matches), nothing split off
-- would give infinite list of empty lists
scanXmlRegex' re (Just (rs, rest))
| isNothing res = Nothing
| otherwise = Just (rs : fromJust res)
where
res = scanXmlRegex' re (splitXmlRegex re rest)
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState.hs 0000644 0000000 0000000 00000006754 12752557014 016770 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlState
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
the interface for the basic state maipulation functions
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlState
( -- * Data Types
XIOState
, XIOSysState
, IOStateArrow
, IOSArrow
, SysConfig
, SysConfigList
,
-- * User State Manipulation
getUserState
, setUserState
, changeUserState
, withExtendedUserState
, withOtherUserState
, withoutUserState
,
-- * Run IO State arrows
runX
,
-- * Global System State Configuration and Access
configSysVars
, setSysAttr
, unsetSysAttr
, getSysAttr
, getAllSysAttrs
, setSysAttrString
, setSysAttrInt
, getSysAttrInt
, getConfigAttr
,
-- * Error Handling
clearErrStatus
, setErrStatus
, getErrStatus
, setErrMsgStatus
, setErrorMsgHandler
, errorMsgStderr
, errorMsgCollect
, errorMsgStderrAndCollect
, errorMsgIgnore
, getErrorMessages
, filterErrorMsg
, issueWarn
, issueErr
, issueFatal
, issueExc
, setDocumentStatus
, setDocumentStatusFromSystemState
, documentStatusOk
, -- * Tracing
setTraceLevel
, getTraceLevel
, withTraceLevel
, setTraceCmd
, getTraceCmd
, trace
, traceMsg
, traceValue
, traceString
, traceSource
, traceTree
, traceDoc
, -- * Document Base
setBaseURI
, getBaseURI
, changeBaseURI
, setDefaultBaseURI
, getDefaultBaseURI
, runInLocalURIContext
, -- * URI Manipulation
expandURIString
, expandURI
, mkAbsURI
, getFragmentFromURI
, getPathFromURI
, getPortFromURI
, getQueryFromURI
, getRegNameFromURI
, getSchemeFromURI
, getUserInfoFromURI
, -- * Mime Type Handling
getMimeTypeTable
, setMimeTypeTable
, setMimeTypeTableFromFile
, -- * System Configuration and Options
yes
, no
, withAcceptedMimeTypes
, withAddDefaultDTD
, withSysAttr
, withCanonicalize
, withCompression
, withCheckNamespaces
, withDefaultBaseURI
, withStrictDeserialize
, withEncodingErrors
, withErrors
, withFileMimeType
, withIgnoreNoneXmlContents
, withIndent
, withInputEncoding
, withInputOption
, withInputOptions
, withMimeTypeFile
, withMimeTypeHandler
, withNoEmptyElemFor
, withXmlPi
, withOutputEncoding
, withOutputXML
, withOutputHTML
, withOutputXHTML
, withOutputPLAIN
, withParseByMimeType
, withParseHTML
, withPreserveComment
, withProxy
, withRedirect
, withRemoveWS
, withShowHaskell
, withShowTree
, withStrictInput
, withSubstDTDEntities
, withSubstHTMLEntities
, withTextMode
, withTrace
, withValidate
, withWarnings
)
where
import Text.XML.HXT.Arrow.XmlState.ErrorHandling
import Text.XML.HXT.Arrow.XmlState.MimeTypeTable
import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow
import Text.XML.HXT.Arrow.XmlState.SystemConfig
import Text.XML.HXT.Arrow.XmlState.TraceHandling
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.XmlState.URIHandling
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState/ErrorHandling.hs 0000644 0000000 0000000 00000021726 12752557014 021522 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlState.ErrorHandling
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
the basic state arrows for XML processing
A state is needed for global processing options,
like encoding options, document base URI, trace levels
and error message handling
The state is separated into a user defined state
and a system state. The system state contains variables
for error message handling, for tracing, for the document base
for accessing XML documents with relative references, e.g. DTDs,
and a global key value store. This assoc list has strings as keys
and lists of XmlTrees as values. It is used to store arbitrary
XML and text values, e.g. user defined global options.
The user defined part of the store is in the default case empty, defined as ().
It can be extended with an arbitray data type
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlState.ErrorHandling
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Control.Arrow.ArrowIO
import Control.Exception ( SomeException )
import Data.Maybe
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import System.IO ( hPutStrLn
, hFlush
, stderr
)
-- ------------------------------------------------------------
changeErrorStatus :: (Int -> Int -> Int) -> IOStateArrow s Int Int
changeErrorStatus f = chgSysVar theErrorStatus f
-- | reset global error variable
clearErrStatus :: IOStateArrow s b b
clearErrStatus = configSysVar $ setS theErrorStatus 0
-- | set global error variable
setErrStatus :: IOStateArrow s Int Int
setErrStatus = changeErrorStatus max
-- | read current global error status
getErrStatus :: IOStateArrow s XmlTree Int
getErrStatus = getSysVar theErrorStatus
-- ------------------------------------------------------------
-- | raise the global error status level to that of the input tree
setErrMsgStatus :: IOStateArrow s XmlTree XmlTree
setErrMsgStatus = perform
( getErrorLevel >>> setErrStatus )
-- | set the error message handler and the flag for collecting the errors
setErrorMsgHandler :: Bool -> (String -> IO ()) -> IOStateArrow s b b
setErrorMsgHandler c f = configSysVar $ setS (theErrorMsgCollect .&&&. theErrorMsgHandler) (c, f)
-- | error message handler for output to stderr
sysErrorMsg :: IOStateArrow s XmlTree XmlTree
sysErrorMsg = perform
( getErrorLevel &&& getErrorMsg
>>>
arr formatErrorMsg
>>>
getSysVar theErrorMsgHandler &&& this
>>>
arrIO (\ (h, msg) -> h msg)
)
where
formatErrorMsg (level, msg) = "\n" ++ errClass level ++ ": " ++ msg
errClass l = fromMaybe "fatal error" . lookup l $ msgList
where
msgList = [ (c_ok, "no error")
, (c_warn, "warning")
, (c_err, "error")
, (c_fatal, "fatal error")
]
-- | the default error message handler: error output to stderr
errorMsgStderr :: IOStateArrow s b b
errorMsgStderr = setErrorMsgHandler False (\ x ->
do hPutStrLn stderr x
hFlush stderr
)
-- | error message handler for collecting errors
errorMsgCollect :: IOStateArrow s b b
errorMsgCollect = setErrorMsgHandler True (const $ return ())
-- | error message handler for output to stderr and collecting
errorMsgStderrAndCollect :: IOStateArrow s b b
errorMsgStderrAndCollect = setErrorMsgHandler True (hPutStrLn stderr)
-- | error message handler for ignoring errors
errorMsgIgnore :: IOStateArrow s b b
errorMsgIgnore = setErrorMsgHandler False (const $ return ())
-- |
-- if error messages are collected by the error handler for
-- processing these messages by the calling application,
-- this arrow reads the stored messages and clears the error message store
getErrorMessages :: IOStateArrow s b XmlTree
getErrorMessages = getSysVar theErrorMsgList
>>>
configSysVar (setS theErrorMsgList [])
>>>
arrL reverse
addToErrorMsgList :: IOStateArrow s XmlTree XmlTree
addToErrorMsgList = chgSysVar
( theErrorMsgCollect .&&&. theErrorMsgList )
( \ e (cs, es) -> (cs, if cs then e : es else es) )
-- ------------------------------------------------------------
-- |
-- filter error messages from input trees and issue errors
filterErrorMsg :: IOStateArrow s XmlTree XmlTree
filterErrorMsg = ( setErrMsgStatus
>>>
sysErrorMsg
>>>
addToErrorMsgList
>>>
none
)
`when`
isError
-- | generate a warnig message
issueWarn :: String -> IOStateArrow s b b
issueWarn msg = perform (warn msg >>> filterErrorMsg)
-- | generate an error message
issueErr :: String -> IOStateArrow s b b
issueErr msg = perform (err msg >>> filterErrorMsg)
-- | generate a fatal error message, e.g. document not found
issueFatal :: String -> IOStateArrow s b b
issueFatal msg = perform (fatal msg >>> filterErrorMsg)
-- | Default exception handler: issue a fatal error message and fail.
--
-- The parameter can be used to specify where the error occured
issueExc :: String -> IOStateArrow s SomeException b
issueExc m = ( issueFatal $< arr ((msg ++) . show) )
>>>
none
where
msg | null m = "Exception: "
| otherwise = "Exception in " ++ m ++ ": "
-- |
-- add the error level and the module where the error occured
-- to the attributes of a document root node and remove the children when level is greater or equal to 'c_err'.
-- called by 'setDocumentStatusFromSystemState' when the system state indicates an error
setDocumentStatus :: Int -> String -> IOStateArrow s XmlTree XmlTree
setDocumentStatus level msg
= ( addAttrl ( sattr a_status (show level)
<+>
sattr a_module msg
)
>>>
( if level >= c_err
then setChildren []
else this
)
)
`when`
isRoot
-- |
-- check whether the error level attribute in the system state
-- is set to error, in this case the children of the document root are
-- removed and the module name where the error occured and the error level are added as attributes with 'setDocumentStatus'
-- else nothing is changed
setDocumentStatusFromSystemState :: String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState msg
= setStatus $< getErrStatus
where
setStatus level
| level <= c_warn = this
| otherwise = setDocumentStatus level msg
-- |
-- check whether tree is a document root and the status attribute has a value less than 'c_err'
documentStatusOk :: ArrowXml a => a XmlTree XmlTree
documentStatusOk = isRoot
>>>
( (getAttrValue a_status
>>>
isA (\ v -> null v || ((read v)::Int) <= c_warn)
)
`guards`
this
)
-- ------------------------------------------------------------
errorOutputToStderr :: String -> IO ()
errorOutputToStderr msg
= do
hPutStrLn stderr msg
hFlush stderr
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState/MimeTypeTable.hs 0000644 0000000 0000000 00000004317 12752557014 021462 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlState.MimeTypeTable
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
the mime type configuration functions
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlState.MimeTypeTable
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIO
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlState.TypeDefs
-- ------------------------------------------------------------
-- | set the table mapping of file extensions to mime types in the system state
--
-- Default table is defined in 'Text.XML.HXT.DOM.MimeTypeDefaults'.
-- This table is used when reading loacl files, (file: protocol) to determine the mime type
setMimeTypeTable :: MimeTypeTable -> IOStateArrow s b b
setMimeTypeTable mtt = configSysVar $ setS (theMimeTypes .&&&. theMimeTypeFile) (mtt, "")
-- | set the table mapping of file extensions to mime types by an external config file
--
-- The config file must follow the conventions of /etc/mime.types on a debian linux system,
-- that means all empty lines and all lines starting with a # are ignored. The other lines
-- must consist of a mime type followed by a possible empty list of extensions.
-- The list of extenstions and mime types overwrites the default list in the system state
-- of the IOStateArrow
setMimeTypeTableFromFile :: FilePath -> IOStateArrow s b b
setMimeTypeTableFromFile file = configSysVar $ setS theMimeTypeFile file
-- | read the system mimetype table
getMimeTypeTable :: IOStateArrow s b MimeTypeTable
getMimeTypeTable = getMime $< getSysVar (theMimeTypes .&&&. theMimeTypeFile)
where
getMime (mtt, "") = constA mtt
getMime (_, mtf) = perform (setMimeTypeTable $< arrIO0 ( readMimeTypeTable mtf))
>>>
getMimeTypeTable
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState/RunIOStateArrow.hs 0000644 0000000 0000000 00000027164 12752557014 021776 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlState.RunIOStateArrow
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
run an io state arrow
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlState.RunIOStateArrow
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.IOStateListArrow
import Data.Map ( empty )
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState.ErrorHandling
import Text.XML.HXT.Arrow.XmlState.TraceHandling
import Text.XML.HXT.Arrow.XmlState.TypeDefs
-- ------------------------------------------------------------
-- |
-- apply an 'IOSArrow' to an empty root node with 'initialState' () as initial state
--
-- the main entry point for running a state arrow with IO
--
-- when running @ runX f@ an empty XML root node is applied to @f@.
-- usually @f@ will start with a constant arrow (ignoring the input), e.g. a 'Text.XML.HXT.Arrow.ReadDocument.readDocument' arrow.
--
-- for usage see examples with 'Text.XML.HXT.Arrow.WriteDocument.writeDocument'
--
-- if input has to be feed into the arrow use 'Control.Arrow.IOStateListArrow.runIOSLA' like in @ runIOSLA f emptyX inputDoc @
runX :: IOSArrow XmlTree c -> IO [c]
runX = runXIOState (initialState ())
runXIOState :: XIOState s -> IOStateArrow s XmlTree c -> IO [c]
runXIOState s0 f
= do
(_finalState, res) <- runIOSLA (emptyRoot >>> f) s0 undefined
return res
where
emptyRoot = root [] []
-- | the default global state, used as initial state when running an 'IOSArrow' with 'runIOSLA' or
-- 'runX'
initialState :: us -> XIOState us
initialState s = XIOState { xioSysState = initialSysState
, xioUserState = s
}
-- ------------------------------------------------------------
initialSysState :: XIOSysState
initialSysState = XIOSys
{ xioSysWriter = initialSysWriter
, xioSysEnv = initialSysEnv
}
initialSysWriter :: XIOSysWriter
initialSysWriter = XIOwrt
{ xioErrorStatus = c_ok
, xioErrorMsgList = []
, xioExpatErrors = none
, xioRelaxNoOfErrors = 0
, xioRelaxDefineId = 0
, xioRelaxAttrList = []
}
initialSysEnv :: XIOSysEnv
initialSysEnv = XIOEnv
{ xioTraceLevel = 0
, xioTraceCmd = traceOutputToStderr
, xioErrorMsgHandler = errorOutputToStderr
, xioErrorMsgCollect = False
, xioBaseURI = ""
, xioDefaultBaseURI = ""
, xioAttrList = []
, xioInputConfig = initialInputConfig
, xioParseConfig = initialParseConfig
, xioOutputConfig = initialOutputConfig
, xioRelaxConfig = initialRelaxConfig
, xioXmlSchemaConfig = initialXmlSchemaConfig
, xioCacheConfig = initialCacheConfig
}
initialInputConfig :: XIOInputConfig
initialInputConfig = XIOIcgf
{ xioStrictInput = False
, xioEncodingErrors = True
, xioInputEncoding = ""
, xioHttpHandler = dummyHTTPHandler
, xioInputOptions = []
, xioRedirect = False
, xioProxy = ""
}
initialParseConfig :: XIOParseConfig
initialParseConfig = XIOPcfg
{ xioMimeTypes = defaultMimeTypeTable
, xioMimeTypeHandlers = empty
, xioMimeTypeFile = ""
, xioAcceptedMimeTypes = []
, xioFileMimeType = ""
, xioWarnings = True
, xioRemoveWS = False
, xioParseByMimeType = False
, xioParseHTML = False
, xioLowerCaseNames = False
, xioTagSoup = False
, xioPreserveComment = False
, xioValidate = True
, xioSubstDTDEntities = True
, xioSubstHTMLEntities = False
, xioCheckNamespaces = False
, xioCanonicalize = True
, xioIgnoreNoneXmlContents = False
, xioTagSoupParser = dummyTagSoupParser
, xioExpat = False
, xioExpatParser = dummyExpatParser
}
initialOutputConfig :: XIOOutputConfig
initialOutputConfig = XIOOcfg
{ xioIndent = False
, xioOutputEncoding = ""
, xioOutputFmt = XMLoutput
, xioXmlPi = True
, xioNoEmptyElemFor = []
, xioAddDefaultDTD = False
, xioTextMode = False
, xioShowTree = False
, xioShowHaskell = False
}
initialRelaxConfig :: XIORelaxConfig
initialRelaxConfig = XIORxc
{ xioRelaxValidate = False
, xioRelaxSchema = ""
, xioRelaxCheckRestr = True
, xioRelaxValidateExtRef = True
, xioRelaxValidateInclude = True
, xioRelaxCollectErrors = True
, xioRelaxValidator = dummyRelaxValidator
}
initialXmlSchemaConfig :: XIOXmlSchemaConfig
initialXmlSchemaConfig = XIOScc
{ xioXmlSchemaValidate = False
, xioXmlSchemaSchema = ""
, xioXmlSchemaValidator = dummyXmlSchemaValidator
}
initialCacheConfig :: XIOCacheConfig
initialCacheConfig = XIOCch
{ xioBinaryCompression = id
, xioBinaryDeCompression = id
, xioWithCache = False
, xioCacheDir = ""
, xioDocumentAge = 0
, xioCache404Err = False
, xioCacheRead = dummyCacheRead
, xioStrictDeserialize = False
}
-- ------------------------------------------------------------
dummyHTTPHandler :: IOSArrow XmlTree XmlTree
dummyHTTPHandler = ( issueFatal $
unlines $
[ "HTTP handler not configured,"
, "please install package hxt-curl and use 'withCurl' config option"
, "or install package hxt-http and use 'withHTTP' config option"
]
)
>>>
addAttr transferMessage "HTTP handler not configured"
>>>
addAttr transferStatus "999"
dummyTagSoupParser :: IOSArrow b b
dummyTagSoupParser = issueFatal $
unlines $
[ "TagSoup parser not configured,"
, "please install package hxt-tagsoup"
, " and use 'withTagSoup' parser config option from this package"
]
dummyExpatParser :: IOSArrow b b
dummyExpatParser = issueFatal $
unlines $
[ "Expat parser not configured,"
, "please install package hxt-expat"
, " and use 'withExpat' parser config option from this package"
]
dummyRelaxValidator :: IOSArrow b b
dummyRelaxValidator = issueFatal $
unlines $
[ "RelaxNG validator not configured,"
, "please install package hxt-relaxng"
, " and use 'withRelaxNG' config option from this package"
]
dummyXmlSchemaValidator :: IOSArrow b b
dummyXmlSchemaValidator = issueFatal $
unlines $
[ "XML Schema validator not configured,"
, "please install package hxt-xmlschema"
, " and use 'withXmlSchema' config option from this package"
]
dummyCacheRead :: String -> IOSArrow b b
dummyCacheRead = const $
issueFatal $
unlines $
[ "Document cache not configured,"
, "please install package hxt-cache and use 'withCache' config option"
]
-- ------------------------------------------------------------
getConfigAttr :: String -> SysConfigList -> String
getConfigAttr n c = lookup1 n $ tl
where
s = (foldr (>>>) id c) initialSysState
tl = getS theAttrList s
-- ----------------------------------------
theSysConfigComp :: Selector XIOSysState a -> Selector SysConfig a
theSysConfigComp sel = S { getS = \ cf -> getS sel (cf initialSysState)
, setS = \ val cf -> setS sel val . cf
}
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState/TraceHandling.hs 0000644 0000000 0000000 00000012407 12752557014 021463 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlState.TraceHandling
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
the trace arrows
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlState.TraceHandling
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Control.Arrow.ArrowIO
import System.IO ( hPutStrLn
, hFlush
, stderr
)
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.XmlState.SystemConfig
import Text.XML.HXT.Arrow.Edit ( addHeadlineToXmlDoc
, treeRepOfXmlDoc
, indentDoc
)
-- ------------------------------------------------------------
-- | set the global trace level
setTraceLevel :: Int -> IOStateArrow s b b
setTraceLevel l = configSysVar $ withTrace l
-- | read the global trace level
getTraceLevel :: IOStateArrow s b Int
getTraceLevel = getSysVar theTraceLevel
-- | set the global trace command. This command does the trace output
setTraceCmd :: (Int -> String -> IO ()) -> IOStateArrow s b b
setTraceCmd c = configSysVar $ setS theTraceCmd c
-- | acces the command for trace output
getTraceCmd :: IOStateArrow a b (Int -> String -> IO ())
getTraceCmd = getSysVar theTraceCmd
-- | run an arrow with a given trace level, the old trace level is restored after the arrow execution
withTraceLevel :: Int -> IOStateArrow s b c -> IOStateArrow s b c
withTraceLevel level f = localSysEnv $ setTraceLevel level >>> f
-- | apply a trace arrow and issue message to stderr
trace :: Int -> IOStateArrow s b String -> IOStateArrow s b b
trace level trc = perform ( trc
>>>
( getTraceCmd &&& this )
>>>
arrIO (\ (cmd, msg) -> cmd level msg)
)
`when` ( getTraceLevel
>>>
isA (>= level)
)
-- | trace the current value transfered in a sequence of arrows.
--
-- The value is formated by a string conversion function. This is a substitute for
-- the old and less general traceString function
traceValue :: Int -> (b -> String) -> IOStateArrow s b b
traceValue level trc = trace level (arr $ (('-' : "- (" ++ show level ++ ") ") ++) . trc)
-- | an old alias for 'traceValue'
traceString :: Int -> (b -> String) -> IOStateArrow s b b
traceString = traceValue
-- | issue a string message as trace
traceMsg :: Int -> String -> IOStateArrow s b b
traceMsg level msg = traceValue level (const msg)
-- | issue the source representation of a document if trace level >= 3
--
-- for better readability the source is formated with indentDoc
traceSource :: IOStateArrow s XmlTree XmlTree
traceSource = trace 3 $
xshow $
choiceA [ isRoot :-> ( indentDoc
>>>
getChildren
)
, isElem :-> ( root [] [this]
>>> indentDoc
>>> getChildren
>>> isElem
)
, this :-> this
]
-- | issue the tree representation of a document if trace level >= 4
traceTree :: IOStateArrow s XmlTree XmlTree
traceTree = trace 4 $
xshow $
treeRepOfXmlDoc
>>>
addHeadlineToXmlDoc
>>>
getChildren
-- | trace a main computation step
-- issue a message when trace level >= 1, issue document source if level >= 3, issue tree when level is >= 4
traceDoc :: String -> IOStateArrow s XmlTree XmlTree
traceDoc msg = traceMsg 1 msg
>>>
traceSource
>>>
traceTree
-- ----------------------------------------------------------
traceOutputToStderr :: Int -> String -> IO ()
traceOutputToStderr _level msg
= do
hPutStrLn stderr msg
hFlush stderr
-- ----------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState/TypeDefs.hs 0000644 0000000 0000000 00000117660 14025461240 020500 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlState.TypeDefs
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
the basic state arrows for XML processing
A state is needed for global processing options,
like encoding options, document base URI, trace levels
and error message handling
The state is separated into a user defined state
and a system state. The system state contains variables
for error message handling, for tracing, for the document base
for accessing XML documents with relative references, e.g. DTDs,
and a global key value store. This assoc list has strings as keys
and lists of XmlTrees as values. It is used to store arbitrary
XML and text values, e.g. user defined global options.
The user defined part of the store is in the default case empty, defined as ().
It can be extended with an arbitray data type
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlState.TypeDefs
( module Text.XML.HXT.Arrow.XmlState.TypeDefs
, Selector(..)
, chgS
, idS
, (.&&&.)
)
where
import Control.Arrow
import Control.Arrow.ArrowList
import Control.Arrow.IOStateListArrow
import Control.DeepSeq
import Data.ByteString.Lazy (ByteString)
import Data.Char (isDigit)
import Data.Function.Selector (Selector (..), chgS, idS,
(.&&&.))
import qualified Data.Map as M
import Text.XML.HXT.DOM.Interface
-- ------------------------------------------------------------
{- datatypes -}
-- |
-- state datatype consists of a system state and a user state
-- the user state is not fixed
data XIOState us = XIOState { xioSysState :: !XIOSysState
, xioUserState :: !us
}
instance (NFData us) => NFData (XIOState us) where
rnf (XIOState sys usr) = rnf sys `seq` rnf usr
-- |
-- The arrow type for stateful arrows
type IOStateArrow s b c = IOSLA (XIOState s) b c
-- |
-- The arrow for stateful arrows with no user defined state
type IOSArrow b c = IOStateArrow () b c
-- ------------------------------------------------------------
-- user state functions
-- | read the user defined part of the state
getUserState :: IOStateArrow s b s
getUserState
= IOSLA $ \ s _ ->
return (s, [xioUserState s])
-- | change the user defined part of the state
changeUserState :: (b -> s -> s) -> IOStateArrow s b b
changeUserState cf
= IOSLA $ \ s v ->
let s' = s { xioUserState = cf v (xioUserState s) }
in return (s', [v])
-- | set the user defined part of the state
setUserState :: IOStateArrow s s s
setUserState
= changeUserState const
-- | extend user state
--
-- Run an arrow with an extended user state component, The old component
-- is stored together with a new one in a pair, the arrow is executed with this
-- extended state, and the augmented state component is removed form the state
-- when the arrow has finished its execution
withExtendedUserState :: s1 -> IOStateArrow (s1, s0) b c -> IOStateArrow s0 b c
withExtendedUserState initS1 f
= IOSLA $ \ s0 x ->
do
~(finalS, res) <- runIOSLA f ( XIOState { xioSysState = xioSysState s0
, xioUserState = (initS1, xioUserState s0)
}
) x
return ( XIOState { xioSysState = xioSysState finalS
, xioUserState = snd (xioUserState finalS)
}
, res
)
-- | change the type of user state
--
-- This conversion is useful, when running a state arrow with another
-- structure of the user state, e.g. with () when executing some IO arrows
withOtherUserState :: s1 -> IOStateArrow s1 b c -> IOStateArrow s0 b c
withOtherUserState s1 f
= IOSLA $ \ s x ->
do
(s', res) <- runIOSLA f ( XIOState { xioSysState = xioSysState s
, xioUserState = s1
}
) x
return ( XIOState { xioSysState = xioSysState s'
, xioUserState = xioUserState s
}
, res
)
withoutUserState :: IOSArrow b c -> IOStateArrow s0 b c
withoutUserState = withOtherUserState ()
-- ------------------------------------------------------------
-- system state structure and acces functions
-- |
-- predefined system state data type with all components for the
-- system functions, like trace, error handling, ...
data XIOSysState = XIOSys { xioSysWriter :: !XIOSysWriter
, xioSysEnv :: !XIOSysEnv
}
instance NFData XIOSysState where
rnf x = seq x () -- all fields of interest are strict
data XIOSysWriter = XIOwrt { xioErrorStatus :: !Int
, xioErrorMsgList :: !XmlTrees
, xioExpatErrors :: IOSArrow XmlTree XmlTree
, xioRelaxNoOfErrors :: !Int
, xioRelaxDefineId :: !Int
, xioRelaxAttrList :: AssocList String XmlTrees
}
data XIOSysEnv = XIOEnv { xioTraceLevel :: !Int
, xioTraceCmd :: Int -> String -> IO ()
, xioErrorMsgHandler :: String -> IO ()
, xioErrorMsgCollect :: !Bool
, xioBaseURI :: !String
, xioDefaultBaseURI :: !String
, xioAttrList :: !Attributes
, xioInputConfig :: !XIOInputConfig
, xioParseConfig :: !XIOParseConfig
, xioOutputConfig :: !XIOOutputConfig
, xioRelaxConfig :: !XIORelaxConfig
, xioXmlSchemaConfig :: !XIOXmlSchemaConfig
, xioCacheConfig :: !XIOCacheConfig
}
data XIOInputConfig = XIOIcgf { xioStrictInput :: !Bool
, xioEncodingErrors :: !Bool
, xioInputEncoding :: String
, xioHttpHandler :: IOSArrow XmlTree XmlTree
, xioInputOptions :: !Attributes
, xioRedirect :: !Bool
, xioProxy :: String
}
data XIOParseConfig = XIOPcfg { xioMimeTypes :: MimeTypeTable
, xioMimeTypeHandlers :: MimeTypeHandlers
, xioMimeTypeFile :: String
, xioAcceptedMimeTypes :: [String]
, xioFileMimeType :: String
, xioWarnings :: !Bool
, xioRemoveWS :: !Bool
, xioParseByMimeType :: !Bool
, xioParseHTML :: !Bool
, xioLowerCaseNames :: !Bool
, xioPreserveComment :: !Bool
, xioValidate :: !Bool
, xioSubstDTDEntities :: !Bool
, xioSubstHTMLEntities :: !Bool
, xioCheckNamespaces :: !Bool
, xioCanonicalize :: !Bool
, xioIgnoreNoneXmlContents :: !Bool
, xioTagSoup :: !Bool
, xioTagSoupParser :: IOSArrow XmlTree XmlTree
, xioExpat :: !Bool
, xioExpatParser :: IOSArrow XmlTree XmlTree
}
data XIOOutputConfig = XIOOcfg { xioIndent :: !Bool
, xioOutputEncoding :: !String
, xioOutputFmt :: !XIOXoutConfig
, xioXmlPi :: !Bool
, xioNoEmptyElemFor :: ![String]
, xioAddDefaultDTD :: !Bool
, xioTextMode :: !Bool
, xioShowTree :: !Bool
, xioShowHaskell :: !Bool
}
data XIOXoutConfig = XMLoutput | XHTMLoutput | HTMLoutput | PLAINoutput
deriving (Eq)
data XIORelaxConfig = XIORxc { xioRelaxValidate :: !Bool
, xioRelaxSchema :: String
, xioRelaxCheckRestr :: !Bool
, xioRelaxValidateExtRef :: !Bool
, xioRelaxValidateInclude :: !Bool
, xioRelaxCollectErrors :: !Bool
, xioRelaxValidator :: IOSArrow XmlTree XmlTree
}
data XIOXmlSchemaConfig = XIOScc { xioXmlSchemaValidate :: !Bool
, xioXmlSchemaSchema :: String
, xioXmlSchemaValidator :: IOSArrow XmlTree XmlTree
}
data XIOCacheConfig = XIOCch { xioBinaryCompression :: CompressionFct
, xioBinaryDeCompression :: DeCompressionFct
, xioWithCache :: !Bool
, xioCacheDir :: !String
, xioDocumentAge :: !Int
, xioCache404Err :: !Bool
, xioCacheRead :: String -> IOSArrow XmlTree XmlTree
, xioStrictDeserialize :: !Bool
}
type MimeTypeHandlers = M.Map String (IOSArrow XmlTree XmlTree)
type CompressionFct = ByteString -> ByteString
type DeCompressionFct = ByteString -> ByteString
type SysConfig = XIOSysState -> XIOSysState
type SysConfigList = [SysConfig]
-- ----------------------------------------
theSysState :: Selector (XIOState us) XIOSysState
theSysState = S { getS = xioSysState
, setS = \ x s -> s { xioSysState = x}
}
theUserState :: Selector (XIOState us) us
theUserState = S { getS = xioUserState
, setS = \ x s -> s { xioUserState = x}
}
-- ----------------------------------------
theSysWriter :: Selector XIOSysState XIOSysWriter
theSysWriter = S { getS = xioSysWriter
, setS = \ x s -> s { xioSysWriter = x}
}
theErrorStatus :: Selector XIOSysState Int
theErrorStatus = theSysWriter
>>>
S { getS = xioErrorStatus
, setS = \ x s -> s { xioErrorStatus = x }
}
theErrorMsgList :: Selector XIOSysState XmlTrees
theErrorMsgList = theSysWriter
>>>
S { getS = xioErrorMsgList
, setS = \ x s -> s { xioErrorMsgList = x }
}
theRelaxNoOfErrors :: Selector XIOSysState Int
theRelaxNoOfErrors = theSysWriter
>>>
S { getS = xioRelaxNoOfErrors
, setS = \ x s -> s { xioRelaxNoOfErrors = x}
}
theRelaxDefineId :: Selector XIOSysState Int
theRelaxDefineId = theSysWriter
>>>
S { getS = xioRelaxDefineId
, setS = \ x s -> s { xioRelaxDefineId = x}
}
theRelaxAttrList :: Selector XIOSysState (AssocList String XmlTrees)
theRelaxAttrList = theSysWriter
>>>
S { getS = xioRelaxAttrList
, setS = \ x s -> s { xioRelaxAttrList = x}
}
-- ----------------------------------------
theSysEnv :: Selector XIOSysState XIOSysEnv
theSysEnv = S { getS = xioSysEnv
, setS = \ x s -> s { xioSysEnv = x}
}
theInputConfig :: Selector XIOSysState XIOInputConfig
theInputConfig = theSysEnv
>>>
S { getS = xioInputConfig
, setS = \ x s -> s { xioInputConfig = x}
}
theStrictInput :: Selector XIOSysState Bool
theStrictInput = theInputConfig
>>>
S { getS = xioStrictInput
, setS = \ x s -> s { xioStrictInput = x}
}
theEncodingErrors :: Selector XIOSysState Bool
theEncodingErrors = theInputConfig
>>>
S { getS = xioEncodingErrors
, setS = \ x s -> s { xioEncodingErrors = x}
}
theInputEncoding :: Selector XIOSysState String
theInputEncoding = theInputConfig
>>>
S { getS = xioInputEncoding
, setS = \ x s -> s { xioInputEncoding = x}
}
theHttpHandler :: Selector XIOSysState (IOSArrow XmlTree XmlTree)
theHttpHandler = theInputConfig
>>>
S { getS = xioHttpHandler
, setS = \ x s -> s { xioHttpHandler = x}
}
theInputOptions :: Selector XIOSysState Attributes
theInputOptions = theInputConfig
>>>
S { getS = xioInputOptions
, setS = \ x s -> s { xioInputOptions = x}
}
theRedirect :: Selector XIOSysState Bool
theRedirect = theInputConfig
>>>
S { getS = xioRedirect
, setS = \ x s -> s { xioRedirect = x}
}
theProxy :: Selector XIOSysState String
theProxy = theInputConfig
>>>
S { getS = xioProxy
, setS = \ x s -> s { xioProxy = x}
}
-- ----------------------------------------
theOutputConfig :: Selector XIOSysState XIOOutputConfig
theOutputConfig = theSysEnv
>>>
S { getS = xioOutputConfig
, setS = \ x s -> s { xioOutputConfig = x}
}
theIndent :: Selector XIOSysState Bool
theIndent = theOutputConfig
>>>
S { getS = xioIndent
, setS = \ x s -> s { xioIndent = x}
}
theOutputEncoding :: Selector XIOSysState String
theOutputEncoding = theOutputConfig
>>>
S { getS = xioOutputEncoding
, setS = \ x s -> s { xioOutputEncoding = x}
}
theOutputFmt :: Selector XIOSysState XIOXoutConfig
theOutputFmt = theOutputConfig
>>>
S { getS = xioOutputFmt
, setS = \ x s -> s { xioOutputFmt = x}
}
theXmlPi :: Selector XIOSysState Bool
theXmlPi = theOutputConfig
>>>
S { getS = xioXmlPi
, setS = \ x s -> s { xioXmlPi = x}
}
theNoEmptyElemFor :: Selector XIOSysState [String]
theNoEmptyElemFor = theOutputConfig
>>>
S { getS = xioNoEmptyElemFor
, setS = \ x s -> s { xioNoEmptyElemFor = x}
}
theAddDefaultDTD :: Selector XIOSysState Bool
theAddDefaultDTD = theOutputConfig
>>>
S { getS = xioAddDefaultDTD
, setS = \ x s -> s { xioAddDefaultDTD = x}
}
theTextMode :: Selector XIOSysState Bool
theTextMode = theOutputConfig
>>>
S { getS = xioTextMode
, setS = \ x s -> s { xioTextMode = x}
}
theShowTree :: Selector XIOSysState Bool
theShowTree = theOutputConfig
>>>
S { getS = xioShowTree
, setS = \ x s -> s { xioShowTree = x}
}
theShowHaskell :: Selector XIOSysState Bool
theShowHaskell = theOutputConfig
>>>
S { getS = xioShowHaskell
, setS = \ x s -> s { xioShowHaskell = x}
}
-- ----------------------------------------
theRelaxConfig :: Selector XIOSysState XIORelaxConfig
theRelaxConfig = theSysEnv
>>>
S { getS = xioRelaxConfig
, setS = \ x s -> s { xioRelaxConfig = x}
}
theRelaxValidate :: Selector XIOSysState Bool
theRelaxValidate = theRelaxConfig
>>>
S { getS = xioRelaxValidate
, setS = \ x s -> s { xioRelaxValidate = x}
}
theRelaxSchema :: Selector XIOSysState String
theRelaxSchema = theRelaxConfig
>>>
S { getS = xioRelaxSchema
, setS = \ x s -> s { xioRelaxSchema = x}
}
theRelaxCheckRestr :: Selector XIOSysState Bool
theRelaxCheckRestr = theRelaxConfig
>>>
S { getS = xioRelaxCheckRestr
, setS = \ x s -> s { xioRelaxCheckRestr = x}
}
theRelaxValidateExtRef :: Selector XIOSysState Bool
theRelaxValidateExtRef = theRelaxConfig
>>>
S { getS = xioRelaxValidateExtRef
, setS = \ x s -> s { xioRelaxValidateExtRef = x}
}
theRelaxValidateInclude :: Selector XIOSysState Bool
theRelaxValidateInclude = theRelaxConfig
>>>
S { getS = xioRelaxValidateInclude
, setS = \ x s -> s { xioRelaxValidateInclude = x}
}
theRelaxCollectErrors :: Selector XIOSysState Bool
theRelaxCollectErrors = theRelaxConfig
>>>
S { getS = xioRelaxCollectErrors
, setS = \ x s -> s { xioRelaxCollectErrors = x}
}
theRelaxValidator :: Selector XIOSysState (IOSArrow XmlTree XmlTree)
theRelaxValidator = theRelaxConfig
>>>
S { getS = xioRelaxValidator
, setS = \ x s -> s { xioRelaxValidator = x}
}
-- ----------------------------------------
theXmlSchemaConfig :: Selector XIOSysState XIOXmlSchemaConfig
theXmlSchemaConfig = theSysEnv
>>>
S { getS = xioXmlSchemaConfig
, setS = \ x s -> s { xioXmlSchemaConfig = x}
}
theXmlSchemaValidate :: Selector XIOSysState Bool
theXmlSchemaValidate = theXmlSchemaConfig
>>>
S { getS = xioXmlSchemaValidate
, setS = \ x s -> s { xioXmlSchemaValidate = x}
}
theXmlSchemaSchema :: Selector XIOSysState String
theXmlSchemaSchema = theXmlSchemaConfig
>>>
S { getS = xioXmlSchemaSchema
, setS = \ x s -> s { xioXmlSchemaSchema = x}
}
theXmlSchemaValidator :: Selector XIOSysState (IOSArrow XmlTree XmlTree)
theXmlSchemaValidator = theXmlSchemaConfig
>>>
S { getS = xioXmlSchemaValidator
, setS = \ x s -> s { xioXmlSchemaValidator = x}
}
-- ----------------------------------------
theParseConfig :: Selector XIOSysState XIOParseConfig
theParseConfig = theSysEnv
>>>
S { getS = xioParseConfig
, setS = \ x s -> s { xioParseConfig = x}
}
theErrorMsgHandler :: Selector XIOSysState (String -> IO ())
theErrorMsgHandler = theSysEnv
>>>
S { getS = xioErrorMsgHandler
, setS = \ x s -> s { xioErrorMsgHandler = x }
}
theErrorMsgCollect :: Selector XIOSysState Bool
theErrorMsgCollect = theSysEnv
>>>
S { getS = xioErrorMsgCollect
, setS = \ x s -> s { xioErrorMsgCollect = x }
}
theBaseURI :: Selector XIOSysState String
theBaseURI = theSysEnv
>>>
S { getS = xioBaseURI
, setS = \ x s -> s { xioBaseURI = x }
}
theDefaultBaseURI :: Selector XIOSysState String
theDefaultBaseURI = theSysEnv
>>>
S { getS = xioDefaultBaseURI
, setS = \ x s -> s { xioDefaultBaseURI = x }
}
theTraceLevel :: Selector XIOSysState Int
theTraceLevel = theSysEnv
>>>
S { getS = xioTraceLevel
, setS = \ x s -> s { xioTraceLevel = x }
}
theTraceCmd :: Selector XIOSysState (Int -> String -> IO ())
theTraceCmd = theSysEnv
>>>
S { getS = xioTraceCmd
, setS = \ x s -> s { xioTraceCmd = x }
}
theTrace :: Selector XIOSysState (Int, Int -> String -> IO ())
theTrace = theTraceLevel .&&&. theTraceCmd
theAttrList :: Selector XIOSysState Attributes
theAttrList = theSysEnv
>>>
S { getS = xioAttrList
, setS = \ x s -> s { xioAttrList = x }
}
theMimeTypes :: Selector XIOSysState MimeTypeTable
theMimeTypes = theParseConfig
>>>
S { getS = xioMimeTypes
, setS = \ x s -> s { xioMimeTypes = x }
}
theMimeTypeHandlers :: Selector XIOSysState MimeTypeHandlers
theMimeTypeHandlers = theParseConfig
>>>
S { getS = xioMimeTypeHandlers
, setS = \ x s -> s { xioMimeTypeHandlers = x }
}
theMimeTypeFile :: Selector XIOSysState String
theMimeTypeFile = theParseConfig
>>>
S { getS = xioMimeTypeFile
, setS = \ x s -> s { xioMimeTypeFile = x }
}
theAcceptedMimeTypes :: Selector XIOSysState [String]
theAcceptedMimeTypes = theParseConfig
>>>
S { getS = xioAcceptedMimeTypes
, setS = \ x s -> s { xioAcceptedMimeTypes = x }
}
theFileMimeType :: Selector XIOSysState String
theFileMimeType = theParseConfig
>>>
S { getS = xioFileMimeType
, setS = \ x s -> s { xioFileMimeType = x }
}
theWarnings :: Selector XIOSysState Bool
theWarnings = theParseConfig
>>>
S { getS = xioWarnings
, setS = \ x s -> s { xioWarnings = x }
}
theRemoveWS :: Selector XIOSysState Bool
theRemoveWS = theParseConfig
>>>
S { getS = xioRemoveWS
, setS = \ x s -> s { xioRemoveWS = x }
}
thePreserveComment :: Selector XIOSysState Bool
thePreserveComment = theParseConfig
>>>
S { getS = xioPreserveComment
, setS = \ x s -> s { xioPreserveComment = x }
}
theParseByMimeType :: Selector XIOSysState Bool
theParseByMimeType = theParseConfig
>>>
S { getS = xioParseByMimeType
, setS = \ x s -> s { xioParseByMimeType = x }
}
theParseHTML :: Selector XIOSysState Bool
theParseHTML = theParseConfig
>>>
S { getS = xioParseHTML
, setS = \ x s -> s { xioParseHTML = x }
}
theLowerCaseNames :: Selector XIOSysState Bool
theLowerCaseNames = theParseConfig
>>>
S { getS = xioLowerCaseNames
, setS = \ x s -> s { xioLowerCaseNames = x }
}
theValidate :: Selector XIOSysState Bool
theValidate = theParseConfig
>>>
S { getS = xioValidate
, setS = \ x s -> s { xioValidate = x }
}
theSubstDTDEntities :: Selector XIOSysState Bool
theSubstDTDEntities = theParseConfig
>>>
S { getS = xioSubstDTDEntities
, setS = \ x s -> s { xioSubstDTDEntities = x }
}
theSubstHTMLEntities :: Selector XIOSysState Bool
theSubstHTMLEntities = theParseConfig
>>>
S { getS = xioSubstHTMLEntities
, setS = \ x s -> s { xioSubstHTMLEntities = x }
}
theCheckNamespaces :: Selector XIOSysState Bool
theCheckNamespaces = theParseConfig
>>>
S { getS = xioCheckNamespaces
, setS = \ x s -> s { xioCheckNamespaces = x }
}
theCanonicalize :: Selector XIOSysState Bool
theCanonicalize = theParseConfig
>>>
S { getS = xioCanonicalize
, setS = \ x s -> s { xioCanonicalize = x }
}
theIgnoreNoneXmlContents :: Selector XIOSysState Bool
theIgnoreNoneXmlContents = theParseConfig
>>>
S { getS = xioIgnoreNoneXmlContents
, setS = \ x s -> s { xioIgnoreNoneXmlContents = x }
}
theTagSoup :: Selector XIOSysState Bool
theTagSoup = theParseConfig
>>>
S { getS = xioTagSoup
, setS = \ x s -> s { xioTagSoup = x }
}
theTagSoupParser :: Selector XIOSysState (IOSArrow XmlTree XmlTree)
theTagSoupParser = theParseConfig
>>>
S { getS = xioTagSoupParser
, setS = \ x s -> s { xioTagSoupParser = x }
}
theExpat :: Selector XIOSysState Bool
theExpat = theParseConfig
>>>
S { getS = xioExpat
, setS = \ x s -> s { xioExpat = x }
}
theExpatParser :: Selector XIOSysState (IOSArrow XmlTree XmlTree)
theExpatParser = theParseConfig
>>>
S { getS = xioExpatParser
, setS = \ x s -> s { xioExpatParser = x }
}
theExpatErrors :: Selector XIOSysState (IOSArrow XmlTree XmlTree)
theExpatErrors = theSysWriter
>>>
S { getS = xioExpatErrors
, setS = \ x s -> s { xioExpatErrors = x }
}
-- ----------------------------------------
theCacheConfig :: Selector XIOSysState XIOCacheConfig
theCacheConfig = theSysEnv
>>>
S { getS = xioCacheConfig
, setS = \ x s -> s { xioCacheConfig = x}
}
theBinaryCompression :: Selector XIOSysState (ByteString -> ByteString)
theBinaryCompression = theCacheConfig
>>>
S { getS = xioBinaryCompression
, setS = \ x s -> s { xioBinaryCompression = x}
}
theBinaryDeCompression :: Selector XIOSysState (ByteString -> ByteString)
theBinaryDeCompression = theCacheConfig
>>>
S { getS = xioBinaryDeCompression
, setS = \ x s -> s { xioBinaryDeCompression = x}
}
theWithCache :: Selector XIOSysState Bool
theWithCache = theCacheConfig
>>>
S { getS = xioWithCache
, setS = \ x s -> s { xioWithCache = x}
}
theCacheDir :: Selector XIOSysState String
theCacheDir = theCacheConfig
>>>
S { getS = xioCacheDir
, setS = \ x s -> s { xioCacheDir = x}
}
theDocumentAge :: Selector XIOSysState Int
theDocumentAge = theCacheConfig
>>>
S { getS = xioDocumentAge
, setS = \ x s -> s { xioDocumentAge = x}
}
theCache404Err :: Selector XIOSysState Bool
theCache404Err = theCacheConfig
>>>
S { getS = xioCache404Err
, setS = \ x s -> s { xioCache404Err = x}
}
theCacheRead :: Selector XIOSysState (String -> IOSArrow XmlTree XmlTree)
theCacheRead = theCacheConfig
>>>
S { getS = xioCacheRead
, setS = \ x s -> s { xioCacheRead = x}
}
theStrictDeserialize :: Selector XIOSysState Bool
theStrictDeserialize = theCacheConfig
>>>
S { getS = xioStrictDeserialize
, setS = \ x s -> s { xioStrictDeserialize = x}
}
-- ------------------------------------------------------------
getSysVar :: Selector XIOSysState c -> IOStateArrow s b c
getSysVar sel = IOSLA $ \ s _x ->
return (s, (:[]) . getS (theSysState >>> sel) $ s)
setSysVar :: Selector XIOSysState c -> IOStateArrow s c c
setSysVar sel = (\ v -> configSysVar $ setS sel v) $< this
chgSysVar :: Selector XIOSysState c -> (b -> c -> c) -> IOStateArrow s b b
chgSysVar sel op = (\ v -> configSysVar $ chgS sel (op v)) $< this
configSysVar :: SysConfig -> IOStateArrow s c c
configSysVar cf = IOSLA $ \ s v ->
return (chgS theSysState cf s, [v])
configSysVars :: SysConfigList -> IOStateArrow s c c
configSysVars cfs = configSysVar $ foldr (>>>) id $ cfs
localSysVar :: Selector XIOSysState c -> IOStateArrow s a b -> IOStateArrow s a b
localSysVar sel f = IOSLA $ \ s0 v ->
let sel' = theSysState >>> sel in
let c0 = getS sel' s0 in
do
(s1, res) <- runIOSLA f s0 v
return (setS sel' c0 s1, res)
localSysEnv :: IOStateArrow s a b -> IOStateArrow s a b
localSysEnv = localSysVar theSysEnv
incrSysVar :: Selector XIOSysState Int -> IOStateArrow s a Int
incrSysVar cnt = getSysVar cnt
>>>
arr (+1)
>>>
setSysVar cnt
>>>
arr (\ x -> x - 1)
-- ------------------------------
-- | store a string in global state under a given attribute name
setSysAttr :: String -> IOStateArrow s String String
setSysAttr n = chgSysVar theAttrList (addEntry n)
-- | remove an entry in global state, arrow input remains unchanged
unsetSysAttr :: String -> IOStateArrow s b b
unsetSysAttr n = configSysVar $ chgS theAttrList (delEntry n)
-- | read an attribute value from global state
getSysAttr :: String -> IOStateArrow s b String
getSysAttr n = getSysVar theAttrList
>>^
lookup1 n
-- | read all attributes from global state
getAllSysAttrs :: IOStateArrow s b Attributes
getAllSysAttrs = getSysVar theAttrList
setSysAttrString :: String -> String -> IOStateArrow s b b
setSysAttrString n v = perform ( constA v
>>>
setSysAttr n
)
-- | store an int value in global state
setSysAttrInt :: String -> Int -> IOStateArrow s b b
setSysAttrInt n v = setSysAttrString n (show v)
-- | read an int value from global state
--
-- > getSysAttrInt 0 myIntAttr
getSysAttrInt :: Int -> String -> IOStateArrow s b Int
getSysAttrInt def n = getSysAttr n
>>^
toInt def
toInt :: Int -> String -> Int
toInt def s
| not (null s)
&&
all isDigit s = read s
| otherwise = def
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState/URIHandling.hs 0000644 0000000 0000000 00000021073 12752557014 021063 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlState.URIHandling
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
the basic state arrows for URI handling
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlState.URIHandling
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowIO
import Control.Monad ( mzero
, mplus )
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.XmlState.TraceHandling
import Data.Maybe
import Network.URI ( URI
, escapeURIChar
, isUnescapedInURI
, nonStrictRelativeTo
, parseURIReference
, uriAuthority
, uriFragment
, uriPath
, uriPort
, uriQuery
, uriRegName
, uriScheme
, uriUserInfo
)
import System.Directory ( getCurrentDirectory )
-- ------------------------------------------------------------
-- | set the base URI of a document, used e.g. for reading includes, e.g. external entities,
-- the input must be an absolute URI
setBaseURI :: IOStateArrow s String String
setBaseURI = setSysVar theBaseURI
>>>
traceValue 2 (("setBaseURI: new base URI is " ++) . show)
-- | read the base URI from the globale state
getBaseURI :: IOStateArrow s b String
getBaseURI = getSysVar theBaseURI
>>>
( ( getDefaultBaseURI
>>>
setBaseURI
>>>
getBaseURI
)
`when`
isA null -- set and get it, if not yet done
)
-- | change the base URI with a possibly relative URI, can be used for
-- evaluating the xml:base attribute. Returns the new absolute base URI.
-- Fails, if input is not parsable with parseURIReference
--
-- see also: 'setBaseURI', 'mkAbsURI'
changeBaseURI :: IOStateArrow s String String
changeBaseURI = mkAbsURI >>> setBaseURI
-- | set the default base URI, if parameter is null, the system base (@ file:\/\/\/\\/ @) is used,
-- else the parameter, must be called before any document is read
setDefaultBaseURI :: String -> IOStateArrow s b String
setDefaultBaseURI base = ( if null base
then arrIO getDir
else constA base
)
>>>
setSysVar theDefaultBaseURI
>>>
traceValue 2 (("setDefaultBaseURI: new default base URI is " ++) . show)
where
getDir _ = do
cwd <- getCurrentDirectory
return ("file://" ++ normalize cwd ++ "/")
-- under Windows getCurrentDirectory returns something like: "c:\path\to\file"
-- backslaches are not allowed in URIs and paths must start with a /
-- so this is transformed into "/c:/path/to/file"
normalize wd'@(d : ':' : _)
| d `elem` ['A'..'Z']
||
d `elem` ['a'..'z']
= '/' : concatMap win32ToUriChar wd'
normalize wd' = concatMap escapeNonUriChar wd'
win32ToUriChar '\\' = "/"
win32ToUriChar c = escapeNonUriChar c
escapeNonUriChar c = escapeURIChar isUnescapedInURI c -- from Network.URI
-- | get the default base URI
getDefaultBaseURI :: IOStateArrow s b String
getDefaultBaseURI = getSysVar theDefaultBaseURI -- read default uri in system state
>>>
( ( setDefaultBaseURI "" -- set the default uri in system state
>>>
getDefaultBaseURI
)
`when` isA null
) -- when uri not yet set
-- ------------------------------------------------------------
-- | remember base uri, run an arrow and restore the base URI, used with external entity substitution
runInLocalURIContext :: IOStateArrow s b c -> IOStateArrow s b c
runInLocalURIContext f = localSysVar theBaseURI f
-- ----------------------------------------------------------
-- | parse a URI reference, in case of a failure,
-- try to escape unescaped chars, convert backslashes to slashes for windows paths,
-- and try parsing again
parseURIReference' :: String -> Maybe URI
parseURIReference' uri
= parseURIReference uri
`mplus`
( if unesc
then parseURIReference uri'
else mzero
)
where
unesc = not . all isUnescapedInURI $ uri
escape '\\' = "/"
escape c = escapeURIChar isUnescapedInURI c
uri' = concatMap escape uri
-- | compute the absolut URI for a given URI and a base URI
expandURIString :: String -> String -> Maybe String
expandURIString uri base
= do
base' <- parseURIReference' base
uri' <- parseURIReference' uri
-- abs' <- nonStrictRelativeTo uri' base'
let abs' = nonStrictRelativeTo uri' base'
return $ show abs'
-- | arrow variant of 'expandURIString', fails if 'expandURIString' returns Nothing
expandURI :: ArrowXml a => a (String, String) String
expandURI
= arrL (maybeToList . uncurry expandURIString)
-- | arrow for expanding an input URI into an absolute URI using global base URI, fails if input is not a legal URI
mkAbsURI :: IOStateArrow s String String
mkAbsURI
= ( this &&& getBaseURI ) >>> expandURI
-- | arrow for selecting the scheme (protocol) of the URI, fails if input is not a legal URI.
--
-- See Network.URI for URI components
getSchemeFromURI :: ArrowList a => a String String
getSchemeFromURI = getPartFromURI scheme
where
scheme = init . uriScheme
-- | arrow for selecting the registered name (host) of the URI, fails if input is not a legal URI
getRegNameFromURI :: ArrowList a => a String String
getRegNameFromURI = getPartFromURI host
where
host = maybe "" uriRegName . uriAuthority
-- | arrow for selecting the port number of the URI without leading \':\', fails if input is not a legal URI
getPortFromURI :: ArrowList a => a String String
getPortFromURI = getPartFromURI port
where
port = dropWhile (==':') . maybe "" uriPort . uriAuthority
-- | arrow for selecting the user info of the URI without trailing \'\@\', fails if input is not a legal URI
getUserInfoFromURI :: ArrowList a => a String String
getUserInfoFromURI = getPartFromURI ui
where
ui = reverse . dropWhile (=='@') . reverse . maybe "" uriUserInfo . uriAuthority
-- | arrow for computing the path component of an URI, fails if input is not a legal URI
getPathFromURI :: ArrowList a => a String String
getPathFromURI = getPartFromURI uriPath
-- | arrow for computing the query component of an URI, fails if input is not a legal URI
getQueryFromURI :: ArrowList a => a String String
getQueryFromURI = getPartFromURI uriQuery
-- | arrow for computing the fragment component of an URI, fails if input is not a legal URI
getFragmentFromURI :: ArrowList a => a String String
getFragmentFromURI = getPartFromURI uriFragment
-- | arrow for computing the path component of an URI, fails if input is not a legal URI
getPartFromURI :: ArrowList a => (URI -> String) -> a String String
getPartFromURI sel
= arrL (maybeToList . getPart)
where
getPart s = do
uri <- parseURIReference' s
return (sel uri)
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Arrow/XmlState/SystemConfig.hs 0000644 0000000 0000000 00000025105 13001350442 021351 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.XmlState.SystemConfig
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
system configuration and common options options
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.XmlState.SystemConfig
where
import Control.Arrow
import Data.Map ( insert )
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlState.ErrorHandling
import Text.XML.HXT.Arrow.XmlState.TypeDefs
-- ------------------------------
-- config options
-- | @withTrace level@ : system option, set the trace level, (0..4)
withTrace :: Int -> SysConfig
withTrace = setS theTraceLevel
-- | @withSysAttr key value@ : store an arbitrary key value pair in system state
withSysAttr :: String -> String -> SysConfig
withSysAttr n v = chgS theAttrList (addEntry n v)
-- | Specify the set of accepted mime types.
--
-- All contents of documents for which the mime type is not found in this list
-- are discarded.
withAcceptedMimeTypes :: [String] -> SysConfig
withAcceptedMimeTypes = setS theAcceptedMimeTypes
-- | Specify a content handler for documents of a given mime type
withMimeTypeHandler :: String -> IOSArrow XmlTree XmlTree -> SysConfig
withMimeTypeHandler mt pa = chgS theMimeTypeHandlers $ insert mt pa
-- | @withMimeTypeFile filename@ : input option,
-- set the mime type table for @file:@ documents by given file.
-- The format of this config file must be in the syntax of a debian linux \"mime.types\" config file
withMimeTypeFile :: String -> SysConfig
withMimeTypeFile = setS theMimeTypeFile
-- | Force a given mime type for all file contents.
--
-- The mime type for file access will then not be computed by looking into a mime.types file
withFileMimeType :: String -> SysConfig
withFileMimeType = setS theFileMimeType
-- | @withWarnings yes/no@ : system option, issue warnings during reading, HTML parsing and processing,
-- default is 'yes'
withWarnings :: Bool -> SysConfig
withWarnings = setS theWarnings
-- | @withErrors yes/no@ : system option for suppressing error messages, default is 'no'
withErrors :: Bool -> SysConfig
withErrors b = setS theErrorMsgHandler h
where
h | b = errorOutputToStderr
| otherwise = const $ return ()
-- | @withRemoveWS yes/no@ : read and write option, remove all whitespace, used for document indentation, default is 'no'
withRemoveWS :: Bool -> SysConfig
withRemoveWS = setS theRemoveWS
-- | @withPreserveComment yes/no@ : read option, preserve comments during canonicalization, default is 'no'
withPreserveComment :: Bool -> SysConfig
withPreserveComment = setS thePreserveComment
-- | @withParseByMimeType yes/no@ : read option, select the parser by the mime type of the document
-- (pulled out of the HTTP header).
--
-- When the mime type is set to \"text\/html\"
-- the configured HTML parser is taken, when it\'s set to
-- \"text\/xml\" or \"text\/xhtml\" the configured XML parser is taken.
-- If the mime type is something else, no further processing is performed,
-- the contents is given back to the application in form of a single text node.
-- If the default document encoding is set to isoLatin1, this even enables processing
-- of arbitray binary data.
withParseByMimeType :: Bool -> SysConfig
withParseByMimeType = setS theParseByMimeType
-- | @withParseHTML yes/no@: read option, use HTML parser, default is 'no' (use XML parser)
withParseHTML :: Bool -> SysConfig
withParseHTML = setS theParseHTML
-- | @withValidate yes/no@: read option, validate document against DTD, default is 'yes'
withValidate :: Bool -> SysConfig
withValidate = setS theValidate
-- | @withSubstDTDEntities yes/no@: read option, substitute general entities defined in DTD, default is 'yes'.
-- switching this option and the validate option off can lead to faster parsing, because then
-- there is no need to access the DTD
withSubstDTDEntities :: Bool -> SysConfig
withSubstDTDEntities = setS theSubstDTDEntities
-- | @withSubstHTMLEntities yes/no@: read option, substitute general entities defined in HTML DTD, default is 'no'.
-- switching this option on and the substDTDEntities and validate options off can lead to faster parsing
-- because there is no need to access a DTD, but still the HTML general entities are substituted
withSubstHTMLEntities :: Bool -> SysConfig
withSubstHTMLEntities = setS theSubstHTMLEntities
-- | @withCheckNamespaces yes/no@: read option, check namespaces, default is 'no'
withCheckNamespaces :: Bool -> SysConfig
withCheckNamespaces = setS theCheckNamespaces
-- | @withCanonicalize yes/no@ : read option, canonicalize document, default is 'yes'
withCanonicalize :: Bool -> SysConfig
withCanonicalize = setS theCanonicalize
-- | @withIgnoreNoneXmlContents yes\/no@ : input option, ignore document contents of none XML\/HTML documents.
--
-- This option can be useful for implementing crawler like applications, e.g. an URL checker.
-- In those cases net traffic can be reduced.
withIgnoreNoneXmlContents :: Bool -> SysConfig
withIgnoreNoneXmlContents = setS theIgnoreNoneXmlContents
-- ------------------------------------------------------------
-- | @withStrictInput yes/no@ : input option, input of file and HTTP contents is read eagerly, default is 'no'
withStrictInput :: Bool -> SysConfig
withStrictInput = setS theStrictInput
-- | @withEncodingErrors yes/no@ : input option, ignore all encoding errors, default is 'no'
withEncodingErrors :: Bool -> SysConfig
withEncodingErrors = setS theEncodingErrors
-- | @withInputEncoding encodingName@ : input option
--
-- Set default document encoding ('utf8', 'isoLatin1', 'usAscii', 'iso8859_2', ... , 'iso8859_16', ...).
-- Only XML, HTML and text documents are decoded,
-- default decoding for XML\/HTML is utf8, for text iso latin1 (no decoding).
withInputEncoding :: String -> SysConfig
withInputEncoding = setS theInputEncoding
-- | @withDefaultBaseURI URI@ , input option, set the default base URI
--
-- This option can be useful when parsing documents from stdin or contained in a string, and interpreting
-- relative URIs within the document
withDefaultBaseURI :: String -> SysConfig
withDefaultBaseURI = setS theDefaultBaseURI
withInputOption :: String -> String -> SysConfig
withInputOption n v = chgS theInputOptions (addEntry n v)
withInputOptions :: Attributes -> SysConfig
withInputOptions = foldr (>>>) id . map (uncurry withInputOption)
-- | @withRedirect yes/no@ : input option, automatically follow redirected URIs, default is 'yes'
withRedirect :: Bool -> SysConfig
withRedirect = setS theRedirect
-- | @withProxy \"host:port\"@ : input option, configure a proxy for HTTP access, e.g. www-cache:3128
withProxy :: String -> SysConfig
withProxy = setS theProxy
-- ------------------------------------------------------------
-- | @withIndent yes/no@ : output option, indent document before output, default is 'no'
withIndent :: Bool -> SysConfig
withIndent = setS theIndent
-- | @withOutputEncoding encoding@ , output option,
-- default is the default input encoding or utf8, if input encoding is not set
withOutputEncoding :: String -> SysConfig
withOutputEncoding = setS theOutputEncoding
-- | @withOutputXML@ : output option, default writing
--
-- Default is writing XML: quote special XML chars \>,\<,\",\',& where neccessary,
-- add XML processing instruction
-- and encode document with respect to 'withOutputEncoding'
withOutputXML :: SysConfig
withOutputXML = setS theOutputFmt XMLoutput
-- | Write XHTML: quote all special XML chars, use HTML entity refs or char refs for none ASCII chars
withOutputHTML :: SysConfig
withOutputHTML = setS theOutputFmt HTMLoutput
-- | Write XML: quote only special XML chars, don't substitute chars by HTML entities,
-- and don\'t generate empty elements for HTML elements,
-- which may contain any contents, e.g. @@ instead of @@
withOutputXHTML :: SysConfig
withOutputXHTML = setS theOutputFmt XHTMLoutput
-- | suppreses all char and entitiy substitution
withOutputPLAIN :: SysConfig
withOutputPLAIN = setS theOutputFmt PLAINoutput
withXmlPi :: Bool -> SysConfig
withXmlPi = setS theXmlPi
withNoEmptyElemFor :: [String] -> SysConfig
withNoEmptyElemFor = setS theNoEmptyElemFor
withAddDefaultDTD :: Bool -> SysConfig
withAddDefaultDTD = setS theAddDefaultDTD
withTextMode :: Bool -> SysConfig
withTextMode = setS theTextMode
withShowTree :: Bool -> SysConfig
withShowTree = setS theShowTree
withShowHaskell :: Bool -> SysConfig
withShowHaskell = setS theShowHaskell
-- | Configure compression and decompression for binary serialization/deserialization.
-- First component is the compression function applied after serialization,
-- second the decompression applied before deserialization.
withCompression :: (CompressionFct, DeCompressionFct) -> SysConfig
withCompression = setS (theBinaryCompression .&&&. theBinaryDeCompression)
-- | Strict input for deserialization of binary data
withStrictDeserialize :: Bool -> SysConfig
withStrictDeserialize = setS theStrictDeserialize
-- ------------------------------------------------------------
yes :: Bool
yes = True
no :: Bool
no = False
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Core.hs 0000644 0000000 0000000 00000004226 12752557014 015015 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Core
Copyright : Copyright (C) 2006-2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
The HXT arrow interface
The application programming interface to the arrow modules of the Haskell XML Toolbox.
This module exports all important arrows for input, output, parsing, validating and transforming XML.
It also exports all basic datatypes and functions of the toolbox.
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Core
( module Control.Arrow.ListArrows
, module Text.XML.HXT.DOM.Interface
, module Text.XML.HXT.Arrow.XmlArrow
, module Text.XML.HXT.Arrow.XmlState
, module Text.XML.HXT.Arrow.DocumentInput
, module Text.XML.HXT.Arrow.DocumentOutput
, module Text.XML.HXT.Arrow.Edit
, module Text.XML.HXT.Arrow.GeneralEntitySubstitution
, module Text.XML.HXT.Arrow.Namespace
, module Text.XML.HXT.Arrow.Pickle
, module Text.XML.HXT.Arrow.ProcessDocument
, module Text.XML.HXT.Arrow.ReadDocument
, module Text.XML.HXT.Arrow.WriteDocument
, module Text.XML.HXT.Arrow.Binary
, module Text.XML.HXT.Arrow.XmlOptions
, module Text.XML.HXT.Version
)
where
import Control.Arrow.ListArrows -- arrow classes
import Data.Atom () -- import this explicitly
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.DocumentInput
import Text.XML.HXT.Arrow.DocumentOutput
import Text.XML.HXT.Arrow.Edit
import Text.XML.HXT.Arrow.GeneralEntitySubstitution
import Text.XML.HXT.Arrow.Namespace
import Text.XML.HXT.Arrow.Pickle
import Text.XML.HXT.Arrow.ProcessDocument
import Text.XML.HXT.Arrow.ReadDocument
import Text.XML.HXT.Arrow.WriteDocument
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlOptions
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlRegex () -- import this explicitly
import Text.XML.HXT.Arrow.Binary
import Text.XML.HXT.Version
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/FormatXmlTree.hs 0000644 0000000 0000000 00000003003 12752557014 017265 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.FormatXmlTree
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
Format a xml tree in tree representation
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.FormatXmlTree
( formatXmlTree
, formatXmlContents
)
where
import Data.Maybe
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.ShowXml
import Text.XML.HXT.DOM.XmlNode
-- ------------------------------------------------------------
formatXmlContents :: XmlTree -> XmlTrees
formatXmlContents t
= [mkText (formatXmlTree t)]
formatXmlTree :: XmlTree -> String
formatXmlTree
= formatTree xnode2String
xnode2String :: XNode -> String
xnode2String n
| isElem n
= "XTag " ++ showName n ++ showAtts n
| isPi n
= "XPi " ++ showName n ++ showAtts n
| otherwise
= show n
where
showName :: XNode -> String
showName = maybe "" show . getName
showAtts :: XNode -> String
showAtts = concatMap showAl . fromMaybe [] . getAttrl
showAl :: XmlTree -> String
showAl t -- (NTree (XAttr an) av)
| isAttr t
= "\n| " ++ (maybe "" show . getName $ t) ++ "=" ++ show (xshow . getChildren $ t)
| otherwise
= show t
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/Interface.hs 0000644 0000000 0000000 00000002047 12752557014 016443 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.Interface
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
The interface to the primitive DOM data types and constants
and utility functions
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.Interface
( module Text.XML.HXT.DOM.XmlKeywords
, module Text.XML.HXT.DOM.TypeDefs
, module Text.XML.HXT.DOM.Util
, module Text.XML.HXT.DOM.MimeTypes
, module Data.String.EncodingNames
)
where
import Text.XML.HXT.DOM.XmlKeywords -- constants
import Text.XML.HXT.DOM.TypeDefs -- XML Tree types
import Text.XML.HXT.DOM.Util
import Text.XML.HXT.DOM.MimeTypes -- mime types related stuff
import Data.String.EncodingNames -- char encoding names for readDocument
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/MimeTypeDefaults.hs 0000644 0000000 0000000 00000054515 12752557014 017773 0 ustar 00 0000000 0000000 -- | default mime type table
--
-- this file is generated from file /etc/mime.types
module Text.XML.HXT.DOM.MimeTypeDefaults
where
-- | the table with the mapping from file name extensions to mime types
mimeTypeDefaults :: [(String, String)]
mimeTypeDefaults
= [ ("123", "application/vnd.lotus-1-2-3")
, ("3ds", "image/x-3ds")
, ("3g2", "video/x-3gpp2")
, ("3gp", "video/3gpp")
, ("669", "audio/x-mod")
, ("BAY", "image/x-dcraw")
, ("BLEND", "application/x-blender")
, ("BMQ", "image/x-dcraw")
, ("C", "text/x-c++src")
, ("CR2", "image/x-dcraw")
, ("CRW", "image/x-dcraw")
, ("CS1", "image/x-dcraw")
, ("CSSL", "text/css")
, ("DC2", "image/x-dcraw")
, ("DCR", "image/x-dcraw")
, ("FFF", "image/x-dcraw")
, ("K25", "image/x-dcraw")
, ("KDC", "image/x-dcraw")
, ("MOS", "image/x-dcraw")
, ("MRW", "image/x-dcraw")
, ("NEF", "image/x-dcraw")
, ("NSV", "video/x-nsv")
, ("ORF", "image/x-dcraw")
, ("PAR2", "application/x-par2")
, ("PEF", "image/x-dcraw")
, ("RAF", "image/x-dcraw")
, ("RDC", "image/x-dcraw")
, ("SRF", "image/x-dcraw")
, ("TTC", "application/x-font-ttf")
, ("X3F", "image/x-dcraw")
, ("XM", "audio/x-mod")
, ("Z", "application/x-compress")
, ("a", "application/x-archive")
, ("aac", "audio/x-aac")
, ("abw", "application/x-abiword")
, ("abw.CRASHED", "application/x-abiword")
, ("abw.gz", "application/x-abiword")
, ("ac3", "audio/ac3")
, ("adb", "text/x-adasrc")
, ("ads", "text/x-adasrc")
, ("afm", "application/x-font-afm")
, ("ag", "image/x-applix-graphics")
, ("ai", "application/illustrator")
, ("aif", "audio/x-aiff")
, ("aif", "audio/x-aiff")
, ("aifc", "audio/x-aiff")
, ("aiff", "audio/x-aiff")
, ("aiff", "audio/x-aiff")
, ("al", "application/x-perl")
, ("anim[1-9j]", "video/x-anim")
, ("aop", "application/x-frontline")
, ("arj", "application/x-arj")
, ("as", "application/x-applix-spreadsheet")
, ("asax", "application/x-asax")
, ("asc", "text/plain")
, ("ascx", "application/x-ascx")
, ("asf", "video/x-ms-asf")
, ("ashx", "application/x-ashx")
, ("asix", "application/x-asix")
, ("asmx", "application/x-asmx")
, ("asp", "application/x-asp")
, ("aspx", "application/x-aspx")
, ("asx", "video/x-ms-asf")
, ("au", "audio/basic")
, ("avi", "video/x-msvideo")
, ("aw", "application/x-applix-word")
, ("axd", "application/x-axd")
, ("bak", "application/x-trash")
, ("bay", "image/x-dcraw")
, ("bcpio", "application/x-bcpio")
, ("bdf", "application/x-font-bdf")
, ("bib", "text/x-bibtex")
, ("bin", "application/octet-stream")
, ("bin", "application/x-stuffit")
, ("blend", "application/x-blender")
, ("blender", "application/x-blender")
, ("bmp", "image/bmp")
, ("bmq", "image/x-dcraw")
, ("boo", "text/x-boo")
, ("bz", "application/x-bzip")
, ("bz", "application/x-bzip")
, ("bz2", "application/x-bzip")
, ("bz2", "application/x-bzip")
, ("c", "text/x-csrc")
, ("c++", "text/x-c++src")
, ("caves", "application/x-gnome-stones")
, ("cc", "text/x-c++src")
, ("cdf", "application/x-netcdf")
, ("cdr", "application/vnd.corel-draw")
, ("cer", "application/x-x509-ca-cert")
, ("cert", "application/x-x509-ca-cert")
, ("cgi", "application/x-cgi")
, ("cgm", "image/cgm")
, ("chm", "application/x-chm")
, ("chrt", "application/x-kchart")
, ("cht", "application/chemtool")
, ("class", "application/x-java")
, ("cls", "text/x-tex")
, ("cmbx", "application/x-cmbx")
, ("config", "application/x-config")
, ("connection", "application/x-gnome-db-connection")
, ("cpio", "application/x-cpio")
, ("cpio.gz", "application/x-cpio-compressed")
, ("cpp", "text/x-c++src")
, ("cr2", "image/x-dcraw")
, ("crt", "application/x-x509-ca-cert")
, ("crw", "image/x-dcraw")
, ("cs", "text/x-csharp")
, ("cs1", "image/x-dcraw")
, ("csh", "application/x-csh")
, ("css", "text/css")
, ("csv", "text/x-comma-separated-values")
, ("cue", "application/x-cue")
, ("cur", "image/x-win-bitmap")
, ("cxx", "text/x-c++src")
, ("d", "text/x-dsrc")
, ("dat", "video/mpeg")
, ("database", "application/x-gnome-db-database")
, ("dbf", "application/x-dbase")
, ("dc", "application/x-dc-rom")
, ("dc2", "image/x-dcraw")
, ("dcl", "text/x-dcl")
, ("dcm", "application/dicom")
, ("dcr", "image/x-dcraw")
, ("deb", "application/x-deb")
, ("der", "application/x-x509-ca-cert")
, ("desktop", "application/x-desktop")
, ("devhelp", "application/x-devhelp")
, ("dia", "application/x-dia-diagram")
, ("dif", "video/dv")
, ("diff", "text/x-patch")
, ("disco", "application/x-disco")
, ("display", "application/x-gdesklets-display")
, ("djv", "image/vnd.djvu")
, ("djvu", "image/vnd.djvu")
, ("doc", "application/msword")
, ("docbook", "application/docbook+xml")
, ("dsl", "text/x-dsl")
, ("dtd", "text/x-dtd")
, ("dv", "video/dv")
, ("dvi", "application/x-dvi")
, ("dwg", "image/vnd.dwg")
, ("dxf", "image/vnd.dxf")
, ("ear", "application/x-java-archive")
, ("egon", "application/x-egon")
, ("el", "text/x-emacs-lisp")
, ("eps", "image/x-eps")
, ("epsf", "image/x-eps")
, ("epsi", "image/x-eps")
, ("etheme", "application/x-e-theme")
, ("etx", "text/x-setext")
, ("exe", "application/x-executable")
, ("exe", "application/x-ms-dos-executable")
, ("ez", "application/andrew-inset")
, ("f", "text/x-fortran")
, ("fff", "image/x-dcraw")
, ("fig", "image/x-xfig")
, ("fits", "image/x-fits")
, ("flac", "audio/x-flac")
, ("flc", "video/x-flic")
, ("fli", "video/x-flic")
, ("flw", "application/x-kivio")
, ("fo", "text/x-xslfo")
, ("g3", "image/fax-g3")
, ("gb", "application/x-gameboy-rom")
, ("gcrd", "text/directory")
, ("gen", "application/x-genesis-rom")
, ("gf", "application/x-tex-gf")
, ("gg", "application/x-sms-rom")
, ("gif", "image/gif")
, ("glabels", "application/x-glabels")
, ("glade", "application/x-glade")
, ("gmo", "application/x-gettext-translation")
, ("gnc", "application/x-gnucash")
, ("gnucash", "application/x-gnucash")
, ("gnumeric", "application/x-gnumeric")
, ("gpg", "application/pgp-encrypted")
, ("gra", "application/x-graphite")
, ("gsf", "application/x-font-type1")
, ("gtar", "application/x-gtar")
, ("gz", "application/x-gzip")
, ("h", "text/x-chdr")
, ("h++", "text/x-chdr")
, ("hdf", "application/x-hdf")
, ("hh", "text/x-c++hdr")
, ("hp", "text/x-chdr")
, ("hpgl", "application/vnd.hp-hpgl")
, ("hs", "text/x-haskell")
, ("htm", "text/html")
, ("html", "text/html")
, ("ica", "application/x-ica")
, ("icb", "image/x-icb")
, ("ico", "image/x-ico")
, ("ics", "text/calendar")
, ("idl", "text/x-idl")
, ("ief", "image/ief")
, ("iff", "image/x-iff")
, ("il", "text/x-msil")
, ("ilbm", "image/x-ilbm")
, ("iso", "application/x-cd-image")
, ("it", "audio/x-it")
, ("jam", "application/x-jamin")
, ("jar", "application/x-jar")
, ("jar", "application/x-java-archive")
, ("java", "text/x-java")
, ("jng", "image/x-jng")
, ("jnlp", "application/x-java-jnlp-file")
, ("jp2", "image/jpeg2000")
, ("jpe", "image/jpeg")
, ("jpeg", "image/jpeg")
, ("jpg", "image/jpeg")
, ("jpr", "application/x-jbuilder-project")
, ("jpx", "application/x-jbuilder-project")
, ("js", "application/x-javascript")
, ("js", "text/x-js")
, ("k", "application/x-tex-pk")
, ("k25", "image/x-dcraw")
, ("karbon", "application/x-karbon")
, ("kdc", "image/x-dcraw")
, ("kdelnk", "application/x-desktop")
, ("kfo", "application/x-kformula")
, ("kil", "application/x-killustrator")
, ("kino", "application/x-smil")
, ("kon", "application/x-kontour")
, ("kpm", "application/x-kpovmodeler")
, ("kpr", "application/x-kpresenter")
, ("kpt", "application/x-kpresenter")
, ("kra", "application/x-krita")
, ("ksp", "application/x-kspread")
, ("kud", "application/x-kugar")
, ("kwd", "application/x-kword")
, ("kwt", "application/x-kword")
, ("la", "application/x-shared-library-la")
, ("lha", "application/x-lha")
, ("lhs", "text/x-literate-haskell")
, ("lhz", "application/x-lhz")
, ("log", "text/x-log")
, ("ltx", "text/x-tex")
, ("lwo", "image/x-lwo")
, ("lwob", "image/x-lwo")
, ("lws", "image/x-lws")
, ("lyx", "application/x-lyx")
, ("lzh", "application/x-lha")
, ("lzh", "application/x-lha")
, ("lzo", "application/x-lzop")
, ("m", "text/x-objcsrc")
, ("m15", "audio/x-mod")
, ("m3u", "audio/x-mpegurl")
, ("m4a", "audio/x-m4a")
, ("man", "application/x-troff-man")
, ("master", "application/x-master-page")
, ("md", "application/x-genesis-rom")
, ("mdp", "application/x-mdp")
, ("mds", "application/x-mds")
, ("mdsx", "application/x-mdsx")
, ("me", "text/x-troff-me")
, ("mergeant", "application/x-mergeant")
, ("mgp", "application/x-magicpoint")
, ("mid", "audio/midi")
, ("midi", "audio/midi")
, ("mif", "application/x-mif")
, ("mkv", "application/x-matroska")
, ("mm", "text/x-troff-mm")
, ("mml", "text/mathml")
, ("mng", "video/x-mng")
, ("moc", "text/x-moc")
, ("mod", "audio/x-mod")
, ("moov", "video/quicktime")
, ("mos", "image/x-dcraw")
, ("mov", "video/quicktime")
, ("movie", "video/x-sgi-movie")
, ("mp2", "video/mpeg")
, ("mp3", "audio/mpeg")
, ("mpe", "video/mpeg")
, ("mpeg", "video/mpeg")
, ("mpg", "video/mpeg")
, ("mps", "application/x-mps")
, ("mrproject", "application/x-planner")
, ("mrw", "image/x-dcraw")
, ("ms", "text/x-troff-ms")
, ("msod", "image/x-msod")
, ("msx", "application/x-msx-rom")
, ("mtm", "audio/x-mod")
, ("n", "text/x-nemerle")
, ("n64", "application/x-n64-rom")
, ("nb", "application/mathematica")
, ("nc", "application/x-netcdf")
, ("nef", "image/x-dcraw")
, ("nes", "application/x-nes-rom")
, ("nsv", "video/x-nsv")
, ("o", "application/x-object")
, ("obj", "application/x-tgif")
, ("oda", "application/oda")
, ("odb", "application/vnd.oasis.opendocument.database")
, ("odc", "application/vnd.oasis.opendocument.chart")
, ("odf", "application/vnd.oasis.opendocument.formula")
, ("odg", "application/vnd.oasis.opendocument.graphics")
, ("odi", "application/vnd.oasis.opendocument.image")
, ("odm", "application/vnd.oasis.opendocument.text-master")
, ("odp", "application/vnd.oasis.opendocument.presentation")
, ("ods", "application/vnd.oasis.opendocument.spreadsheet")
, ("odt", "application/vnd.oasis.opendocument.text")
, ("ogg", "application/ogg")
, ("old", "application/x-trash")
, ("oleo", "application/x-oleo")
, ("orf", "image/x-dcraw")
, ("otg", "application/vnd.oasis.opendocument.graphics-template")
, ("oth", "application/vnd.oasis.opendocument.text-web")
, ("otp", "application/vnd.oasis.opendocument.presentation-template")
, ("ots", "application/vnd.oasis.opendocument.spreadsheet-template")
, ("ott", "application/vnd.oasis.opendocument.text-template")
, ("p", "text/x-pascal")
, ("p12", "application/x-pkcs12")
, ("p7s", "application/pkcs7-signature")
, ("par2", "application/x-par2")
, ("pas", "text/x-pascal")
, ("patch", "text/x-patch")
, ("pbm", "image/x-portable-bitmap")
, ("pcd", "image/x-photo-cd")
, ("pcf", "application/x-font-pcf")
, ("pcf.Z", "application/x-font-type1")
, ("pcf.gz", "application/x-font-pcf")
, ("pcl", "application/vnd.hp-pcl")
, ("pdb", "application/vnd.palm")
, ("pdb", "application/x-palm-database")
, ("pdf", "application/pdf")
, ("pef", "image/x-dcraw")
, ("pem", "application/x-x509-ca-cert")
, ("perl", "application/x-perl")
, ("pfa", "application/x-font-type1")
, ("pfb", "application/x-font-type1")
, ("pfx", "application/x-pkcs12")
, ("pgm", "image/x-portable-graymap")
, ("pgn", "application/x-chess-pgn")
, ("pgp", "application/pgp")
, ("pgp", "application/pgp-encrypted")
, ("php", "application/x-php")
, ("php3", "application/x-php")
, ("php4", "application/x-php")
, ("pict", "image/x-pict")
, ("pict1", "image/x-pict")
, ("pict2", "image/x-pict")
, ("pkr", "application/pgp-keys")
, ("pl", "application/x-perl")
, ("planner", "application/x-planner")
, ("pln", "application/x-planperfect")
, ("pls", "audio/x-scpls")
, ("pls", "audio/x-scpls")
, ("pm", "application/x-perl")
, ("png", "image/png")
, ("pnm", "image/x-portable-anymap")
, ("po", "text/x-gettext-translation")
, ("pot", "application/vnd.ms-powerpoint")
, ("pot", "text/x-gettext-translation-template")
, ("ppm", "image/x-portable-pixmap")
, ("pps", "application/vnd.ms-powerpoint")
, ("ppt", "application/vnd.ms-powerpoint")
, ("ppz", "application/vnd.ms-powerpoint")
, ("prc", "application/x-palm-database")
, ("prj", "application/x-anjuta-project")
, ("prjx", "application/x-prjx")
, ("ps", "application/postscript")
, ("ps.gz", "application/x-gzpostscript")
, ("psd", "image/x-psd")
, ("psf", "application/x-font-linux-psf")
, ("psid", "audio/prs.sid")
, ("pto", "application/x-ptoptimizer-script")
, ("pw", "application/x-pw")
, ("py", "text/x-python")
, ("pyc", "application/x-python-bytecode")
, ("pyo", "application/x-python-bytecode")
, ("qif", "application/x-qw")
, ("qt", "video/quicktime")
, ("qtvr", "video/quicktime")
, ("ra", "audio/vnd.rn-realaudio")
, ("ra", "audio/x-pn-realaudio")
, ("raf", "image/x-dcraw")
, ("ram", "audio/x-pn-realaudio")
, ("ram", "audio/x-pn-realaudio")
, ("rar", "application/x-rar")
, ("rar", "application/x-rar-compressed")
, ("ras", "image/x-cmu-raster")
, ("rdc", "image/x-dcraw")
, ("rdf", "text/rdf")
, ("rdp", "application/x-rdp")
, ("rej", "application/x-reject")
, ("rem", "application/x-remoting")
, ("resources", "application/x-resources")
, ("resx", "application/x-resourcesx")
, ("rgb", "image/x-rgb")
, ("rle", "image/rle")
, ("rm", "application/vnd.rn-realmedia")
, ("rm", "audio/x-pn-realaudio")
, ("rmm", "audio/x-pn-realaudio")
, ("rms", "application/vnd.rn-realmedia-secure")
, ("rmvb", "application/vnd.rn-realmedia-vbr")
, ("rng", "text/x-rng")
, ("roff", "application/x-troff")
, ("rpm", "application/x-rpm")
, ("rss", "text/rss")
, ("rt", "text/vnd.rn-realtext")
, ("rtf", "application/rtf")
, ("rtx", "text/richtext")
, ("rv", "video/vnd.rn-realvideo")
, ("s3m", "audio/x-s3m")
, ("sam", "application/x-amipro")
, ("sc", "application/x-sc")
, ("scd", "application/x-scribus")
, ("scd.gz", "application/x-scribus")
, ("scm", "text/x-scheme")
, ("sda", "application/vnd.stardivision.draw")
, ("sdc", "application/vnd.stardivision.calc")
, ("sdd", "application/vnd.stardivision.impress")
, ("sdp", "application/sdp")
, ("sdp", "application/vnd.stardivision.impress")
, ("sds", "application/vnd.stardivision.chart")
, ("sdw", "application/vnd.stardivision.writer")
, ("sgi", "image/x-sgi")
, ("sgl", "application/vnd.stardivision.writer")
, ("sgm", "text/sgml")
, ("sgml", "text/sgml")
, ("sh", "application/x-shellscript")
, ("shar", "application/x-shar")
, ("siag", "application/x-siag")
, ("sid", "audio/prs.sid")
, ("sig", "application/pgp-signature")
, ("sik", "application/x-trash")
, ("sit", "application/stuffit")
, ("sit", "application/x-stuffit")
, ("skr", "application/pgp-keys")
, ("sla", "application/x-scribus")
, ("sla.gz", "application/x-scribus")
, ("slk", "text/spreadsheet")
, ("smd", "application/vnd.stardivision.mail")
, ("smf", "application/vnd.stardivision.math")
, ("smi", "application/smil")
, ("smi", "application/x-smil")
, ("smil", "application/smil")
, ("smil", "application/x-smil")
, ("sml", "application/smil")
, ("sms", "application/x-sms-rom")
, ("snd", "audio/basic")
, ("so", "application/x-sharedlib")
, ("soap", "application/x-soap-remoting")
, ("spd", "application/x-font-speedo")
, ("sql", "text/x-sql")
, ("src", "application/x-wais-source")
, ("srf", "image/x-dcraw")
, ("ssm", "application/x-streamingmedia")
, ("stc", "application/vnd.sun.xml.calc.template")
, ("std", "application/vnd.sun.xml.draw.template")
, ("sti", "application/vnd.sun.xml.impress.template")
, ("stm", "audio/x-stm")
, ("stw", "application/vnd.sun.xml.writer.template")
, ("sty", "text/x-tex")
, ("sun", "image/x-sun-raster")
, ("sv4cpio", "application/x-sv4cpio")
, ("sv4crc", "application/x-sv4crc")
, ("svg", "image/svg+xml")
, ("swf", "application/x-shockwave-flash")
, ("sxc", "application/vnd.sun.xml.calc")
, ("sxd", "application/vnd.sun.xml.draw")
, ("sxg", "application/vnd.sun.xml.writer.global")
, ("sxi", "application/vnd.sun.xml.impress")
, ("sxm", "application/vnd.sun.xml.math")
, ("sxw", "application/vnd.sun.xml.writer")
, ("sylk", "text/spreadsheet")
, ("t", "application/x-troff")
, ("tar", "application/x-tar")
, ("tar.Z", "application/x-compressed-tar")
, ("tar.Z", "application/x-tarz")
, ("tar.bz", "application/x-bzip-compressed-tar")
, ("tar.bz", "application/x-bzip-compressed-tar")
, ("tar.bz2", "application/x-bzip-compressed-tar")
, ("tar.bz2", "application/x-bzip-compressed-tar")
, ("tar.gz", "application/x-compressed-tar")
, ("tar.gz", "application/x-compressed-tar")
, ("tar.lzo", "application/x-lzop-compressed-tar")
, ("tar.lzo", "application/x-tzo")
, ("taz", "application/x-compressed-tar")
, ("tbz", "application/x-bzip-compressed-tar")
, ("tbz2", "application/x-bzip-compressed-tar")
, ("tcl", "text/x-tcl")
, ("tex", "text/x-tex")
, ("texi", "text/x-texinfo")
, ("texinfo", "text/x-texinfo")
, ("tga", "image/x-tga")
, ("tgz", "application/x-compressed-tar")
, ("tgz", "application/x-compressed-tar")
, ("theme", "application/x-theme")
, ("tif", "image/tiff")
, ("tiff", "image/tiff")
, ("tk", "text/x-tcl")
, ("tm", "text/x-texmacs")
, ("toc", "application/x-toc")
, ("torrent", "application/x-bittorrent")
, ("tr", "application/x-troff")
, ("ts", "application/x-linguist")
, ("ts", "text/x-texmacs")
, ("tsv", "text/tab-separated-values")
, ("ttc", "application/x-font-ttf")
, ("ttf", "application/x-font-ttf")
, ("txt", "text/plain")
, ("tzo", "application/x-lzop-compressed-tar")
, ("tzo", "application/x-tzo")
, ("ui", "application/x-designer")
, ("uil", "text/x-uil")
, ("ult", "audio/x-mod")
, ("uni", "audio/x-mod")
, ("uri", "text/x-uri")
, ("url", "text/x-uri")
, ("ustar", "application/x-ustar")
, ("vb", "text/x-vb")
, ("vcf", "text/directory")
, ("vcs", "text/calendar")
, ("vct", "text/directory")
, ("vob", "video/mpeg")
, ("voc", "audio/x-voc")
, ("vor", "application/vnd.stardivision.writer")
, ("war", "application/x-java-archive")
, ("wav", "audio/x-wav")
, ("wb1", "application/x-quattro-pro")
, ("wb1", "application/x-quattropro")
, ("wb2", "application/x-quattro-pro")
, ("wb2", "application/x-quattropro")
, ("wb3", "application/x-quattro-pro")
, ("wb3", "application/x-quattropro")
, ("wk1", "application/vnd.lotus-1-2-3")
, ("wk3", "application/vnd.lotus-1-2-3")
, ("wk4", "application/vnd.lotus-1-2-3")
, ("wks", "application/vnd.lotus-1-2-3")
, ("wmf", "image/x-wmf")
, ("wml", "text/vnd.wap.wml")
, ("wmv", "video/x-ms-wmv")
, ("wpd", "application/vnd.wordperfect")
, ("wpg", "application/x-wpg")
, ("wri", "application/x-mswrite")
, ("wrl", "model/vrml")
, ("wsdl", "application/x-wsdl")
, ("x3f", "image/x-dcraw")
, ("xac", "application/x-gnucash")
, ("xbel", "application/x-xbel")
, ("xbm", "image/x-xbitmap")
, ("xcf", "image/x-xcf")
, ("xcf.bz2", "image/x-compressed-xcf")
, ("xcf.gz", "image/x-compressed-xcf")
, ("xds", "text/x-xds")
, ("xhtml", "application/xhtml+xml")
, ("xi", "audio/x-xi")
, ("xla", "application/vnd.ms-excel")
, ("xlc", "application/vnd.ms-excel")
, ("xld", "application/vnd.ms-excel")
, ("xll", "application/vnd.ms-excel")
, ("xlm", "application/vnd.ms-excel")
, ("xls", "application/vnd.ms-excel")
, ("xlt", "application/vnd.ms-excel")
, ("xlw", "application/vnd.ms-excel")
, ("xm", "audio/x-xm")
, ("xmi", "text/x-xmi")
, ("xml", "text/xml")
, ("xpl", "audio/x-scpls")
, ("xpm", "image/x-xpixmap")
, ("xsl", "text/x-xsl")
, ("xsl", "text/x-xslt")
, ("xslfo", "text/x-xslfo")
, ("xslt", "text/x-xslt")
, ("xul", "application/vnd.mozilla.xul+xml")
, ("xwd", "image/x-xwindowdump")
, ("zabw", "application/x-abiword")
, ("zip", "application/zip")
, ("zoo", "application/x-zoo")
]
hxt-9.3.1.22/src/Text/XML/HXT/DOM/MimeTypes.hs 0000644 0000000 0000000 00000011131 12752557014 016451 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.MimeTypes
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
mime type related data and functions
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.MimeTypes
where
import Control.Monad ( mplus )
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C
import Data.Char
import Data.List
import qualified Data.Map as M
import Data.Maybe
import Text.XML.HXT.DOM.MimeTypeDefaults
-- ------------------------------------------------------------
type MimeTypeTable = M.Map String String
-- ------------------------------------------------------------
-- mime types
--
-- see RFC \"http:\/\/www.rfc-editor.org\/rfc\/rfc3023.txt\"
application_xhtml,
application_xml,
application_xml_external_parsed_entity,
application_xml_dtd,
text_html,
text_pdf,
text_plain,
text_xdtd,
text_xml,
text_xml_external_parsed_entity :: String
application_xhtml = "application/xhtml+xml"
application_xml = "application/xml"
application_xml_external_parsed_entity = "application/xml-external-parsed-entity"
application_xml_dtd = "application/xml-dtd"
text_html = "text/html"
text_pdf = "text/pdf"
text_plain = "text/plain"
text_xdtd = "text/x-dtd"
text_xml = "text/xml"
text_xml_external_parsed_entity = "text/xml-external-parsed-entity"
isTextMimeType :: String -> Bool
isTextMimeType = ("text/" `isPrefixOf`)
isHtmlMimeType :: String -> Bool
isHtmlMimeType t = t == text_html
isXmlMimeType :: String -> Bool
isXmlMimeType t = ( t `elem` [ application_xhtml
, application_xml
, application_xml_external_parsed_entity
, application_xml_dtd
, text_xml
, text_xml_external_parsed_entity
, text_xdtd
]
||
"+xml" `isSuffixOf` t -- application/mathml+xml
) -- or image/svg+xml
defaultMimeTypeTable :: MimeTypeTable
defaultMimeTypeTable = M.fromList mimeTypeDefaults
extensionToMimeType :: String -> MimeTypeTable -> String
extensionToMimeType e = fromMaybe "" . lookupMime
where
lookupMime t = M.lookup e t -- try exact match
`mplus`
M.lookup (map toLower e) t -- else try lowercase match
`mplus`
M.lookup (map toUpper e) t -- else try uppercase match
-- ------------------------------------------------------------
readMimeTypeTable :: FilePath -> IO MimeTypeTable
readMimeTypeTable inp = do
cb <- B.readFile inp
return . M.fromList . parseMimeTypeTable . C.unpack $ cb
parseMimeTypeTable :: String -> [(String, String)]
parseMimeTypeTable = concat
. map buildPairs
. map words
. filter (not . ("#" `isPrefixOf`))
. filter (not . all (isSpace))
. lines
where
buildPairs :: [String] -> [(String, String)]
buildPairs [] = []
buildPairs (mt:exts) = map (\ x -> (x, mt)) $ exts
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/QualifiedName.hs 0000644 0000000 0000000 00000054464 14025463522 017254 0 ustar 00 0000000 0000000 {-# LANGUAGE DeriveDataTypeable #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.QualifiedName
Copyright : Copyright (C) 2011 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
The types and functions for qualified names
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.QualifiedName
( QName
, XName(unXN)
, NsEnv
, mkQName
, mkName
, mkNsName
, mkSNsName
, mkPrefixLocalPart
, equivQName
, equivUri
, equalQNameBy
, namePrefix
, localPart
, namespaceUri
, newXName
, nullXName
, isNullXName
, newQName
, mkQName'
, namePrefix'
, localPart'
, namespaceUri'
, setNamePrefix'
, setLocalPart'
, setNamespaceUri'
, qualifiedName
, qualifiedName'
, universalName
, universalUri
, buildUniversalName
, normalizeNsUri
, setNamespace -- namespace related functions
, isNCName
, isWellformedQualifiedName
, isWellformedQName
, isWellformedNSDecl
, isWellformedNameSpaceName
, isNameSpaceName
, isDeclaredNamespace
, xmlNamespaceXName
, xmlXName
, xmlnsNamespaceXName
, xmlnsXName
, xmlnsQN
, toNsEnv
)
where
{-
import Debug.Trace
-}
import Control.Arrow ((***))
import Control.DeepSeq
import Control.FlatSeq
import Data.AssocList
import Data.Binary
import Data.Char (toLower)
import Data.IORef
import Data.List (isPrefixOf)
import qualified Data.Map as M
import Data.Typeable
import System.IO.Unsafe (unsafePerformIO)
import Text.XML.HXT.DOM.XmlKeywords (a_xml, a_xmlns,
xmlNamespace,
xmlnsNamespace)
import Data.Char.Properties.XMLCharProps (isXmlNCNameChar,
isXmlNCNameStartChar)
-- -----------------------------------------------------------------------------
-- | XML names are represented by Strings, but these strings do not mix up with normal strings.
-- Names are always reduced to normal form, and they are stored internally in a name cache
-- for sharing equal names by the same data structure
data XName = XN { _idXN :: !Int -- for optimization of equality test, see Eq instance
, unXN :: String
}
deriving (Typeable)
instance Eq XName where
(XN id1 _) == (XN id2 _) = id1 == id2
instance Ord XName where
compare (XN _ n1) (XN _ n2) = compare n1 n2
{-
instance Read XName where
readsPrec p str = [ (newXName x, y) | (x, y) <- readsPrec p str ]
instance Show XName where
show (XN _ s) = show s
-}
instance NFData XName where
rnf (XN _ s) = rnf s
instance WNFData XName where
rwnf (XN _ s) = rnf s
instance Binary XName where
put (XN _ s) = put s
get = do
s <- get
return $! newXName s
-----------------------------------------------------------------------------
-- |
-- Type for the namespace association list, used when propagating namespaces by
-- modifying the 'QName' values in a tree
type NsEnv = AssocList XName XName
-----------------------------------------------------------------------------
-- |
-- Namespace support for element and attribute names.
--
-- A qualified name consists of a name prefix, a local name
-- and a namespace uri.
-- All modules, which are not namespace aware, use only the 'localPart' component.
-- When dealing with namespaces, the document tree must be processed by 'Text.XML.HXT.Arrow.Namespace.propagateNamespaces'
-- to split names of structure \"prefix:localPart\" and label the name with the apropriate namespace uri
data QName = QN { localPart' :: !XName
, namePrefix' :: !XName
, namespaceUri' :: !XName
}
deriving (Typeable)
-- -----------------------------------------------------------------------------
-- | Two QNames are equal if (1. case) namespaces are both empty and the qualified names
-- (prefix:localpart) are the same or (2. case) namespaces are set and namespaces and
-- local parts are equal
instance Eq QName where
(QN lp1 px1 ns1) == (QN lp2 px2 ns2)
| ns1 /= ns2 = False -- namespaces are set and differ
| not (isNullXName ns1) = lp1 == lp2 -- namespaces are set and are equal: local parts must be equal
| otherwise = lp1 == lp2 -- no namespaces are set: local parts must be equal
&& -- and prefixes are not set or they are equal
px1 == px2
instance Ord QName where
compare (QN lp1 px1 ns1) (QN lp2 px2 ns2)
| isNullXName ns1 && isNullXName ns2 -- no namespaces set: px is significant
= compare (px1, lp1) (px2, lp2)
| otherwise -- namespace aware cmp: ns is significant, px is irrelevant
= compare (lp1, ns1) (lp2, ns2)
instance NFData QName where
rnf x = seq x ()
instance WNFData QName
instance Show QName where
show = showQN
-- -----------------------------------------------------------------------------
instance Binary QName where
put (QN lp px ns) = put (unXN px) >>
put (unXN lp) >>
put (unXN ns)
get = do
px <- get
lp <- get
ns <- get
return $! newNsName lp px ns
-- ^^
-- strict apply !!!
-- build the QNames strict, else the name sharing optimization will not be in effect
-- -----------------------------------------------------------------------------
isNullXName :: XName -> Bool
isNullXName = (== nullXName)
{-# INLINE isNullXName #-}
namePrefix :: QName -> String
namePrefix = unXN . namePrefix'
{-# INLINE namePrefix #-}
localPart :: QName -> String
localPart = unXN . localPart'
{-# INLINE localPart #-}
namespaceUri :: QName -> String
namespaceUri = unXN . namespaceUri'
{-# INLINE namespaceUri #-}
-- ------------------------------------------------------------
-- | set name prefix
setNamespaceUri' :: XName -> QName -> QName
setNamespaceUri' ns (QN lp px _ns) = newQName lp px ns
-- | set local part
setLocalPart' :: XName -> QName -> QName
setLocalPart' lp (QN _lp px ns) = newQName lp px ns
-- | set name prefix
setNamePrefix' :: XName -> QName -> QName
setNamePrefix' px (QN lp _px ns) = newQName lp px ns
-- ------------------------------------------------------------
-- |
-- builds the full name \"prefix:localPart\", if prefix is not null, else the local part is the result
qualifiedName :: QName -> String
qualifiedName (QN lp px _ns)
| isNullXName px = unXN lp
| otherwise = unXN px ++ (':' : unXN lp)
-- | functional list version of qualifiedName used in xshow
qualifiedName' :: QName -> String -> String
qualifiedName' (QN lp px _ns)
| isNullXName px = (unXN lp ++)
| otherwise = (unXN px ++) . (':' :) . (unXN lp ++)
-- |
-- builds the \"universal\" name, that is the namespace uri surrounded with \"{\" and \"}\" followed by the local part
-- (specialisation of 'buildUniversalName')
universalName :: QName -> String
universalName = buildUniversalName (\ ns lp -> '{' : ns ++ '}' : lp)
-- |
-- builds an \"universal\" uri, that is the namespace uri followed by the local part. This is usefull for RDF applications,
-- where the subject, predicate and object often are concatenated from namespace uri and local part
-- (specialisation of 'buildUniversalName')
universalUri :: QName -> String
universalUri = buildUniversalName (++)
-- |
-- builds a string from the namespace uri and the local part. If the namespace uri is empty, the local part is returned, else
-- namespace uri and local part are combined with the combining function given by the first parameter
buildUniversalName :: (String -> String -> String) -> QName -> String
buildUniversalName bf n@(QN _lp _px ns)
| isNullXName ns = localPart n
| otherwise = unXN ns `bf` localPart n
showQN :: QName -> String
showQN n
| null ns = show $ qualifiedName n
| otherwise = show $ "{" ++ ns ++ "}" ++ qualifiedName n
where
ns = namespaceUri n
-- ------------------------------------------------------------
--
-- internal XName functions
mkQName' :: XName -> XName -> XName -> QName
mkQName' px lp ns = newQName lp px ns
{-# DEPRECATED mkQName' "use newQName instead with lp px ns param seq " #-}
-- ------------------------------------------------------------
-- |
-- constructs a simple name, with prefix and localPart but without a namespace uri.
--
-- see also 'mkQName', 'mkName'
mkPrefixLocalPart :: String -> String -> QName
mkPrefixLocalPart px lp
| null px = newLpName lp
| otherwise = newPxName lp px
-- |
-- constructs a simple, namespace unaware name.
-- If the name is in @prefix:localpart@ form and the prefix is not empty
-- the name is split internally into
-- a prefix and a local part.
mkName :: String -> QName
mkName n
| (':' `elem` n)
&&
not (null px) -- more restrictive: isWellformedQualifiedName n
= newPxName lp px
| otherwise = newLpName n
where
(px, (_ : lp)) = span (/= ':') n
-- |
-- constructs a complete qualified name with 'namePrefix', 'localPart' and 'namespaceUri'.
-- This function can be used to build not wellformed prefix:localpart names.
-- The XPath module uses wildcard names like @xxx:*@. These must be build with 'mkQName'
-- and not with mkName.
mkQName :: String -> String -> String -> QName
mkQName px lp ns
| null ns = mkPrefixLocalPart px lp
| otherwise = newNsName lp px ns
-- ------------------------------------------------------------
-- |
-- old name for 'mkName'
mkSNsName :: String -> QName
mkSNsName = mkName
{-# DEPRECATED mkSNsName "use mkName instead" #-}
-- |
-- constructs a simple, namespace aware name, with prefix:localPart as first parameter,
-- namspace uri as second.
--
-- see also 'mkName', 'mkPrefixLocalPart'
{-
mkNsName :: String -> String -> QName
mkNsName n ns = trace ("mkNsName: " ++ show n ++ " " ++ show ns) (mkNsName' n ns)
-}
mkNsName :: String -> String -> QName
mkNsName n ns
| null ns = qn
| otherwise = setNamespaceUri' ns' qn
where
qn = mkName n
ns' = newXName ns
-- ------------------------------------------------------------
-- | Equivalent QNames are defined as follows: The URIs are normalized before comparison.
-- Comparison is done with 'equalQNameBy' and 'equivUri'
equivQName :: QName -> QName -> Bool
equivQName = equalQNameBy equivUri
-- | Comparison of normalized namespace URIs using 'normalizeNsUri'
equivUri :: String -> String -> Bool
equivUri x y = normalizeNsUri x == normalizeNsUri y
-- | Sometimes a weaker equality relation than 'equalQName' is appropriate, e.g no case significance in names, ...
-- a name normalization function can be applied to the strings before comparing. Called by 'equalQName' and
-- 'equivQName'
equalQNameBy :: (String -> String -> Bool) -> QName -> QName -> Bool
equalQNameBy equiv q1 q2 = localPart q1 == localPart q2
&&
(namespaceUri q1 `equiv` namespaceUri q2)
-- | Normalization of URIs: Normalization is done by conversion into lowercase letters. A trailing \"\/\" is ignored
normalizeNsUri :: String -> String
normalizeNsUri = map toLower . stripSlash
where
stripSlash "" = ""
stripSlash s
| last s == '/' = init s
| otherwise = s
-- -----------------------------------------------------------------------------
-- Namespace predicates
-- |
-- Compute the name prefix and the namespace uri for a qualified name.
--
-- This function does not test whether the name is a wellformed qualified name.
-- see Namespaces in XML Rule [6] to [8]. Error checking is done with separate functions,
-- see 'isWellformedQName' and 'isWellformedQualifiedName' for error checking.
setNamespace :: NsEnv -> QName -> QName
setNamespace env n@(QN lp px _ns)
= maybe n (\ ns -> newQName lp px ns) . lookup px $ env
-- -----------------------------------------------------------------------------
--
-- |
-- test for wellformed NCName, rule [4] XML Namespaces
isNCName :: String -> Bool
isNCName [] = False
isNCName n = and ( zipWith ($)
(isXmlNCNameStartChar : repeat isXmlNCNameChar)
n
)
-- |
-- test for wellformed QName, rule [6] XML Namespaces
-- predicate is used in filter 'valdateNamespaces'.
isWellformedQualifiedName :: String -> Bool
isWellformedQualifiedName s
| null lp = isNCName px
| otherwise = isNCName px && isNCName (tail lp)
where
(px, lp) = span (/= ':') s
-- |
-- test for wellformed QName values.
-- A QName is wellformed, if the local part is a NCName, the namePrefix, if not empty, is also a NCName.
-- predicate is used in filter 'valdateNamespaces'.
isWellformedQName :: QName -> Bool
isWellformedQName (QN lp px _ns)
= (isNCName . unXN) lp -- rule [8] XML Namespaces
&&
( isNullXName px
||
(isNCName . unXN) px -- rule [7] XML Namespaces
)
-- |
-- test whether an attribute name is a namesapce declaration name.
-- If this is not the case True is the result, else
-- the name must be a well formed namespace name:
-- All namespace prefixes starting with \"xml\" are reserved for XML related definitions.
-- predicate is used in filter 'valdateNamespaces'.
isWellformedNSDecl :: QName -> Bool
isWellformedNSDecl n
= not (isNameSpaceName n)
||
isWellformedNameSpaceName n
-- |
-- test for a namespace name to be well formed
isWellformedNameSpaceName :: QName -> Bool
isWellformedNameSpaceName n@(QN lp px _ns)
| isNullXName px = lp == xmlnsXName
| otherwise = px == xmlnsXName
&&
not (null lp')
&&
not (a_xml `isPrefixOf` lp')
where
lp' = localPart n
-- |
-- test whether a name is a namespace declaration attribute name
isNameSpaceName :: QName -> Bool
isNameSpaceName (QN lp px _ns)
| isNullXName px = lp == xmlnsXName
| otherwise = px == xmlnsXName
-- |
--
-- predicate is used in filter 'valdateNamespaces'.
isDeclaredNamespace :: QName -> Bool
isDeclaredNamespace (QN _lp px ns)
| isNullXName px = True -- no namespace used
| px == xmlnsXName = ns == xmlnsNamespaceXName -- "xmlns" has a predefined namespace uri
| px == xmlXName = ns == xmlNamespaceXName -- "xml" has a predefiend namespace"
| otherwise = not (isNullXName ns) -- namespace values are not empty
-- -----------------------------------------------------------------------------
toNsEnv :: AssocList String String -> NsEnv
toNsEnv = map (newXName *** newXName)
-- -----------------------------------------------------------------------------
-- the name and string cache
data NameCache = NC { _newXN :: !Int -- next free name id
, _xnCache :: !(M.Map String XName)
, _qnCache :: !(M.Map (XName, XName, XName) QName) -- we need another type than QName
} -- for the key because of the unusable
-- Eq instance of QName
type ChangeNameCache r = NameCache -> (NameCache, r)
-- ------------------------------------------------------------
-- | the internal cache for QNames (and name strings)
theNameCache :: IORef NameCache
theNameCache = unsafePerformIO (newIORef $ initialCache)
{-# NOINLINE theNameCache #-}
initialXNames :: [XName]
nullXName
, xmlnsNamespaceXName
, xmlnsXName
, xmlNamespaceXName
, xmlXName :: XName
initialXNames@[
nullXName
, xmlnsNamespaceXName
, xmlnsXName
, xmlNamespaceXName
, xmlXName
] = zipWith XN [0..] $
[ ""
, xmlnsNamespace
, a_xmlns
, xmlNamespace
, a_xml
]
initialQNames :: [QName]
xmlnsQN :: QName
initialQNames@[xmlnsQN] = [QN xmlnsXName nullXName xmlnsNamespaceXName]
initialCache :: NameCache
initialCache = NC
(length initialXNames)
(M.fromList $ map (\ xn -> (unXN xn, xn)) initialXNames)
(M.fromList $ map (\ qn@(QN lp px ns) -> ((lp, px, ns), qn)) initialQNames)
-- ------------------------------------------------------------
changeNameCache :: NFData r => ChangeNameCache r -> r
changeNameCache action = unsafePerformIO changeNameCache'
where
action' c =
let r = action c
in
fst r `seq` r -- eval name cache to whnf
changeNameCache' =
do
-- putStrLn "modify cache"
res <- atomicModifyIORef theNameCache action'
-- putStrLn "cache modified"
return res
{-# NOINLINE changeNameCache #-}
newXName' :: String -> ChangeNameCache XName
newXName' n c@(NC nxn xm qm)
= case M.lookup n xm of
Just xn -> (c, xn)
Nothing -> let nxn' = nxn + 1 in
let xn = (XN nxn n) in
let xm' = M.insert n xn xm in
-- trace ("newXName: XN " ++ show nxn ++ " " ++ show n) $
rnf xn `seq` (NC nxn' xm' qm, xn)
newQName' :: XName -> XName -> XName -> ChangeNameCache QName
newQName' lp px ns c@(NC nxn xm qm)
= case M.lookup q' qm of
Just qn -> -- trace ("oldQName: " ++ show qn) $ -- log evaluation sequence
(c, qn)
Nothing -> let qm' = M.insert q' q qm in
-- trace ("newQName: " ++ show q) $ -- log insertion of a new QName
q `seq` (NC nxn xm qm', q)
where
q' = (lp, px, ns)
q = QN lp px ns
andThen :: ChangeNameCache r1 ->
(r1 -> ChangeNameCache r2) -> ChangeNameCache r2
andThen a1 a2 c0 = let (c1, r1) = a1 c0 in
(a2 r1) c1
newXName :: String -> XName
newXName n = changeNameCache $
newXName' n
newQName :: XName -> XName -> XName -> QName
newQName lp px ns = lp `seq` px `seq` ns `seq` -- XNames must be evaluated, else MVar blocks
( changeNameCache $
newQName' lp px ns
)
newLpName :: String -> QName
newLpName lp = changeNameCache $
newXName' lp `andThen` \ lp' ->
newQName' lp' nullXName nullXName
newPxName :: String -> String -> QName
newPxName lp px = changeNameCache $
newXName' lp `andThen` \ lp' ->
newXName' px `andThen` \ px' ->
newQName' lp' px' nullXName
newNsName :: String -> String -> String -> QName
newNsName lp px ns = changeNameCache $
newXName' lp `andThen` \ lp' ->
newXName' px `andThen` \ px' ->
newXName' ns `andThen` \ ns' ->
newQName' lp' px' ns'
-----------------------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/ShowXml.hs 0000644 0000000 0000000 00000045610 13001362013 016124 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.ShowXml
Copyright : Copyright (C) 2008-9 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
XML tree conversion to external string representation
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.ShowXml
( xshow
, xshowBlob
, xshow'
, xshow''
)
where
import Prelude hiding (showChar, showString)
import Data.Maybe
import Data.Tree.Class
import Data.Tree.NTree.TypeDefs
import Text.XML.HXT.DOM.TypeDefs
import Text.XML.HXT.DOM.XmlKeywords
import Text.XML.HXT.DOM.XmlNode (getDTDAttrl, mkDTDElem)
import Text.Regex.XMLSchema.Generic(sed)
-- -----------------------------------------------------------------------------
--
-- the toString conversion functions
-- |
-- convert a list of trees into a string
--
-- see also : 'xmlTreesToText' for filter version, 'Text.XML.HXT.Parser.XmlParsec.xread' for the inverse operation
xshow :: XmlTrees -> String
xshow [(NTree (XText s) _)] = s -- special case optimisation
xshow [(NTree (XBlob b) _)] = blobToString b -- special case optimisation
xshow ts = showXmlTrees showString showString ts ""
-- | convert an XML tree into a binary large object (a bytestring)
xshowBlob :: XmlTrees -> Blob
xshowBlob [(NTree (XBlob b) _)] = b -- special case optimisation
xshowBlob [(NTree (XText s) _)] = stringToBlob s -- special case optimisation
xshowBlob ts = stringToBlob $ xshow ts
-- |
-- convert a list of trees into a blob.
--
-- Apply a quoting function for XML quoting of content,
-- a 2. quoting funtion for attribute values
-- and an encoding function after tree conversion
xshow' :: (Char -> StringFct) ->
(Char -> StringFct) ->
(Char -> StringFct) ->
XmlTrees -> Blob
xshow' cquot aquot enc ts = stringToBlob $ (concatMap' enc (showTrees ts "")) ""
where
showTrees = showXmlTrees (concatMap' cquot) (concatMap' aquot)
xshow'' :: (Char -> StringFct) ->
(Char -> StringFct) ->
XmlTrees -> String
xshow'' cquot aquot ts = showTrees ts ""
where
showTrees = showXmlTrees (concatMap' cquot) (concatMap' aquot)
-- ------------------------------------------------------------
type StringFct = String -> String
-- ------------------------------------------------------------
showXmlTrees :: (String -> StringFct) ->
(String -> StringFct) ->
XmlTrees -> StringFct
showXmlTrees cf af
= showTrees
where
-- ------------------------------------------------------------
showTrees :: XmlTrees -> StringFct
showTrees = foldr (.) id . map showXmlTree
{-# INLINE showTrees #-}
showTrees' :: XmlTrees -> StringFct
showTrees' = foldr (\ x y -> x . showNL . y) id . map showXmlTree
{-# INLINE showTrees' #-}
-- ------------------------------------------------------------
showXmlTree :: XmlTree -> StringFct
showXmlTree (NTree (XText s) _) -- common cases first
= cf s
showXmlTree (NTree (XTag t al) [])
= showLt . showQName t . showTrees al . showSlash . showGt
showXmlTree (NTree (XTag t al) cs)
= showLt . showQName t . showTrees al . showGt
. showTrees cs
. showLt . showSlash . showQName t . showGt
showXmlTree (NTree (XAttr an) cs)
= showBlank
. showQName an
. showEq
. showQuot
. af (xshow cs)
. showQuot
showXmlTree (NTree (XBlob b) _)
= cf . blobToString $ b
showXmlTree (NTree (XCharRef i) _)
= showString "" . showString (show i) . showChar ';'
showXmlTree (NTree (XEntityRef r) _)
= showString "&" . showString r . showChar ';'
showXmlTree (NTree (XCmt c) _)
= showString ""
showXmlTree (NTree (XCdata d) _)
= showString ""
where
-- quote "]]>" in CDATA contents
d' = sed (const "]]>") "\\]\\]>" d
showXmlTree (NTree (XPi n al) _)
= showString ""
. showQName n
. (foldr (.) id . map showPiAttr) al
. showString "?>"
where
showPiAttr :: XmlTree -> StringFct
showPiAttr a@(NTree (XAttr an) cs)
| qualifiedName an == a_value
--
-- no XML quoting of PI value
= showBlank . showXmlTrees showString showString cs
| otherwise
--
= showXmlTree a
showPiAttr a
= showXmlTree a -- id
showXmlTree (NTree (XDTD de al) cs)
= showXmlDTD de al cs
showXmlTree (NTree (XError l e) _)
= showString ""
-- ------------------------------------------------------------
showXmlDTD :: DTDElem -> Attributes -> XmlTrees -> StringFct
showXmlDTD DOCTYPE al cs = showString ""
where
showInternalDTD [] = id
showInternalDTD ds = showString " [\n"
. showTrees' ds
. showChar ']'
showXmlDTD ELEMENT al cs = showString ""
showXmlDTD ATTLIST al cs = showString " ( showPEAttr
. fromMaybe [] . getDTDAttrl
. head
) cs
Just a -> ( showString a
. showAttrType (lookup1 a_type al)
. showAttrKind (lookup1 a_kind al)
)
)
)
. showString " >"
where
showAttrType t
| t == k_peref
= showBlank . showPEAttr al
| t == k_enumeration
= showAttrEnum
| t == k_notation
= showBlank . showString k_notation . showAttrEnum
| otherwise
= showBlank . showString t
showAttrEnum
= showString " ("
. foldr1
(\ s1 s2 -> s1 . showString " | " . s2)
(map (getEnum . fromMaybe [] . getDTDAttrl) cs)
. showString ")"
where
getEnum :: Attributes -> StringFct
getEnum l = showAttr a_name l . showPEAttr l
showAttrKind k
| k == k_default
= showBlank
. showQuoteString (lookup1 a_default al)
| k == k_fixed
= showBlank
. showString k_fixed
. showBlank
. showQuoteString (lookup1 a_default al)
| k == ""
= id
| otherwise
= showBlank
. showString k
showXmlDTD NOTATION al _cs
= showString ""
showXmlDTD PENTITY al cs = showEntity "% " al cs
showXmlDTD ENTITY al cs = showEntity "" al cs
showXmlDTD PEREF al _cs = showPEAttr al
showXmlDTD CONDSECT _ (c1 : cs)
= showString ""
showXmlDTD CONTENT al cs = showContent (mkDTDElem CONTENT al cs)
showXmlDTD NAME al _cs = showAttr a_name al
showXmlDTD de al _cs = showString "NOT YET IMPLEMETED: "
. showString (show de)
. showBlank
. showString (show al)
. showString " [...]\n"
-- ------------------------------------------------------------
showEntity :: String -> Attributes -> XmlTrees -> StringFct
showEntity kind al cs = showString ""
showEntityValue :: XmlTrees -> StringFct
showEntityValue [] = id
showEntityValue cs = showBlank
. showQuot
. af (xshow cs)
. showQuot
-- ------------------------------------------------------------
showContent :: XmlTree -> StringFct
showContent (NTree (XDTD de al) cs)
= cont2String de
where
cont2String :: DTDElem -> StringFct
cont2String NAME = showAttr a_name al
cont2String PEREF = showPEAttr al
cont2String CONTENT = showLpar
. foldr1
(combine (lookup1 a_kind al))
(map showContent cs)
. showRpar
. showAttr a_modifier al
cont2String n = error ("cont2string " ++ show n ++ " is undefined")
combine k s1 s2 = s1
. showString ( if k == v_seq
then ", "
else " | "
)
. s2
showContent n = showXmlTree n
-- ------------------------------------------------------------
showElemType :: String -> XmlTrees -> StringFct
showElemType t cs
| t == v_pcdata = showLpar . showString v_pcdata . showRpar
| t == v_mixed
&&
(not . null) cs = showLpar
. showString v_pcdata
. ( foldr (.) id
. map (mixedContent . selAttrl . getNode)
) cs1
. showRpar
. showAttr a_modifier al1
| t == v_mixed -- incorrect tree, e.g. after erronius pe substitution
= showLpar
. showRpar
| t == v_children
&&
(not . null) cs = showContent (head cs)
| t == v_children = showLpar
. showRpar
| t == k_peref = foldr (.) id
. map showContent $ cs
| otherwise = showString t
where
[(NTree (XDTD CONTENT al1) cs1)] = cs
mixedContent :: Attributes -> StringFct
mixedContent l = showString " | " . showAttr a_name l . showPEAttr l
selAttrl (XDTD _ as) = as
selAttrl (XText tex) = [(a_name, tex)]
selAttrl _ = []
-- ------------------------------------------------------------
showQName :: QName -> StringFct
showQName = qualifiedName'
{-# INLINE showQName #-}
-- ------------------------------------------------------------
showQuoteString :: String -> StringFct
showQuoteString s = showQuot . showString s . showQuot
-- ------------------------------------------------------------
showAttr :: String -> Attributes -> StringFct
showAttr k al = showString (fromMaybe "" . lookup k $ al)
-- ------------------------------------------------------------
showPEAttr :: Attributes -> StringFct
showPEAttr al = showPE (lookup a_peref al)
where
showPE (Just pe) = showChar '%'
. showString pe
. showChar ';'
showPE Nothing = id
-- ------------------------------------------------------------
showExternalId :: Attributes -> StringFct
showExternalId al = id2Str (lookup k_system al) (lookup k_public al)
where
id2Str Nothing Nothing = id
id2Str (Just s) Nothing = showBlank
. showString k_system
. showBlank
. showQuoteString s
id2Str Nothing (Just p) = showBlank
. showString k_public
. showBlank
. showQuoteString p
id2Str (Just s) (Just p) = showBlank
. showString k_public
. showBlank
. showQuoteString p
. showBlank
. showQuoteString s
-- ------------------------------------------------------------
showNData :: Attributes -> StringFct
showNData al = nd2Str (lookup k_ndata al)
where
nd2Str Nothing = id
nd2Str (Just v) = showBlank
. showString k_ndata
. showBlank
. showString v
-- ------------------------------------------------------------
showBlank,
showEq, showLt, showGt, showSlash, showQuot, showLpar, showRpar, showNL :: StringFct
showBlank = showChar ' '
{-# INLINE showBlank #-}
showEq = showChar '='
{-# INLINE showEq #-}
showLt = showChar '<'
{-# INLINE showLt #-}
showGt = showChar '>'
{-# INLINE showGt #-}
showSlash = showChar '/'
{-# INLINE showSlash #-}
showQuot = showChar '\"'
{-# INLINE showQuot #-}
showLpar = showChar '('
{-# INLINE showLpar #-}
showRpar = showChar ')'
{-# INLINE showRpar #-}
showNL = showChar '\n'
{-# INLINE showNL #-}
showChar :: Char -> StringFct
showChar = (:)
{-# INLINE showChar #-}
showString :: String -> StringFct
showString = (++)
{-# INLINE showString #-}
concatMap' :: (Char -> StringFct) -> String -> StringFct
concatMap' f = foldr (\ x r -> f x . r) id
{-# INLINE concatMap' #-}
-- -----------------------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/TypeDefs.hs 0000644 0000000 0000000 00000025365 13001350442 016256 0 ustar 00 0000000 0000000 {-# LANGUAGE DeriveDataTypeable #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.TypeDefs
Copyright : Copyright (C) 2008-2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
The core data types of the HXT DOM.
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.TypeDefs
( module Data.AssocList
, module Text.XML.HXT.DOM.TypeDefs
, module Text.XML.HXT.DOM.QualifiedName
)
where
import Control.DeepSeq
import Control.FlatSeq
import Data.AssocList
import Data.Binary
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy.Char8 as CS
import Data.Tree.NTree.TypeDefs
import Data.Tree.NTree.Zipper.TypeDefs
import Data.Typeable
import Text.XML.HXT.DOM.QualifiedName
-- -----------------------------------------------------------------------------
--
-- Basic types for xml tree and filters
-- | Rose tree with XML nodes (XNode)
type XmlTree = NTree XNode
-- | List of rose trees with XML nodes
type XmlTrees = NTrees XNode
-- | Navigatable rose tree with XML nodes
type XmlNavTree = NTZipper XNode
-- | List of navigatable rose trees with XML nodes
type XmlNavTrees = [NTZipper XNode]
-- -----------------------------------------------------------------------------
--
-- XNode
-- | Represents elements
data XNode = XText String -- ^ ordinary text (leaf)
| XBlob Blob -- ^ text represented more space efficient as bytestring (leaf)
| XCharRef Int -- ^ character reference (leaf)
| XEntityRef String -- ^ entity reference (leaf)
| XCmt String -- ^ comment (leaf)
| XCdata String -- ^ CDATA section (leaf)
| XPi QName XmlTrees -- ^ Processing Instr with qualified name (leaf)
-- with list of attributes.
-- If tag name is xml, attributes are \"version\", \"encoding\", \"standalone\",
-- else attribute list is empty, content is a text child node
| XTag QName XmlTrees -- ^ tag with qualified name and list of attributes (inner node or leaf)
| XDTD DTDElem Attributes -- ^ DTD element with assoc list for dtd element features
| XAttr QName -- ^ attribute with qualified name, the attribute value is stored in children
| XError Int String -- ^ error message with level and text
deriving (Eq, Show, Typeable)
instance NFData XNode where
rnf (XText s) = rnf s
rnf (XTag qn cs) = rnf qn `seq` rnf cs
rnf (XAttr qn) = rnf qn
rnf (XCharRef i) = rnf i
rnf (XEntityRef n) = rnf n
rnf (XCmt c) = rnf c
rnf (XCdata s) = rnf s
rnf (XPi qn ts) = rnf qn `seq` rnf ts
rnf (XDTD de al) = rnf de `seq` rnf al
rnf (XBlob b) = BS.length b `seq` ()
rnf (XError n e) = rnf n `seq` rnf e
instance WNFData XNode where
rwnf (XText s) = rwnf s
rwnf (XTag qn cs) = rwnf qn `seq` rwnf cs
rwnf (XAttr qn) = rwnf qn
rwnf (XCharRef i) = i `seq` ()
rwnf (XEntityRef n) = rwnf n
rwnf (XCmt c) = rwnf c
rwnf (XCdata s) = rwnf s
rwnf (XPi qn ts) = rwnf qn `seq` rwnf ts
rwnf (XDTD de al) = rwnf de `seq` rwnfAttributes al
rwnf (XBlob _b) = () -- BS.length b `seq` () -- lazy bytestrings are not evaluated
rwnf (XError n e) = n `seq` rwnf e
-- | Evaluate an assoc list of strings
rwnfAttributes :: Attributes -> ()
rwnfAttributes [] = ()
rwnfAttributes ((k, v) : as) = rwnf k `seq` rwnf v `seq` rwnfAttributes as
instance Binary XNode where
put (XText s) = put ( 0::Word8) >> put s
put (XTag qn cs) = put ( 6::Word8) >> put qn >> put cs
put (XAttr qn) = put ( 8::Word8) >> put qn
put (XCharRef i) = put ( 1::Word8) >> put i
put (XEntityRef n) = put ( 2::Word8) >> put n
put (XCmt c) = put ( 3::Word8) >> put c
put (XCdata s) = put ( 4::Word8) >> put s
put (XPi qn ts) = put ( 5::Word8) >> put qn >> put ts
put (XDTD de al) = put ( 7::Word8) >> put de >> put al
put (XError n e) = put ( 9::Word8) >> put n >> put e
put (XBlob b) = put (10::Word8) >> put b
get = do
tag <- getWord8
case tag of
0 -> get >>= return . XText
1 -> get >>= return . XCharRef
2 -> get >>= return . XEntityRef
3 -> get >>= return . XCmt
4 -> get >>= return . XCdata
5 -> do
qn <- get
get >>= return . XPi qn
6 -> do
qn <- get
get >>= return . XTag qn
7 -> do
de <- get
get >>= return . XDTD de
8 -> get >>= return . XAttr
9 -> do
n <- get
get >>= return . XError n
10 -> get >>= return . XBlob
_ -> error "XNode.get: error while decoding XNode"
-- -----------------------------------------------------------------------------
--
-- DTDElem
-- | Represents a DTD element
data DTDElem = DOCTYPE -- ^ attr: name, system, public, XDTD elems as children
| ELEMENT -- ^ attr: name, kind
--
-- name: element name
--
-- kind: \"EMPTY\" | \"ANY\" | \"\#PCDATA\" | children | mixed
| CONTENT -- ^ element content
--
-- attr: kind, modifier
--
-- modifier: \"\" | \"?\" | \"*\" | \"+\"
--
-- kind: seq | choice
| ATTLIST -- ^ attributes:
-- name - name of element
--
-- value - name of attribute
--
-- type: \"CDATA\" | \"ID\" | \"IDREF\" | \"IDREFS\" | \"ENTITY\" | \"ENTITIES\" |
--
-- \"NMTOKEN\" | \"NMTOKENS\" |\"NOTATION\" | \"ENUMTYPE\"
--
-- kind: \"#REQUIRED\" | \"#IMPLIED\" | \"DEFAULT\"
| ENTITY -- ^ for entity declarations
| PENTITY -- ^ for parameter entity declarations
| NOTATION -- ^ for notations
| CONDSECT -- ^ for INCLUDEs, IGNOREs and peRefs: attr: type
--
-- type = INCLUDE, IGNORE or %...;
| NAME -- ^ attr: name
--
-- for lists of names in notation types or nmtokens in enumeration types
| PEREF -- ^ for Parameter Entity References in DTDs
deriving (Eq, Ord, Enum, Show, Read, Typeable)
instance NFData DTDElem
where rnf x = seq x ()
instance WNFData DTDElem
instance Binary DTDElem where
put de = put ((toEnum . fromEnum $ de)::Word8) -- DTDElem is not yet instance of Enum
get = do tag <- getWord8
return $! (toEnum . fromEnum $ tag)
-- -----------------------------------------------------------------------------
-- | Binary large object implemented as a lazy bytestring
type Blob = BS.ByteString
blobToString :: Blob -> String
blobToString = CS.unpack
{-# INLINE blobToString #-}
stringToBlob :: String -> Blob
stringToBlob = CS.pack
{-# INLINE stringToBlob #-}
-- -----------------------------------------------------------------------------
-- | Attribute list
--
-- used for storing option lists and features of DTD parts
type Attributes = AssocList String String
-- -----------------------------------------------------------------------------
--
-- Constants for error levels
-- | no error, everything is ok
c_ok :: Int
c_ok = 0
-- | Error level for XError, type warning
c_warn :: Int
c_warn = c_ok + 1
-- | Error level for XError, type error
c_err :: Int
c_err = c_warn + 1
-- | Error level for XError, type fatal error
c_fatal :: Int
c_fatal = c_err + 1
-- -----------------------------------------------------------------------------
-- | data type for representing a set of nodes as a tree structure
--
-- this structure is e.g. used to repesent the result of an XPath query
-- such that the selected nodes can be processed or selected later in
-- processing a document tree
data XmlNodeSet = XNS { thisNode :: Bool -- ^ is this node part of the set ?
, attrNodes :: [QName] -- ^ the set of attribute nodes
, childNodes :: ChildNodes -- ^ the set of child nodes, a list of pairs of index and node set
}
deriving (Eq, Show, Typeable)
type ChildNodes = [(Int, XmlNodeSet)]
-- -----------------------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/Util.hs 0000644 0000000 0000000 00000017513 12752557014 015464 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.Util
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
Little useful things for strings, lists and other values
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.Util
( stringTrim
, stringToLower
, stringToUpper
, stringAll
, stringFirst
, stringLast
, normalizeNumber
, normalizeWhitespace
, normalizeBlanks
, escapeURI
, textEscapeXml
, stringEscapeXml
, attrEscapeXml
, stringToInt
, stringToHexString
, charToHexString
, intToHexString
, hexStringToInt
, decimalStringToInt
, doubles
, singles
, noDoubles
, swap
, partitionEither
, toMaybe
, uncurry3
, uncurry4
)
where
import Data.Char
import Data.List
import Data.Maybe
-- ------------------------------------------------------------
-- |
-- remove leading and trailing whitespace with standard Haskell predicate isSpace
stringTrim :: String -> String
stringTrim = reverse . dropWhile isSpace . reverse . dropWhile isSpace
-- |
-- convert string to uppercase with standard Haskell toUpper function
stringToUpper :: String -> String
stringToUpper = map toUpper
-- |
-- convert string to lowercase with standard Haskell toLower function
stringToLower :: String -> String
stringToLower = map toLower
-- | find all positions where a string occurs within another string
stringAll :: (Eq a) => [a] -> [a] -> [Int]
stringAll x = map fst . filter ((x `isPrefixOf`) . snd) . zip [0..] . tails
-- | find the position of the first occurence of a string
stringFirst :: (Eq a) => [a] -> [a] -> Maybe Int
stringFirst x = listToMaybe . stringAll x
-- | find the position of the last occurence of a string
stringLast :: (Eq a) => [a] -> [a] -> Maybe Int
stringLast x = listToMaybe . reverse . stringAll x
-- ------------------------------------------------------------
-- | Removes leading \/ trailing whitespaces and leading zeros
normalizeNumber :: String -> String
normalizeNumber
= reverse . dropWhile (== ' ') . reverse .
dropWhile (\x -> x == '0' || x == ' ')
-- | Reduce whitespace sequences to a single whitespace
normalizeWhitespace :: String -> String
normalizeWhitespace = unwords . words
-- | replace all whitespace chars by blanks
normalizeBlanks :: String -> String
normalizeBlanks = map (\ x -> if isSpace x then ' ' else x)
-- ------------------------------------------------------------
-- | Escape all disallowed characters in URI
-- references (see )
escapeURI :: String -> String
escapeURI ref
= concatMap replace ref
where
notAllowed :: Char -> Bool
notAllowed c
= c < '\31'
||
c `elem` ['\DEL', ' ', '<', '>', '\"', '{', '}', '|', '\\', '^', '`' ]
replace :: Char -> String
replace c
| notAllowed c
= '%' : charToHexString c
| otherwise
= [c]
-- ------------------------------------------------------------
escapeXml :: String -> String -> String
escapeXml escSet
= concatMap esc
where
esc c
| c `elem` escSet
= "" ++ show (fromEnum c) ++ ";"
| otherwise
= [c]
-- |
-- escape XML chars <, >, ", and ampercent by transforming them into character references
--
-- see also : 'attrEscapeXml'
stringEscapeXml :: String -> String
stringEscapeXml = escapeXml "<>\"\'&"
-- |
-- escape XML chars < and ampercent by transforming them into character references, used for escaping text nodes
--
-- see also : 'attrEscapeXml'
textEscapeXml :: String -> String
textEscapeXml = escapeXml "<&"
-- |
-- escape XML chars in attribute values, same as stringEscapeXml, but none blank whitespace
-- is also escaped
--
-- see also : 'stringEscapeXml'
attrEscapeXml :: String -> String
attrEscapeXml = escapeXml "<>\"\'&\n\r\t"
stringToInt :: Int -> String -> Int
stringToInt base digits
= sign * (foldl acc 0 $ concatMap digToInt digits1)
where
splitSign ('-' : ds) = ((-1), ds)
splitSign ('+' : ds) = ( 1 , ds)
splitSign ds = ( 1 , ds)
(sign, digits1) = splitSign digits
digToInt c
| c >= '0' && c <= '9'
= [ord c - ord '0']
| c >= 'A' && c <= 'Z'
= [ord c - ord 'A' + 10]
| c >= 'a' && c <= 'z'
= [ord c - ord 'a' + 10]
| otherwise
= []
acc i1 i0
= i1 * base + i0
-- |
-- convert a string of hexadecimal digits into an Int
hexStringToInt :: String -> Int
hexStringToInt = stringToInt 16
-- |
-- convert a string of digits into an Int
decimalStringToInt :: String -> Int
decimalStringToInt = stringToInt 10
-- |
-- convert a string into a hexadecimal string applying charToHexString
--
-- see also : 'charToHexString'
stringToHexString :: String -> String
stringToHexString = concatMap charToHexString
-- |
-- convert a char (byte) into a 2-digit hexadecimal string
--
-- see also : 'stringToHexString', 'intToHexString'
charToHexString :: Char -> String
charToHexString c
= [ fourBitsToChar (c' `div` 16)
, fourBitsToChar (c' `mod` 16)
]
where
c' = fromEnum c
-- |
-- convert a none negative Int into a hexadecimal string
--
-- see also : 'charToHexString'
intToHexString :: Int -> String
intToHexString i
| i == 0
= "0"
| i > 0
= intToStr i
| otherwise
= error ("intToHexString: negative argument " ++ show i)
where
intToStr 0 = ""
intToStr i' = intToStr (i' `div` 16) ++ [fourBitsToChar (i' `mod` 16)]
fourBitsToChar :: Int -> Char
fourBitsToChar i = "0123456789ABCDEF" !! i
-- ------------------------------------------------------------
-- |
-- take all elements of a list which occur more than once. The result does not contain doubles.
-- (doubles . doubles == doubles)
doubles :: Eq a => [a] -> [a]
doubles
= doubles' []
where
doubles' acc []
= acc
doubles' acc (e : s)
| e `elem` s
&&
e `notElem` acc
= doubles' (e:acc) s
| otherwise
= doubles' acc s
-- |
-- drop all elements from a list which occur more than once.
singles :: Eq a => [a] -> [a]
singles
= singles' []
where
singles' acc []
= acc
singles' acc (e : s)
| e `elem` s
||
e `elem` acc
= singles' acc s
| otherwise
= singles' (e : acc) s
-- |
-- remove duplicates from list
noDoubles :: Eq a => [a] -> [a]
noDoubles []
= []
noDoubles (e : s)
| e `elem` s = noDoubles s
| otherwise = e : noDoubles s
-- ------------------------------------------------------------
swap :: (a,b) -> (b,a)
swap (x,y) = (y,x)
partitionEither :: [Either a b] -> ([a], [b])
partitionEither =
foldr (\x ~(ls,rs) -> either (\l -> (l:ls,rs)) (\r -> (ls,r:rs)) x) ([],[])
toMaybe :: Bool -> a -> Maybe a
toMaybe False _ = Nothing
toMaybe True x = Just x
-- ------------------------------------------------------------
-- | mothers little helpers for to much curry
uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 f ~(a, b, c) = f a b c
uncurry4 :: (a -> b -> c -> d -> e) -> (a, b, c, d) -> e
uncurry4 f ~(a, b, c, d) = f a b c d
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/XmlKeywords.hs 0000644 0000000 0000000 00000012147 12752557014 017035 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.XmlKeywords
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Constants for XML keywords, for special attribute names
and special attribute values
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.XmlKeywords
where
-- ------------------------------------------------------------
--
-- string constants for representing DTD keywords and attributes
t_xml, -- tag names
t_root :: String
a_default, -- attribute names
a_contentLength,
a_column,
a_encoding,
a_kind,
a_line,
a_module,
a_modifier,
a_name,
a_output_encoding,
a_peref,
a_source,
a_status,
a_standalone,
a_type,
a_url,
a_value,
a_version,
a_xml,
a_xmlns :: String
v_0, -- attribute values
v_1,
v_2,
v_yes,
v_no,
v_any,
v_children,
v_choice,
v_empty,
v_mixed,
v_seq,
v_null,
v_option,
v_pcdata,
v_star,
v_plus :: String
k_any, -- DTD keywords
k_cdata,
k_empty,
k_entity,
k_entities,
k_id,
k_idref,
k_idrefs,
k_include,
k_ignore,
k_nmtoken,
k_nmtokens,
k_peref,
k_public,
k_system,
k_enumeration,
k_fixed,
k_implied,
k_ndata,
k_notation,
k_pcdata,
k_required,
k_default :: String
-- ------------------------------------------------------------
t_xml = "xml"
t_root = "/" -- name of root node tag
a_column = "column"
a_contentLength = "Content-Length"
a_default = "default"
a_encoding = "encoding"
a_kind = "kind"
a_line = "line"
a_module = "module"
a_modifier = "modifier"
a_name = "name"
a_output_encoding = "output-encoding"
a_peref = k_peref
a_source = "source"
a_standalone = "standalone"
a_status = "status"
a_type = "type"
a_url = "url"
a_value = "value"
a_version = "version"
a_xml = "xml"
a_xmlns = "xmlns"
v_yes = "yes"
v_no = "no"
v_0 = "0"
v_1 = "1"
v_2 = "2"
v_any = k_any
v_children = "children"
v_choice = "choice"
v_empty = k_empty
v_pcdata = k_pcdata
v_mixed = "mixed"
v_seq = "seq"
v_null = ""
v_option = "?"
v_star = "*"
v_plus = "+"
k_any = "ANY"
k_cdata = "CDATA"
k_empty = "EMPTY"
k_entity = "ENTITY"
k_entities = "ENTITIES"
k_id = "ID"
k_idref = "IDREF"
k_idrefs = "IDREFS"
k_include = "INCLUDE"
k_ignore = "IGNORE"
k_nmtoken = "NMTOKEN"
k_nmtokens = "NMTOKENS"
k_peref = "PERef"
k_public = "PUBLIC"
k_system = "SYSTEM"
k_enumeration = "#ENUMERATION"
k_fixed = "#FIXED"
k_implied = "#IMPLIED"
k_ndata = "NDATA"
k_notation = "NOTATION"
k_pcdata = "#PCDATA"
k_required = "#REQUIRED"
k_default = "#DEFAULT"
dtdPrefix :: String
dtdPrefix = "doctype-"
-- ------------------------------------------------------------
--
-- attribute names for transfer protocol attributes
-- used in XmlInput for describing header information
-- of http and other requests
transferPrefix
, transferProtocol
, transferMimeType
, transferEncoding
, transferURI
, transferDefaultURI
, transferStatus
, transferMessage
, transferVersion :: String
transferPrefix = "transfer-"
transferProtocol = transferPrefix ++ "Protocol"
transferVersion = transferPrefix ++ "Version"
transferMimeType = transferPrefix ++ "MimeType"
transferEncoding = transferPrefix ++ "Encoding"
transferDefaultURI = transferPrefix ++ "DefaultURI"
transferStatus = transferPrefix ++ "Status"
transferMessage = transferPrefix ++ "Message"
transferURI = transferPrefix ++ "URI"
-- ------------------------------------------------------------
--
httpPrefix :: String
httpPrefix = "http-"
stringProtocol :: String
stringProtocol = "string:"
-- ------------------------------------------------------------
--
-- known namespaces
-- |
-- the predefined namespace uri for xml: \"http:\/\/www.w3.org\/XML\/1998\/namespace\"
xmlNamespace :: String
xmlNamespace = "http://www.w3.org/XML/1998/namespace"
-- |
-- the predefined namespace uri for xmlns: \"http:\/\/www.w3.org\/2000\/xmlns\/\"
xmlnsNamespace :: String
xmlnsNamespace = "http://www.w3.org/2000/xmlns/"
-- | Relax NG namespace
relaxNamespace :: String
relaxNamespace = "http://relaxng.org/ns/structure/1.0"
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DOM/XmlNode.hs 0000644 0000000 0000000 00000042015 12752557014 016110 0 ustar 00 0000000 0000000 {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, FunctionalDependencies #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DOM.XmlNode
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
Interface for XmlArrow to basic data types NTree and XmlTree
If this module must be used in code working with arrows,
it should be imported qualified e.g. @as XN@, to prevent name clashes.
For code working on the \"node and tree level\" this module
is the interface for writing code without using the
constructor functions of 'XNode' and 'NTree' directly
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DOM.XmlNode
( module Text.XML.HXT.DOM.XmlNode
, module Data.Tree.Class
, module Data.Tree.NTree.TypeDefs
)
where
import Control.Monad
import Control.FlatSeq
import Data.Function ( on )
import Data.Maybe ( fromMaybe
, fromJust
)
import Data.Tree.Class
import Data.Tree.NTree.TypeDefs
import Text.XML.HXT.DOM.Interface
class XmlNode a where
-- discriminating predicates
isText :: a -> Bool
isBlob :: a -> Bool
isCharRef :: a -> Bool
isEntityRef :: a -> Bool
isCmt :: a -> Bool
isCdata :: a -> Bool
isPi :: a -> Bool
isElem :: a -> Bool
isRoot :: a -> Bool
isDTD :: a -> Bool
isAttr :: a -> Bool
isError :: a -> Bool
-- constructor functions for leave nodes
mkText :: String -> a
mkBlob :: Blob -> a
mkCharRef :: Int -> a
mkEntityRef :: String -> a
mkCmt :: String -> a
mkCdata :: String -> a
mkPi :: QName -> XmlTrees -> a
mkError :: Int -> String -> a
-- selectors
getText :: a -> Maybe String
getBlob :: a -> Maybe Blob
getCharRef :: a -> Maybe Int
getEntityRef :: a -> Maybe String
getCmt :: a -> Maybe String
getCdata :: a -> Maybe String
getPiName :: a -> Maybe QName
getPiContent :: a -> Maybe XmlTrees
getElemName :: a -> Maybe QName
getAttrl :: a -> Maybe XmlTrees
getDTDPart :: a -> Maybe DTDElem
getDTDAttrl :: a -> Maybe Attributes
getAttrName :: a -> Maybe QName
getErrorLevel :: a -> Maybe Int
getErrorMsg :: a -> Maybe String
-- derived selectors
getName :: a -> Maybe QName
getQualifiedName :: a -> Maybe String
getUniversalName :: a -> Maybe String
getUniversalUri :: a -> Maybe String
getLocalPart :: a -> Maybe String
getNamePrefix :: a -> Maybe String
getNamespaceUri :: a -> Maybe String
-- "modifier" functions
changeText :: (String -> String) -> a -> a
changeBlob :: (Blob -> Blob) -> a -> a
changeCmt :: (String -> String) -> a -> a
changeName :: (QName -> QName) -> a -> a
changeElemName :: (QName -> QName) -> a -> a
changeAttrl :: (XmlTrees -> XmlTrees) -> a -> a
changeAttrName :: (QName -> QName) -> a -> a
changePiName :: (QName -> QName) -> a -> a
changeDTDAttrl :: (Attributes -> Attributes) -> a -> a
setText :: String -> a -> a
setBlob :: Blob -> a -> a
setCmt :: String -> a -> a
setName :: QName -> a -> a
setElemName :: QName -> a -> a
setElemAttrl :: XmlTrees -> a -> a
setAttrName :: QName -> a -> a
setPiName :: QName -> a -> a
setDTDAttrl :: Attributes -> a -> a
-- default implementations
getName n = getElemName n `mplus` getAttrName n `mplus` getPiName n
getQualifiedName n = getName n >>= return . qualifiedName
getUniversalName n = getName n >>= return . universalName
getUniversalUri n = getName n >>= return . universalUri
getLocalPart n = getName n >>= return . localPart
getNamePrefix n = getName n >>= return . namePrefix
getNamespaceUri n = getName n >>= return . namespaceUri
setText = changeText . const
setBlob = changeBlob . const
setCmt = changeCmt . const
setName = changeName . const
setElemName = changeElemName . const
setElemAttrl = changeAttrl . const
setAttrName = changeAttrName . const
setPiName = changePiName . const
setDTDAttrl = changeDTDAttrl . const
-- XNode and XmlTree are instances of XmlNode
instance XmlNode XNode where
isText (XText _) = True
isText (XBlob _) = True
isText _ = False
{-# INLINE isText #-}
isBlob (XBlob _) = True
isBlob _ = False
{-# INLINE isBlob #-}
isCharRef (XCharRef _) = True
isCharRef _ = False
{-# INLINE isCharRef #-}
isEntityRef (XEntityRef _) = True
isEntityRef _ = False
{-# INLINE isEntityRef #-}
isCmt (XCmt _) = True
isCmt _ = False
{-# INLINE isCmt #-}
isCdata (XCdata _) = True
isCdata _ = False
{-# INLINE isCdata #-}
isPi (XPi _ _) = True
isPi _ = False
{-# INLINE isPi #-}
isElem (XTag _ _) = True
isElem _ = False
{-# INLINE isElem #-}
isRoot t = isElem t
&&
fromMaybe "" (getQualifiedName t) == t_root
isDTD (XDTD _ _) = True
isDTD _ = False
{-# INLINE isDTD #-}
isAttr (XAttr _) = True
isAttr _ = False
{-# INLINE isAttr #-}
isError (XError _ _) = True
isError _ = False
{-# INLINE isError #-}
mkText = XText
{-# INLINE mkText #-}
mkBlob = XBlob
{-# INLINE mkBlob #-}
mkCharRef = XCharRef
{-# INLINE mkCharRef #-}
mkEntityRef = XEntityRef
{-# INLINE mkEntityRef #-}
mkCmt = XCmt
{-# INLINE mkCmt #-}
mkCdata = XCdata
{-# INLINE mkCdata #-}
mkPi = XPi
{-# INLINE mkPi #-}
mkError = XError
{-# INLINE mkError #-}
getText (XText t) = Just t
getText (XBlob b) = Just . blobToString $ b
getText _ = Nothing
{-# INLINE getText #-}
getBlob (XBlob b) = Just b
getBlob _ = Nothing
{-# INLINE getBlob #-}
getCharRef (XCharRef c) = Just c
getCharRef _ = Nothing
{-# INLINE getCharRef #-}
getEntityRef (XEntityRef e) = Just e
getEntityRef _ = Nothing
{-# INLINE getEntityRef #-}
getCmt (XCmt c) = Just c
getCmt _ = Nothing
{-# INLINE getCmt #-}
getCdata (XCdata d) = Just d
getCdata _ = Nothing
{-# INLINE getCdata #-}
getPiName (XPi n _) = Just n
getPiName _ = Nothing
{-# INLINE getPiName #-}
getPiContent (XPi _ c) = Just c
getPiContent _ = Nothing
{-# INLINE getPiContent #-}
getElemName (XTag n _) = Just n
getElemName _ = Nothing
{-# INLINE getElemName #-}
getAttrl (XTag _ al) = Just al
getAttrl (XPi _ al) = Just al
getAttrl _ = Nothing
{-# INLINE getAttrl #-}
getDTDPart (XDTD p _) = Just p
getDTDPart _ = Nothing
{-# INLINE getDTDPart #-}
getDTDAttrl (XDTD _ al) = Just al
getDTDAttrl _ = Nothing
{-# INLINE getDTDAttrl #-}
getAttrName (XAttr n) = Just n
getAttrName _ = Nothing
{-# INLINE getAttrName #-}
getErrorLevel (XError l _) = Just l
getErrorLevel _ = Nothing
{-# INLINE getErrorLevel #-}
getErrorMsg (XError _ m) = Just m
getErrorMsg _ = Nothing
{-# INLINE getErrorMsg #-}
changeText cf (XText t) = XText . cf $ t
changeText cf (XBlob b) = XText . cf . blobToString $ b
changeText _ _ = error "changeText undefined"
{-# INLINE changeText #-}
changeBlob cf (XBlob b) = XBlob . cf $ b
changeBlob _ _ = error "changeBlob undefined"
{-# INLINE changeBlob #-}
changeCmt cf (XCmt c) = XCmt . cf $ c
changeCmt _ _ = error "changeCmt undefined"
{-# INLINE changeCmt #-}
changeName cf (XTag n al) = XTag (cf n) al
changeName cf (XAttr n) = XAttr . cf $ n
changeName cf (XPi n al) = XPi (cf n) al
changeName _ _ = error "changeName undefined"
{-# INLINE changeName #-}
changeElemName cf (XTag n al) = XTag (cf n) al
changeElemName _ _ = error "changeElemName undefined"
{-# INLINE changeElemName #-}
changeAttrl cf (XTag n al) = XTag n (cf al)
changeAttrl cf (XPi n al) = XPi n (cf al)
changeAttrl _ _ = error "changeAttrl undefined"
{-# INLINE changeAttrl #-}
changeAttrName cf (XAttr n) = XAttr . cf $ n
changeAttrName _ _ = error "changeAttrName undefined"
{-# INLINE changeAttrName #-}
changePiName cf (XPi n al) = XPi (cf n) al
changePiName _ _ = error "changePiName undefined"
{-# INLINE changePiName #-}
changeDTDAttrl cf (XDTD p al) = XDTD p (cf al)
changeDTDAttrl _ _ = error "changeDTDAttrl undefined"
{-# INLINE changeDTDAttrl #-}
mkElementNode :: QName -> XmlTrees -> XNode
mkElementNode = XTag
{-# INLINE mkElementNode #-}
mkAttrNode :: QName -> XNode
mkAttrNode = XAttr
{-# INLINE mkAttrNode #-}
mkDTDNode :: DTDElem -> Attributes -> XNode
mkDTDNode = XDTD
{-# INLINE mkDTDNode #-}
instance (XmlNode a, Tree t) => XmlNode (t a) where
isText = isText . getNode
{-# INLINE isText #-}
isBlob = isBlob . getNode
{-# INLINE isBlob #-}
isCharRef = isCharRef . getNode
{-# INLINE isCharRef #-}
isEntityRef = isEntityRef . getNode
{-# INLINE isEntityRef #-}
isCmt = isCmt . getNode
{-# INLINE isCmt #-}
isCdata = isCdata . getNode
{-# INLINE isCdata #-}
isPi = isPi . getNode
{-# INLINE isPi #-}
isElem = isElem . getNode
{-# INLINE isElem #-}
isRoot = isRoot . getNode
{-# INLINE isRoot #-}
isDTD = isDTD . getNode
{-# INLINE isDTD #-}
isAttr = isAttr . getNode
{-# INLINE isAttr #-}
isError = isError . getNode
{-# INLINE isError #-}
mkText = mkLeaf . mkText
{-# INLINE mkText #-}
mkBlob = mkLeaf . mkBlob
{-# INLINE mkBlob #-}
mkCharRef = mkLeaf . mkCharRef
{-# INLINE mkCharRef #-}
mkEntityRef = mkLeaf . mkEntityRef
{-# INLINE mkEntityRef #-}
mkCmt = mkLeaf . mkCmt
{-# INLINE mkCmt #-}
mkCdata = mkLeaf . mkCdata
{-# INLINE mkCdata #-}
mkPi n = mkLeaf . mkPi n
{-# INLINE mkPi #-}
mkError l = mkLeaf . mkError l
{-# INLINE mkError #-}
getText = getText . getNode
{-# INLINE getText #-}
getBlob = getBlob . getNode
{-# INLINE getBlob #-}
getCharRef = getCharRef . getNode
{-# INLINE getCharRef #-}
getEntityRef = getEntityRef . getNode
{-# INLINE getEntityRef #-}
getCmt = getCmt . getNode
{-# INLINE getCmt #-}
getCdata = getCdata . getNode
{-# INLINE getCdata #-}
getPiName = getPiName . getNode
{-# INLINE getPiName #-}
getPiContent = getPiContent . getNode
{-# INLINE getPiContent #-}
getElemName = getElemName . getNode
{-# INLINE getElemName #-}
getAttrl = getAttrl . getNode
{-# INLINE getAttrl #-}
getDTDPart = getDTDPart . getNode
{-# INLINE getDTDPart #-}
getDTDAttrl = getDTDAttrl . getNode
{-# INLINE getDTDAttrl #-}
getAttrName = getAttrName . getNode
{-# INLINE getAttrName #-}
getErrorLevel = getErrorLevel . getNode
{-# INLINE getErrorLevel #-}
getErrorMsg = getErrorMsg . getNode
{-# INLINE getErrorMsg #-}
changeText = changeNode . changeText
{-# INLINE changeText #-}
changeBlob = changeNode . changeBlob
{-# INLINE changeBlob #-}
changeCmt = changeNode . changeCmt
{-# INLINE changeCmt #-}
changeName = changeNode . changeName
{-# INLINE changeName #-}
changeElemName = changeNode . changeElemName
{-# INLINE changeElemName #-}
changeAttrl = changeNode . changeAttrl
{-# INLINE changeAttrl #-}
changeAttrName = changeNode . changeAttrName
{-# INLINE changeAttrName #-}
changePiName = changeNode . changePiName
{-# INLINE changePiName #-}
changeDTDAttrl = changeNode . changeDTDAttrl
{-# INLINE changeDTDAttrl #-}
mkElement :: QName -> XmlTrees -> XmlTrees -> XmlTree
mkElement n al = mkTree (mkElementNode n al)
{-# INLINE mkElement #-}
mkRoot :: XmlTrees -> XmlTrees -> XmlTree
mkRoot al = mkTree (mkElementNode (mkName t_root) al)
mkAttr :: QName -> XmlTrees -> XmlTree
mkAttr n = mkTree (mkAttrNode n)
{-# INLINE mkAttr #-}
mkDTDElem :: DTDElem -> Attributes -> XmlTrees -> XmlTree
mkDTDElem e al = mkTree (mkDTDNode e al)
addAttr :: XmlTree -> XmlTrees -> XmlTrees
addAttr a al
| isAttr a = add al
| otherwise = al
where
an = (qualifiedName . fromJust . getAttrName) a
add []
= [a]
add (a1:al1)
| isAttr a1
&&
(qualifiedName . fromJust . getAttrName) a1 == an
= a : al1
| otherwise
= a1 : add al1
mergeAttrl :: XmlTrees -> XmlTrees -> XmlTrees
mergeAttrl = foldr addAttr
-- ------------------------------------------------------------
-- | weak normalform versions of constructors
mkElement' :: QName -> XmlTrees -> XmlTrees -> XmlTree
mkElement' n al cl = id $!! mkElement n al cl
{-# INLINE mkElement' #-}
mkRoot' :: XmlTrees -> XmlTrees -> XmlTree
mkRoot' al cl = id $!! mkRoot al cl
{-# INLINE mkRoot' #-}
mkAttr' :: QName -> XmlTrees -> XmlTree
mkAttr' n av = id $!! mkAttr n av
{-# INLINE mkAttr' #-}
mkText' :: String -> XmlTree
mkText' t = id $!! mkText t
{-# INLINE mkText' #-}
mkCharRef' :: Int -> XmlTree
mkCharRef' i = id $!! mkCharRef i
{-# INLINE mkCharRef' #-}
mkEntityRef' :: String -> XmlTree
mkEntityRef' n = id $!! mkEntityRef n
{-# INLINE mkEntityRef' #-}
mkCmt' :: String -> XmlTree
mkCmt' c = id $!! mkCmt c
{-# INLINE mkCmt' #-}
mkCdata' :: String -> XmlTree
mkCdata' d = id $!! mkCdata d
{-# INLINE mkCdata' #-}
mkPi' :: QName -> XmlTrees -> XmlTree
mkPi' n v = id $!! mkPi n v
{-# INLINE mkPi' #-}
mkError' :: Int -> String -> XmlTree
mkError' l m = id $!! mkError l m
{-# INLINE mkError' #-}
mkDTDElem' :: DTDElem -> Attributes -> XmlTrees -> XmlTree
mkDTDElem' e al cl = id $!! mkDTDElem e al cl
{-# INLINE mkDTDElem' #-}
-- ------------------------------------------------------------
toText :: XmlTree -> XmlTree
toText t
| isCharRef t
= mkText
. (:[]) . toEnum
. fromJust
. getCharRef
$ t
| isCdata t
= mkText
. fromJust
. getCdata
$ t
| otherwise
= t
concText :: XmlTree -> XmlTree -> XmlTrees
concText t1 t2
| isText t1 && isText t2
= (:[]) . mkText $ fromJust (getText t1) ++ fromJust (getText t2)
| otherwise
= [t1, t2]
mergeText :: XmlTree -> XmlTree -> XmlTrees
mergeText
= concText `on` toText
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/AttributeValueValidation.hs 0000644 0000000 0000000 00000027113 12752557014 023526 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.TypeDefs
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
This module provides functions for validating attributes.
The main functions are:
- Check if the attribute value meets the lexical constraints of its type
- Normalization of an attribute value
-}
-- ------------------------------------------------------------
-- Special namings in source code:
--
-- - nd - XDTD node
--
-- - n - XTag node
--
module Text.XML.HXT.DTDValidation.AttributeValueValidation
( checkAttributeValue
, normalizeAttributeValue
)
where
import Text.XML.HXT.Parser.XmlParsec
( parseNMToken
, parseName
)
import Text.XML.HXT.DTDValidation.TypeDefs
-- ------------------------------------------------------------
-- |
-- Checks if the attribute value meets the lexical constraints of its type.
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - returns : a function which takes an element (XTag or XDTD ATTLIST),
-- checks if the attribute value meets the lexical constraints
-- of its type and returns a list of errors
checkAttributeValue :: XmlTrees -> XmlTree -> XmlArrow
checkAttributeValue dtdPart attrDecl
| isDTDAttlistNode attrDecl
= choiceA
[ isElem :-> ( checkAttrVal $< getAttrValue attrName )
, isDTDAttlist :-> ( checkAttrVal $< (getDTDAttrl >>^ dtd_default) )
, this :-> none
]
| otherwise
= none
where
al = getDTDAttributes attrDecl
attrName = dtd_value al
attrType = dtd_type al
checkAttrVal attrValue
= checkValue attrType dtdPart normalizedVal attrDecl
where
normalizedVal = normalizeAttributeValue (Just attrDecl) attrValue
-- |
-- Dispatches the attibute check by the attribute type.
--
-- * 1.parameter typ : the attribute type
--
-- - 2.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 3.parameter attrValue : the normalized attribute value to be checked
--
-- - 4.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - returns : a functions which takes an element (XTag or XDTD ATTLIST),
-- checks if the attribute value meets the lexical constraints
-- of its type and returns a list of errors
checkValue :: String -> XmlTrees -> String -> XmlTree -> XmlArrow
checkValue typ dtdPart attrValue attrDecl
| typ == k_cdata = none
| typ == k_enumeration = checkValueEnumeration attrDecl attrValue
| typ == k_entity = checkValueEntity dtdPart attrDecl attrValue
| typ == k_entities = checkValueEntities dtdPart attrDecl attrValue
| typ == k_id = checkValueId attrDecl attrValue
| typ == k_idref = checkValueIdref attrDecl attrValue
| typ == k_idrefs = checkValueIdrefs attrDecl attrValue
| typ == k_nmtoken = checkValueNmtoken attrDecl attrValue
| typ == k_nmtokens = checkValueNmtokens attrDecl attrValue
| typ == k_notation = checkValueEnumeration attrDecl attrValue
| otherwise = error ("Attribute type " ++ show typ ++ " unknown.")
-- |
-- Checks the value of Enumeration attribute types. (3.3.1 \/ p.27 in Spec)
--
-- * 1.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 2.parameter attrValue : the normalized attribute value to be checked
checkValueEnumeration :: XmlTree -> String -> XmlArrow
checkValueEnumeration attrDecl attrValue
| isDTDAttlistNode attrDecl
&&
attrValue `notElem` enumVals
= err ( "Attribute " ++ show (dtd_value al) ++ " for element " ++ show (dtd_name al) ++
" must have a value from list "++ show enumVals {- ++ " but has value " ++ show attrValue-} ++ ".")
| otherwise
= none
where
al = getDTDAttributes attrDecl
enumVals :: [String]
enumVals = map (dtd_name . getDTDAttributes) $ (runLA getChildren attrDecl)
-- |
-- Checks the value of ENTITY attribute types. (3.3.1 \/ p.26 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node, to get the
-- unparsed entity declarations
--
-- - 2.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 3.parameter attrValue : the normalized attribute value to be checked
checkValueEntity :: XmlTrees -> XmlTree -> String -> XmlArrow
checkValueEntity dtdPart attrDecl attrValue
| isDTDAttlistNode attrDecl
&&
attrValue `notElem` upEntities
= err ( "Entity " ++ show attrValue ++ " of attribute " ++ show (dtd_value al) ++
" for element " ++ show (dtd_name al) ++ " is not unparsed. " ++
"The following unparsed entities exist: " ++ show upEntities ++ ".")
| otherwise
= none
where
al = getDTDAttributes attrDecl
upEntities :: [String]
upEntities = map (dtd_name . getDTDAttributes) (isUnparsedEntity $$ dtdPart)
-- |
-- Checks the value of ENTITIES attribute types. (3.3.1 \/ p.26 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node, to get the
-- unparsed entity declarations
--
-- - 2.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 3.parameter attrValue : the normalized attribute value to be checked
checkValueEntities ::XmlTrees -> XmlTree -> String -> XmlArrow
checkValueEntities dtdPart attrDecl attrValue
| isDTDAttlistNode attrDecl
= if null valueList
then err ("Attribute " ++ show (dtd_value al) ++ " of element " ++
show (dtd_name al) ++ " must be one or more names.")
else catA . map (checkValueEntity dtdPart attrDecl) $ valueList
| otherwise
= none
where
al = getDTDAttributes attrDecl
valueList = words attrValue
-- |
-- Checks the value of NMTOKEN attribute types. (3.3.1 \/ p.26 in Spec)
--
-- * 1.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 2.parameter attrValue : the normalized attribute value to be checked
checkValueNmtoken :: XmlTree -> String -> XmlArrow
checkValueNmtoken attrDecl attrValue
| isDTDAttlistNode attrDecl
= constA attrValue >>> checkNmtoken
| otherwise
= none
where
al = getDTDAttributes attrDecl
checkNmtoken
= mkText >>> arrL (parseNMToken "")
>>>
isError
>>>
getErrorMsg
>>>
arr (\ s -> ( "Attribute value " ++ show attrValue ++ " of attribute " ++ show (dtd_value al) ++
" for element " ++ show (dtd_name al) ++ " must be a name token, "++ (lines s) !! 1 ++".") )
>>>
mkError c_err
-- |
-- Checks the value of NMTOKENS attribute types. (3.3.1 \/ p.26 in Spec)
--
-- * 1.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 2.parameter attrValue : the normalized attribute value to be checked
checkValueNmtokens :: XmlTree -> String -> XmlArrow
checkValueNmtokens attrDecl attrValue
| isDTDAttlistNode attrDecl
= if null valueList
then err ( "Attribute "++ show (dtd_value al) ++" of element " ++
show (dtd_name al) ++ " must be one or more name tokens.")
else catA . map (checkValueNmtoken attrDecl) $ valueList
| otherwise
= none
where
al = getDTDAttributes attrDecl
valueList = words attrValue
-- |
-- Checks the value of ID attribute types. (3.3.1 \/ p.25 in Spec)
--
-- * 1.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 2.parameter attrValue : the normalized attribute value to be checked
checkValueId :: XmlTree -> String -> XmlArrow
checkValueId attrDecl attrValue
= checkForName "Attribute value" attrDecl attrValue
-- |
-- Checks the value of IDREF attribute types. (3.3.1 \/ p.26 in Spec)
--
-- * 1.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 2.parameter attrValue : the normalized attribute value to be checked
checkValueIdref :: XmlTree -> String -> XmlArrow
checkValueIdref attrDecl attrValue
= checkForName "Attribute value" attrDecl attrValue
-- |
-- Checks the value of IDREFS attribute types. (3.3.1 \/ p.26 in Spec)
--
-- * 1.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 2.parameter attrValue : the normalized attribute value to be checked
checkValueIdrefs :: XmlTree -> String -> XmlArrow
checkValueIdrefs attrDecl attrValue
= catA . map (checkValueIdref attrDecl) . words $ attrValue
-- -----------------------------------------------------------------------------
-- General helper functions for checking attribute values
--
-- |
-- Checks if the value of an attribute is a name.
--
-- * 1.parameter msg : error message, should be "Entity" or "Attribute value"
--
-- - 2.parameter attrDecl : the declaration of the attribute from the DTD
--
-- - 3.parameter attrValue : the normalized attribute value to be checked
checkForName :: String -> XmlTree -> String -> XmlArrow
checkForName msg attrDecl attrValue
| isDTDAttlistNode attrDecl
= constA attrValue >>> checkName
| otherwise
= none
where
al = getDTDAttributes attrDecl
checkName
= mkText >>> arrL (parseName "")
>>>
isError
>>>
getErrorMsg
>>>
arr (\s -> ( msg ++ " " ++ show attrValue ++" of attribute " ++ show (dtd_value al) ++
" for element "++ show (dtd_name al) ++" must be a name, " ++ (lines s) !! 1 ++ ".") )
>>>
mkError c_err
-- -----------------------------------------------------------------------------
-- |
-- Normalizes an attribute value with respect to its type. (3.3.3 \/ p.29 in Spec)
--
-- * 1.parameter attrDecl : the declaration of the attribute from the DTD. Expected
-- is a list. If the list is empty, no declaration exists.
--
-- - 2.parameter value : the attribute value to be normalized
--
-- - returns : the normalized value
--
normalizeAttributeValue :: Maybe XmlTree -> String -> String
normalizeAttributeValue (Just attrDecl) value
= normalizeAttribute attrType
where
al = getDTDAttributes attrDecl
attrType = dtd_type al
normalizeAttribute :: String -> String
normalizeAttribute typ
| typ == k_cdata = cdataNormalization value
| otherwise = otherNormalization value
-- Attribute not declared in DTD, normalization as CDATA
normalizeAttributeValue Nothing value
= cdataNormalization value
-- ------------------------------------------------------------
-- Helper functions for normalization
-- |
-- Normalization of CDATA attribute values.
-- is already done when parsing
-- during entity substituion for attribute values
cdataNormalization :: String -> String
cdataNormalization = id
-- | Normalization of attribute values other than CDATA.
otherNormalization :: String -> String
otherNormalization = reduceWSSequences . stringTrim . cdataNormalization
-- | Reduce whitespace sequences to a single whitespace.
reduceWSSequences :: String -> String
reduceWSSequences str = unwords (words str)
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/DTDValidation.hs 0000644 0000000 0000000 00000047026 12752557014 021206 0 ustar 00 0000000 0000000 {-# LANGUAGE FlexibleContexts #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.TypeDefs
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
This module provides functions for validating the DTD of XML documents
represented as XmlTree.
Unlike other popular XML validation tools the validation process returns
a list of errors instead of aborting after the first error was found.
Unlike validation of the document, the DTD branch is traversed four times:
- Validation of Notations
- Validation of Unparsed Entities
- Validation of Element declarations
- Validation of Attribute declarations
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DTDValidation.DTDValidation
( removeDoublicateDefs
, validateDTD
)
where
import Text.XML.HXT.DTDValidation.AttributeValueValidation
import Text.XML.HXT.DTDValidation.TypeDefs
-- |
-- Validate a DTD.
--
-- - returns : a functions which takes the DTD subset of the XmlTree, checks
-- if the DTD is valid and returns a list of errors
validateDTD :: XmlArrow
validateDTD -- dtdPart
= isDTDDoctype
`guards`
( listA getChildren
>>>
( validateParts $<< (getNotationNames &&& getElemNames) )
)
where
validateParts notationNames elemNames
= validateNotations
<+>
validateEntities notationNames
<+>
validateElements elemNames
<+>
validateAttributes elemNames notationNames
getNotationNames :: LA [XmlTree] [String]
getNotationNames = listA $ unlistA >>> isDTDNotation >>> getDTDAttrValue a_name
getElemNames :: LA [XmlTree] [String]
getElemNames = listA $ unlistA >>> isDTDElement >>> getDTDAttrValue a_name
-- ------------------------------------------------------------
checkName :: String -> SLA [String] XmlTree XmlTree -> SLA [String] XmlTree XmlTree
checkName name msg
= ifA ( getState
>>>
isA (name `elem`)
)
msg
(nextState (name:) >>> none)
-- ------------------------------------------------------------
-- |
-- Validation of Notations, checks if all notation names are unique.
-- Validity constraint: Unique Notation Name (4.7 \/ p.44 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - returns : a list of errors
validateNotations :: LA XmlTrees XmlTree
validateNotations
= fromSLA [] ( unlistA
>>>
isDTDNotation
>>>
(checkForUniqueNotation $< getDTDAttrl)
)
where
checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree
checkForUniqueNotation al
= checkName name $
err ( "Notation "++ show name ++ " was already specified." )
where
name = dtd_name al
-- |
-- Validation of Entities.
--
-- 1. Issues a warning if entities are declared multiple times.
--
-- Optional warning: (4.2 \/ p.35 in Spec)
--
--
-- 2. Validates that a notation is declared for an unparsed entity.
--
-- Validity constraint: Notation Declared (4.2.2 \/ p.36 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter notationNames : list of all notation names declared in the DTD
--
-- - returns : a list of errors
validateEntities :: [String] -> LA XmlTrees XmlTree
validateEntities notationNames
= ( fromSLA [] ( unlistA
>>>
isDTDEntity
>>>
(checkForUniqueEntity $< getDTDAttrl)
)
)
<+>
( unlistA
>>>
isUnparsedEntity
>>>
(checkNotationDecl $< getDTDAttrl)
)
where
-- Check if entities are declared multiple times
checkForUniqueEntity :: Attributes -> SLA [String] XmlTree XmlTree
checkForUniqueEntity al
= checkName name $
warn ( "Entity "++ show name ++ " was already specified. " ++
"First declaration will be used." )
where
name = dtd_name al
-- Find unparsed entities for which no notation is specified
checkNotationDecl :: Attributes -> XmlArrow
checkNotationDecl al
| notationName `elem` notationNames
= none
| otherwise
= err ( "The notation " ++ show notationName ++ " must be declared " ++
"when referenced in the unparsed entity declaration for " ++
show upEntityName ++ "."
)
where
notationName = lookup1 k_ndata al
upEntityName = dtd_name al
-- |
-- Validation of Element declarations.
--
-- 1. Validates that an element is not declared multiple times.
--
-- Validity constraint: Unique Element Type Declaration (3.2 \/ p.21 in Spec)
--
--
-- 2. Validates that an element name only appears once in a mixed-content declaration.
--
-- Validity constraint: No Duplicate Types (3.2 \/ p.21 in Spec)
--
--
-- 3. Issues a warning if an element mentioned in a content model is not declared in the
-- DTD.
--
-- Optional warning: (3.2 \/ p.21 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter elemNames : list of all element names declared in the DTD
--
-- - returns : a list of errors
validateElements :: [String] -> LA XmlTrees XmlTree
validateElements elemNames -- dtdPart
= ( fromSLA [] ( unlistA
>>>
isDTDElement
>>>
(checkForUniqueElement $< getDTDAttrl)
)
)
<+>
( unlistA
>>>
isMixedContentElement
>>>
(checkMixedContent $< getDTDAttrl)
)
<+>
( unlistA
>>>
isDTDElement
>>>
(checkContentModel elemNames $< getDTDAttrl)
)
where
-- Validates that an element is not declared multiple times
checkForUniqueElement :: Attributes -> SLA [String] XmlTree XmlTree
checkForUniqueElement al
= checkName name $
err ( "Element type " ++ show name ++
" must not be declared more than once." )
where
name = dtd_name al
-- Validates that an element name only appears once in a mixed-content declaration
checkMixedContent :: Attributes -> XmlArrow
checkMixedContent al
= fromSLA [] ( getChildren
>>>
getChildren
>>>
isDTDName
>>>
(check $< getDTDAttrl)
)
where
elemName = dtd_name al
check al'
= checkName name $
err ( "The element type " ++ show name ++
" was already specified in the mixed-content model of the element declaration " ++
show elemName ++ "." )
where
name = dtd_name al'
-- Issues a warning if an element mentioned in a content model is not
-- declared in the DTD.
checkContentModel :: [String] -> Attributes -> XmlArrow
checkContentModel names al
| cm `elem` [v_children, v_mixed]
= getChildren >>> checkContent
| otherwise
= none
where
elemName = dtd_name al
cm = dtd_type al
checkContent :: XmlArrow
checkContent
= choiceA
[ isDTDName :-> ( checkName' $< getDTDAttrl )
, isDTDContent :-> ( getChildren >>> checkContent )
, this :-> none
]
where
checkName' al'
| childElemName `elem` names
= none
| otherwise
= warn ( "The element type "++ show childElemName ++
", used in content model of element "++ show elemName ++
", is not declared."
)
where
childElemName = dtd_name al'
-- |
-- Validation of Attribute declarations.
--
-- (1) Issues a warning if an attribute is declared for an element type not itself
-- decared.
--
-- Optinal warning: (3.3 \/ p. 24 in Spec)
--
--
-- 2. Issues a warning if more than one definition is provided for the same
-- attribute of a given element type. Fist declaration is binding, later
-- definitions are ignored.
--
-- Optional warning: (3.3 \/ p.24 in Spec)
--
--
-- 3. Issues a warning if the same Nmtoken occures more than once in enumerated
-- attribute types of a single element type.
--
-- Optional warning: (3.3.1 \/ p.27 in Spec)
--
--
-- 4. Validates that an element type has not more than one ID attribute defined.
--
-- Validity constraint: One ID per Element Type (3.3.1 \/ p.26 in Spec)
--
--
-- 5. Validates that an element type has not more than one NOTATION attribute defined.
--
-- Validity constraint: One Notation per Element Type (3.3.1 \/ p.27 in Spec)
--
--
-- 6. Validates that an ID attributes has the type #IMPLIED or #REQUIRED.
--
-- Validity constraint: ID Attribute Default (3.3.1 \/ p.26 in Spec)
--
--
-- 7. Validates that all referenced notations are declared.
--
-- Validity constraint: Notation Attributes (3.3.1 \/ p.27 in Spec)
--
--
-- 8. Validates that notations are not declared for EMPTY elements.
--
-- Validity constraint: No Notation on Empty Element (3.3.1 \/p.27 in Spec)
--
--
-- 9. Validates that the default value matches the lexical constraints of it's type.
--
-- Validity constraint: Attribute default legal (3.3.2 \/ p.28 in Spec)
--
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter elemNames : list of all element names declared in the DTD
--
-- - 3.parameter notationNames : list of all notation names declared in the DTD
--
-- - returns : a list of errors
validateAttributes :: [String] -> [String] -> LA XmlTrees XmlTree
validateAttributes elemNames notationNames
= -- 1. Find attributes for which no elements are declared
( runCheck this (checkDeclaredElements elemNames) )
<+>
-- 2. Find attributes which are declared more than once
( runNameCheck this checkForUniqueAttributeDeclaration )
<+>
-- 3. Find enumerated attribute types which nmtokens are declared more than once
( runCheck (isEnumAttrType `orElse` isNotationAttrType) checkEnumeratedTypes )
<+>
-- 4. Validate that there exists only one ID attribute for an element
( runNameCheck isIdAttrType checkForUniqueId )
<+>
-- 5. Validate that there exists only one NOTATION attribute for an element
( runNameCheck isNotationAttrType checkForUniqueNotation )
<+>
-- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED
( runCheck isIdAttrType checkIdKindConstraint )
<+>
-- 7. Validate that all referenced notations are declared
( runCheck isNotationAttrType (checkNotationDeclaration notationNames) )
<+>
-- 8. Validate that notations are not declared for EMPTY elements
( checkNoNotationForEmptyElements $< listA ( unlistA
>>>
isEmptyElement
>>>
getDTDAttrValue a_name
)
)
<+>
-- 9. Validate that the default value matches the lexical constraints of it's type
( checkDefaultValueTypes $< this )
where
-- ------------------------------------------------------------
-- control structures
runCheck select check
= unlistA >>> isDTDAttlist
>>>
select
>>>
(check $< getDTDAttrl)
runNameCheck select check
= fromSLA [] $ runCheck select check
--------------------------------------------------------------------------
-- 1. Find attributes for which no elements are declared
checkDeclaredElements :: [String] -> Attributes -> XmlArrow
checkDeclaredElements elemNames' al
| en `elem` elemNames'
= none
| otherwise
= warn ( "The element type \""++ en ++ "\" used in dclaration "++
"of attribute \""++ an ++"\" is not declared."
)
where
en = dtd_name al
an = dtd_value al
--------------------------------------------------------------------------
-- 2. Find attributes which are declared more than once
checkForUniqueAttributeDeclaration :: Attributes -> SLA [String] XmlTree XmlTree
checkForUniqueAttributeDeclaration al
= checkName name $
warn ( "Attribute \""++ aname ++"\" for element type \""++
ename ++"\" is already declared. First "++
"declaration will be used." )
where
ename = dtd_name al
aname = dtd_value al
name = ename ++ "|" ++ aname
--------------------------------------------------------------------------
-- 3. Find enumerated attribute types which nmtokens are declared more than once
checkEnumeratedTypes :: Attributes -> XmlArrow
checkEnumeratedTypes al
= fromSLA [] ( getChildren
>>>
isDTDName
>>>
(checkForUniqueType $< getDTDAttrl)
)
where
checkForUniqueType :: Attributes -> SLA [String] XmlTree XmlTree
checkForUniqueType al'
= checkName nmtoken $
warn ( "Nmtoken \""++ nmtoken ++"\" should not "++
"occur more than once in attribute \""++ dtd_value al ++
"\" for element \""++ dtd_name al ++ "\"." )
where
nmtoken = dtd_name al'
--------------------------------------------------------------------------
-- 4. Validate that there exists only one ID attribute for an element
checkForUniqueId :: Attributes -> SLA [String] XmlTree XmlTree
checkForUniqueId al
= checkName ename $
err ( "Element \""++ ename ++ "\" already has attribute of type "++
"ID, another attribute \""++ dtd_value al ++ "\" of type ID is "++
"not permitted." )
where
ename = dtd_name al
--------------------------------------------------------------------------
-- 5. Validate that there exists only one NOTATION attribute for an element
checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree
checkForUniqueNotation al
= checkName ename $
err ( "Element \""++ ename ++ "\" already has attribute of type "++
"NOTATION, another attribute \""++ dtd_value al ++ "\" of type NOTATION "++
"is not permitted." )
where
ename = dtd_name al
--------------------------------------------------------------------------
-- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED
checkIdKindConstraint :: Attributes -> XmlArrow
checkIdKindConstraint al
| attKind `elem` [k_implied, k_required]
= none
| otherwise
= err ( "ID attribute \""++ dtd_value al ++"\" must have a declared default "++
"of \"#IMPLIED\" or \"REQUIRED\"")
where
attKind = dtd_kind al
--------------------------------------------------------------------------
-- 7. Validate that all referenced notations are declared
checkNotationDeclaration :: [String] -> Attributes -> XmlArrow
checkNotationDeclaration notations al
= getChildren
>>>
isDTDName
>>>
(checkNotations $< getDTDAttrl)
where
checkNotations :: Attributes -> XmlArrow
checkNotations al'
| notation `elem` notations
= none
| otherwise
= err ( "The notation \""++ notation ++"\" must be declared when "++
"referenced in the notation type list for attribute \""++ dtd_value al ++
"\" of element \""++ dtd_name al ++"\"."
)
where
notation = dtd_name al'
--------------------------------------------------------------------------
-- 8. Validate that notations are not declared for EMPTY elements
checkNoNotationForEmptyElements :: [String] -> LA XmlTrees XmlTree
checkNoNotationForEmptyElements emptyElems
= unlistA
>>>
isDTDAttlist
>>>
isNotationAttrType
>>>
(checkNoNotationForEmptyElement $< getDTDAttrl)
where
checkNoNotationForEmptyElement :: Attributes -> XmlArrow
checkNoNotationForEmptyElement al
| ename `elem` emptyElems
= err ( "Attribute \""++ dtd_value al ++"\" of type NOTATION must not be "++
"declared on the element \""++ ename ++"\" declared EMPTY."
)
| otherwise
= none
where
ename = dtd_name al
--------------------------------------------------------------------------
-- 9. Validate that default values meet the lexical constraints of the attribute types
checkDefaultValueTypes :: XmlTrees -> LA XmlTrees XmlTree
checkDefaultValueTypes dtdPart'
= unlistA >>> isDTDAttlist
>>>
isDefaultAttrKind
>>>
(checkAttributeValue dtdPart' $< this)
-- ------------------------------------------------------------
-- |
-- Removes doublicate declarations from the DTD, which first declaration is
-- binding. This is the case for ATTLIST and ENTITY declarations.
--
-- - returns : A function that replaces the children of DOCTYPE nodes by a list
-- where all multiple declarations are removed.
removeDoublicateDefs :: XmlArrow
removeDoublicateDefs
= replaceChildren
( fromSLA [] ( getChildren
>>>
choiceA [ isDTDAttlist :-> (removeDoubleAttlist $< getDTDAttrl)
, isDTDEntity :-> (removeDoubleEntity $< getDTDAttrl)
, this :-> this
]
)
)
`when`
isDTDDoctype
where
checkName' n'
= ifA ( getState
>>>
isA (n' `elem`)
)
none
(this >>> perform (nextState (n':)))
removeDoubleAttlist :: Attributes -> SLA [String] XmlTree XmlTree
removeDoubleAttlist al
= checkName' elemAttr
where
elemAttr = elemName ++ "|" ++ attrName
attrName = dtd_value al
elemName = dtd_name al
removeDoubleEntity :: Attributes -> SLA [String] XmlTree XmlTree
removeDoubleEntity al
= checkName' (dtd_name al)
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/DocTransformation.hs 0000644 0000000 0000000 00000015034 12752557014 022206 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.DocTransformation
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
This module provides functions for transforming XML documents represented as
XmlTree with respect to its DTD.
Transforming an XML document with respect to its DTD means:
- add all attributes with default values
- normalize all attribute values
- sort all attributes in lexical order
Note: Transformation should be started after validation.
Before the document is validated, a lookup-table is build on the basis of
the DTD which maps element names to their transformation functions.
After this initialization phase the whole document is traversed in preorder
and every element is transformed by the XmlFilter from the lookup-table.
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DTDValidation.DocTransformation
( transform
)
where
import Text.XML.HXT.DTDValidation.TypeDefs
import Text.XML.HXT.DTDValidation.AttributeValueValidation
import Data.Maybe
import Data.List
import Data.Ord
import qualified Data.Map as M
-- ------------------------------------------------------------
-- |
-- Lookup-table which maps element names to their transformation functions. The
-- transformation functions are XmlArrows.
type TransEnvTable = M.Map ElemName TransFct
type ElemName = String
type TransFct = XmlArrow
-- ------------------------------------------------------------
-- |
-- filter for transforming the document.
--
-- * 1.parameter dtdPart : the DTD subset (Node @DOCTYPE@) of the XmlTree
--
-- - 2.parameter doc : the document subset of the XmlTree
--
-- - returns : a list of errors
transform :: XmlTree -> XmlArrow
transform dtdPart
= traverseTree transTable
where
transTable = buildAllTransformationFunctions (runLA getChildren dtdPart)
-- |
-- Traverse the XmlTree in preorder.
--
-- * 1.parameter transEnv : lookup-table which maps element names to their transformation functions
--
-- - returns : the whole transformed document
traverseTree :: TransEnvTable -> XmlArrow
traverseTree transEnv
= processTopDown ( (transFct $< getName)
`when`
isElem
)
where
transFct :: String -> XmlArrow
transFct name = fromMaybe this . M.lookup name $ transEnv
-- |
-- Build all transformation functions.
--
-- * 1.parameter dtdPart : the DTD subset, root node should be of type @DOCTYPE@
--
-- - returns : lookup-table which maps element names to their transformation functions
buildAllTransformationFunctions :: XmlTrees -> TransEnvTable
buildAllTransformationFunctions dtdNodes
= M.fromList $
(t_root, this)
:
concatMap (buildTransformationFunctions dtdNodes) dtdNodes
-- |
-- Build transformation functions for an element.
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- * 1.parameter nd : element declaration for which the transformation functions are
-- created
--
-- - returns : entry for the lookup-table
buildTransformationFunctions :: XmlTrees -> XmlTree -> [(ElemName, TransFct)]
buildTransformationFunctions dtdPart dn
| isDTDElementNode dn = [(name, transFct)]
| otherwise = []
where
al = getDTDAttributes dn
name = dtd_name al
transFct = setDefaultAttributeValues dtdPart dn
>>>
normalizeAttributeValues dtdPart dn
>>>
lexicographicAttributeOrder
-- ------------------------------------------------------------
-- |
-- Sort the attributes of an element in lexicographic order.
--
-- * returns : a function which takes an element (XTag), sorts its
-- attributes in lexicographic order and returns the changed element
lexicographicAttributeOrder :: XmlArrow
lexicographicAttributeOrder
= setAttrl (getAttrl >>. sortAttrl)
where
sortAttrl :: XmlTrees -> XmlTrees
sortAttrl = sortBy (comparing nameOfAttr)
-- |
-- Normalize attribute values.
--
-- * returns : a function which takes an element (XTag), normalizes its
-- attribute values and returns the changed element
normalizeAttributeValues :: XmlTrees -> XmlTree -> XmlArrow
normalizeAttributeValues dtdPart dn
| isDTDElementNode dn = processAttrl (normalizeAttr $< getName)
| otherwise = this
where
al = getDTDAttributes dn
elemName = dtd_name al
declaredAtts = isAttlistOfElement elemName $$ dtdPart
normalizeAttr :: String -> XmlArrow
normalizeAttr nameOfAtt
= normalizeAttrValue ( if null attDescr
then Nothing
else Just (head attDescr)
)
where
attDescr = filter ((== nameOfAtt) . valueOfDTD a_value) declaredAtts
normalizeAttrValue :: Maybe XmlTree -> XmlArrow
normalizeAttrValue descr
= replaceChildren ((xshow getChildren >>^ normalizeAttributeValue descr) >>> mkText)
-- |
-- Set default attribute values if they are not set.
--
-- * returns : a function which takes an element (XTag), adds missing attribute
-- defaults and returns the changed element
setDefaultAttributeValues :: XmlTrees -> XmlTree -> XmlArrow
setDefaultAttributeValues dtdPart dn
| isDTDElementNode dn = seqA (map setDefault defaultAtts)
| otherwise = this
where
elemName = dtd_name . getDTDAttributes $ dn
defaultAtts = ( isAttlistOfElement elemName
>>>
( isFixedAttrKind -- select attributes with default values
`orElse`
isDefaultAttrKind
)
) $$ dtdPart
setDefault :: XmlTree -> XmlArrow
setDefault attrDescr -- add the default attributes
= ( addAttr attName defaultValue -- to tag nodes with missing attributes
`whenNot`
hasAttr attName
)
`when`
isElem
where
al = getDTDAttributes attrDescr
attName = dtd_value al
defaultValue = dtd_default al
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/DocValidation.hs 0000644 0000000 0000000 00000042716 12752557014 021301 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.TypeDefs
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
This module provides functions for validating XML Documents represented as
XmlTree.
Unlike other popular XML validation tools the validation process returns
a list of errors instead of aborting after the first error was found.
Before the document is validated, a lookup-table is build on the basis of
the DTD which maps element names to their validation functions.
After this initialization phase the whole document is traversed in preorder
and every element is validated by the XmlFilter from the lookup-table.
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DTDValidation.DocValidation
( validateDoc
)
where
import Text.XML.HXT.DTDValidation.TypeDefs
import Text.XML.HXT.DTDValidation.AttributeValueValidation
import Text.XML.HXT.DTDValidation.XmlRE
-- ------------------------------------------------------------
-- |
-- Lookup-table which maps element names to their validation functions. The
-- validation functions are XmlArrows.
type ValiEnvTable = [ValiEnv]
type ValiEnv = (ElemName, ValFct)
type ElemName = String
type ValFct = XmlArrow
-- ------------------------------------------------------------
-- |
-- Validate a document.
--
-- * 1.parameter dtdPart : the DTD subset (Node @DOCTYPE@) of the XmlTree
--
-- - 2.parameter doc : the document subset of the XmlTree
--
-- - returns : a list of errors
validateDoc :: XmlTree -> XmlArrow
validateDoc dtdPart
= traverseTree valTable
where
valTable = buildAllValidationFunctions dtdPart
-- |
-- Traverse the XmlTree in preorder.
--
-- * 1.parameter valiEnv : lookup-table which maps element names to their validation functions
--
-- - returns : list of errors
traverseTree :: ValiEnvTable -> XmlArrow
traverseTree valiEnv
= choiceA [ isElem :-> (valFct $< getQName)
, this :-> none
]
<+>
( getChildren >>> traverseTree valiEnv )
where
valFct :: QName -> XmlArrow
valFct name = case (lookup (qualifiedName name) valiEnv) of
Nothing -> err ("Element " ++ show (qualifiedName name) ++ " not declared in DTD.")
Just f -> f
-- ------------------------------------------------------------
-- |
-- Build all validation functions.
--
-- * 1.parameter dtdPart : DTD subset, root node should be of type @DOCTYPE@
--
-- - returns : lookup-table which maps element names to their validation functions
buildAllValidationFunctions :: XmlTree -> ValiEnvTable
buildAllValidationFunctions dtdPart
= concat $
buildValidateRoot dtdPart : -- construct a list of validation filters for all element declarations
map (buildValidateFunctions dtdNodes) dtdNodes
where
dtdNodes = runLA getChildren dtdPart
-- |
-- Build a validation function for the document root. By root node @\/@
-- is meant, which is the topmost dummy created by the parser.
--
-- * 1.parameter dtdPart : DTD subset, root node should be of type @DOCTYPE@
--
-- - returns : entry for the lookup-table
buildValidateRoot :: XmlTree -> [ValiEnv]
buildValidateRoot dn
| isDTDDoctypeNode dn = [(t_root, valFct)]
| otherwise = []
where
name = dtd_name . getDTDAttributes $ dn
valFct :: XmlArrow
valFct = isElem
`guards`
( checkRegex (re_sym name)
>>>
msgToErr (("Root Element must be " ++ show name ++ ". ") ++)
)
checkRegex :: RE String -> LA XmlTree String
checkRegex re = listA getChildren
>>> arr (\ cs -> checkRE (matches re cs))
-- |
-- Build validation functions for an element.
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter nd : element declaration for which the validation functions are
-- created
--
-- - returns : entry for the lookup-table
buildValidateFunctions :: XmlTrees -> XmlTree -> [ValiEnv]
buildValidateFunctions dtdPart dn
| isDTDElementNode dn = [(elemName, valFct)]
| otherwise = []
where
elemName = dtd_name . getDTDAttributes $ dn
valFct :: XmlArrow
valFct = buildContentValidation dn
<+>
buildAttributeValidation dtdPart dn
-- ------------------------------------------------------------
-- |
-- Build validation functions for the content model of an element.
-- Validity constraint: Element Valid (3 \/ p.18 in Spec)
--
-- * 1.parameter nd : element declaration for which the content validation functions
-- are built
--
-- - returns : a function which takes an element (XTag), checks if its
-- children match its content model and returns a list of errors
buildContentValidation :: XmlTree -> XmlArrow
buildContentValidation nd
= contentValidation attrType nd
where
attrType = dtd_type . getDTDAttributes $ nd
-- Delegates construction of the validation function on the basis of the
-- content model type
contentValidation :: String -> XmlTree -> XmlArrow
contentValidation typ dn
| typ == k_pcdata = contentValidationPcdata
| typ == k_empty = contentValidationEmpty
| typ == k_any = contentValidationAny
| typ == v_children = contentValidationChildren cs
| typ == v_mixed = contentValidationMixed cs
| otherwise = none
where
cs = runLA getChildren dn
-- Checks #PCDATA content models
contentValidationPcdata :: XmlArrow
contentValidationPcdata
= isElem `guards` (contentVal $< getQName)
where
contentVal name
= checkRegex (re_rep (re_sym k_pcdata))
>>>
msgToErr ( ( "The content of element " ++
show (qualifiedName name) ++
" must match (#PCDATA). "
) ++
)
-- Checks EMPTY content models
contentValidationEmpty :: XmlArrow
contentValidationEmpty
= isElem `guards` (contentVal $< getQName)
where
contentVal name
= checkRegex re_unit
>>>
msgToErr ( ( "The content of element " ++
show (qualifiedName name) ++
" must match EMPTY. "
) ++
)
-- Checks ANY content models
contentValidationAny :: XmlArrow
contentValidationAny
= isElem `guards` (contentVal $< getName)
where
contentVal name
= checkRegex (re_rep (re_dot))
>>>
msgToErr ( ( "The content of element " ++
show name ++
" must match ANY. "
) ++
)
-- Checks "children" content models
contentValidationChildren :: XmlTrees -> XmlArrow
contentValidationChildren cm
= isElem `guards` (contentVal $< getName)
where
contentVal name
= checkRegex re
>>>
msgToErr ( ( "The content of element " ++
show name ++
" must match " ++ printRE re ++ ". "
) ++
)
re = createRE (head cm)
-- Checks "mixed content" content models
contentValidationMixed :: XmlTrees -> XmlArrow
contentValidationMixed cm
= isElem `guards` (contentVal $< getName)
where
contentVal name
= checkRegex re
>>>
msgToErr ( ( "The content of element " ++
show name ++
" must match " ++ printRE re ++ ". "
) ++
)
re = re_rep (re_alt (re_sym k_pcdata) (createRE (head cm)))
-- |
-- Build a regular expression from the content model. The regular expression
-- is provided by the module XmlRE.
--
-- * 1.parameter nd : node of the content model. Expected: @CONTENT@ or
-- @NAME@
--
-- - returns : regular expression of the content model
createRE :: XmlTree -> RE String
createRE dn
| isDTDContentNode dn
= processModifier modifier
| isDTDNameNode dn
= re_sym name
| otherwise
= error ("createRE: illegeal parameter:\n" ++ show dn)
where
al = getDTDAttributes dn
name = dtd_name al
modifier = dtd_modifier al
kind = dtd_kind al
cs = runLA getChildren dn
processModifier :: String -> RE String
processModifier m
| m == v_plus = re_plus (processKind kind)
| m == v_star = re_rep (processKind kind)
| m == v_option = re_opt (processKind kind)
| m == v_null = processKind kind
| otherwise = error ("Unknown modifier: " ++ show m)
processKind :: String -> RE String
processKind k
| k == v_seq = makeSequence cs
| k == v_choice = makeChoice cs
| otherwise = error ("Unknown kind: " ++ show k)
makeSequence :: XmlTrees -> RE String
makeSequence [] = re_unit
makeSequence (x:xs) = re_seq (createRE x) (makeSequence xs)
makeChoice :: XmlTrees -> RE String
makeChoice [] = re_zero ""
makeChoice (x:xs) = re_alt (createRE x) (makeChoice xs)
-- ------------------------------------------------------------
-- |
-- Build validation functions for the attributes of an element.
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter nd : element declaration for which the attribute validation functions
-- are created
--
-- - returns : a function which takes an element (XTag), checks if its
-- attributes are valid and returns a list of errors
buildAttributeValidation :: XmlTrees -> XmlTree -> XmlArrow
buildAttributeValidation dtdPart nd =
noDoublicateAttributes
<+>
checkNotDeclardAttributes attrDecls nd
<+>
checkRequiredAttributes attrDecls nd
<+>
checkFixedAttributes attrDecls nd
<+>
checkValuesOfAttributes attrDecls dtdPart nd
where
attrDecls = isDTDAttlist $$ dtdPart
-- |
-- Validate that all attributes of an element are unique.
-- Well-formdness constraint: Unique AttSpec (3.1 \/ p.19 in Spec)
--
-- - returns : a function which takes an element (XTag), checks if its
-- attributes are unique and returns a list of errors
noDoublicateAttributes :: XmlArrow
noDoublicateAttributes
= isElem
`guards`
( noDoubles' $< getName )
where
noDoubles' elemName
= listA (getAttrl >>> getName)
>>> applyA (arr (catA . map toErr . doubles . reverse))
where
toErr n1 = err ( "Attribute " ++ show n1 ++
" was already specified for element " ++
show elemName ++ "."
)
-- |
-- Validate that all \#REQUIRED attributes are provided.
-- Validity constraint: Required Attributes (3.3.2 \/ p.28 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter nd : element declaration which attributes have to be checked
--
-- - returns : a function which takes an element (XTag), checks if all
-- required attributes are provided and returns a list of errors
checkRequiredAttributes :: XmlTrees -> XmlTree -> XmlArrow
checkRequiredAttributes attrDecls dn
| isDTDElementNode dn
= isElem
`guards`
( checkRequired $< getName )
| otherwise
= none
where
elemName = dtd_name . getDTDAttributes $ dn
requiredAtts = (isAttlistOfElement elemName >>> isRequiredAttrKind) $$ attrDecls
checkRequired :: String -> XmlArrow
checkRequired name
= catA . map checkReq $ requiredAtts
where
checkReq :: XmlTree -> XmlArrow
checkReq attrDecl
= neg (hasAttr attName)
`guards`
err ( "Attribute " ++ show attName ++ " must be declared for element type " ++
show name ++ "." )
where
attName = dtd_value . getDTDAttributes $ attrDecl
-- |
-- Validate that \#FIXED attributes match the default value.
-- Validity constraint: Fixed Attribute Default (3.3.2 \/ p.28 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter nd : element declaration which attributes have to be checked
--
-- - returns : a function which takes an element (XTag), checks if all
-- fixed attributes match the default value and returns a list of errors
checkFixedAttributes :: XmlTrees -> XmlTree -> XmlArrow
checkFixedAttributes attrDecls dn
| isDTDElementNode dn
= isElem
`guards`
( checkFixed $< getName )
| otherwise
= none
where
elemName = dtd_name . getDTDAttributes $ dn
fixedAtts = (isAttlistOfElement elemName >>> isFixedAttrKind) $$ attrDecls
checkFixed :: String -> XmlArrow
checkFixed name
= catA . map checkFix $ fixedAtts
where
checkFix :: XmlTree -> XmlArrow
checkFix an
| isDTDAttlistNode an
= checkFixedVal $< getAttrValue attName
| otherwise
= none
where
al' = getDTDAttributes an
attName = dtd_value al'
defa = dtd_default al'
fixedValue = normalizeAttributeValue (Just an) defa
checkFixedVal :: String -> XmlArrow
checkFixedVal val
= ( ( hasAttr attName
>>>
isA (const (attValue /= fixedValue))
)
`guards`
err ( "Attribute " ++ show attName ++ " of element " ++ show name ++
" with value " ++ show attValue ++ " must have a value of " ++
show fixedValue ++ "." )
)
where
attValue = normalizeAttributeValue (Just an) val
-- |
-- Validate that an element has no attributes which are not declared.
-- Validity constraint: Attribute Value Type (3.1 \/ p.19 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter nd : element declaration which attributes have to be checked
--
-- - returns : a function which takes an element (XTag), checks if all
-- attributes are declared and returns a list of errors
checkNotDeclardAttributes :: XmlTrees -> XmlTree -> XmlArrow
checkNotDeclardAttributes attrDecls elemDescr
= checkNotDeclared
where
elemName = valueOfDTD a_name elemDescr
decls = isAttlistOfElement elemName $$ attrDecls
checkNotDeclared :: XmlArrow
checkNotDeclared
= isElem
`guards`
( getAttrl >>> searchForDeclaredAtt elemName decls )
searchForDeclaredAtt :: String -> XmlTrees -> XmlArrow
searchForDeclaredAtt name (dn : xs)
| isDTDAttlistNode dn
= ( getName >>> isA ( (dtd_value . getDTDAttributes $ dn) /= ) )
`guards`
searchForDeclaredAtt name xs
| otherwise
= searchForDeclaredAtt name xs
searchForDeclaredAtt name []
= mkErr $< getName
where
mkErr n = err ( "Attribute " ++ show n ++ " of element " ++
show name ++ " is not declared in DTD." )
-- |
-- Validate that the attribute value meets the lexical constraints of its type.
-- Validity constaint: Attribute Value Type (3.1 \/ p.19 in Spec)
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter nd : element declaration which attributes have to be checked
--
-- - returns : a function which takes an element (XTag), checks if all
-- attributes meet the lexical constraints and returns a list of errors
checkValuesOfAttributes :: XmlTrees -> XmlTrees -> XmlTree -> XmlArrow
checkValuesOfAttributes attrDecls dtdPart elemDescr
= checkValues
where
elemName = dtd_name . getDTDAttributes $ elemDescr
decls = isAttlistOfElement elemName $$ attrDecls
checkValues :: XmlArrow
checkValues
= isElem
`guards`
( checkValue $< getAttrl )
checkValue att
= catA . map checkVal $ decls
where
checkVal :: XmlTree -> XmlArrow
checkVal attrDecl
| isDTDAttlistNode attrDecl
&&
nameOfAttr att == dtd_value al'
= checkAttributeValue dtdPart attrDecl
| otherwise
= none
where
al' = getDTDAttributes attrDecl
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/IdValidation.hs 0000644 0000000 0000000 00000021702 12752557014 021120 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.IdValidation
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
This module provides functions for checking special ID/IDREF/IDREFS constraints.
Checking special ID\/IDREF\/IDREFS constraints means:
- checking that all ID values are unique.
- checking that all IDREF\/IDREFS values match the value of some ID attribute
ID-Validation should be started before or after validating the document.
First all nodes with ID attributes are collected from the document, then
it is validated that values of ID attributes do not occure more than once.
During a second iteration over the document it is validated that there exists
an ID attribute value for IDREF\/IDREFS attribute values.
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DTDValidation.IdValidation
( validateIds
)
where
import Data.Maybe
import Text.XML.HXT.DTDValidation.TypeDefs
import Text.XML.HXT.DTDValidation.AttributeValueValidation
-- ------------------------------------------------------------
-- |
-- Lookup-table which maps element names to their validation functions. The
-- validation functions are XmlFilters.
type IdEnvTable = [IdEnv]
type IdEnv = (ElemName, IdFct)
type ElemName = String
type IdFct = XmlArrow
-- ------------------------------------------------------------
-- |
-- Perform the validation of the ID/IDREF/IDREFS constraints.
--
-- * 1.parameter dtdPart : the DTD subset (Node @DOCTYPE@) of the XmlTree
--
-- - 2.parameter doc : the document subset of the XmlTree
--
-- - returns : a list of errors
validateIds :: XmlTree -> XmlArrow
validateIds dtdPart
= validateIds' $< listA (traverseTree idEnv)
where
idAttrTypes = runLA (getChildren >>> isIdAttrType) dtdPart
elements = runLA (getChildren >>> isDTDElement) dtdPart
atts = runLA (getChildren >>> isDTDAttlist) dtdPart
idEnv = buildIdCollectorFcts idAttrTypes
validateIds' :: XmlTrees -> XmlArrow
validateIds' idNodeList
= ( constA idNodeList >>> checkForUniqueIds idAttrTypes )
<+>
checkIdReferences idRefEnv
where
idRefEnv = buildIdrefValidationFcts idAttrTypes elements atts idNodeList
-- |
-- Traverse the XmlTree in preorder.
--
-- * 1.parameter idEnv : lookup-table which maps element names to their validation functions
--
-- - returns : list of errors
traverseTree :: IdEnvTable -> XmlArrow
traverseTree idEnv
= multi (isElem `guards` (idFct $< getName))
where
idFct :: String -> XmlArrow
idFct name = fromMaybe none . lookup name $ idEnv
-- |
-- Returns the value of an element's ID attribute. The attribute name has to be
-- retrieved first from the DTD.
--
-- * 1.parameter dtdPart : list of ID attribute definitions from the DTD
--
-- - 2.parameter n : element which ID attribute value should be returned
--
-- - returns : normalized value of the ID attribute
getIdValue :: XmlTrees -> XmlTree -> String
getIdValue dns
= concat . runLA (single getIdValue')
where
getIdValue' :: LA XmlTree String
getIdValue'
= isElem `guards` catA (map getIdVal dns)
where
getIdVal dn
| isDTDAttlistNode dn = hasName elemName
`guards`
( getAttrValue0 attrName
>>>
arr (normalizeAttributeValue (Just dn))
)
| otherwise = none
where
al = getDTDAttributes dn
elemName = dtd_name al
attrName = dtd_value al
-- ------------------------------------------------------------
-- |
-- Build collector functions which return XTag nodes with ID attributes from
-- a document.
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - returns : lookup-table which maps element names to their collector function
buildIdCollectorFcts :: XmlTrees -> IdEnvTable
buildIdCollectorFcts idAttrTypes
= concatMap buildIdCollectorFct idAttrTypes
where
buildIdCollectorFct :: XmlTree -> [IdEnv]
buildIdCollectorFct dn
| isDTDAttlistNode dn = [(elemName, hasAttr attrName)]
| otherwise = []
where
al = getDTDAttributes dn
elemName = dtd_name al
attrName = dtd_value al
-- |
-- Build validation functions for checking if IDREF\/IDREFS values match a value
-- of some ID attributes.
--
-- * 1.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - 2.parameter idNodeList : list of all XTag nodes with ID attributes
--
-- - returns : lookup-table which maps element names to their validation function
buildIdrefValidationFcts :: XmlTrees -> XmlTrees -> XmlTrees -> XmlTrees -> IdEnvTable
buildIdrefValidationFcts idAttrTypes elements atts idNodeList
= concatMap buildElemValidationFct elements
where
idValueList = map (getIdValue idAttrTypes) idNodeList
buildElemValidationFct :: XmlTree -> [IdEnv]
buildElemValidationFct dn
| isDTDElementNode dn = [(elemName, buildIdrefValidationFct idRefAttrTypes)]
| otherwise = []
where
al = getDTDAttributes dn
elemName = dtd_name al
idRefAttrTypes = (isAttlistOfElement elemName >>> isIdRefAttrType) $$ atts
buildIdrefValidationFct :: XmlTrees -> XmlArrow
buildIdrefValidationFct
= catA . map buildIdref
buildIdref :: XmlTree -> XmlArrow
buildIdref dn
| isDTDAttlistNode dn = isElem >>> (checkIdref $< getName)
| otherwise = none
where
al = getDTDAttributes dn
attrName = dtd_value al
attrType = dtd_type al
checkIdref :: String -> XmlArrow
checkIdref name
= hasAttr attrName
`guards`
( checkIdVal $< getAttrValue attrName )
where
checkIdVal :: String -> XmlArrow
checkIdVal av
| attrType == k_idref
= checkValueDeclared attrValue
| null valueList
= err ( "Attribute " ++ show attrName ++
" of Element " ++ show name ++
" must have at least one name."
)
| otherwise
= catA . map checkValueDeclared $ valueList
where
valueList = words attrValue
attrValue = normalizeAttributeValue (Just dn) av
checkValueDeclared :: String -> XmlArrow
checkValueDeclared attrValue
= if attrValue `elem` idValueList
then none
else err ( "An Element with identifier " ++ show attrValue ++
" must appear in the document."
)
-- ------------------------------------------------------------
-- |
-- Validate that all ID values are unique within a document.
-- Validity constraint: ID (3.3.1 \/p. 25 in Spec)
--
-- * 1.parameter idNodeList : list of all XTag nodes with ID attributes
--
-- - 2.parameter dtdPart : the children of the @DOCTYPE@ node
--
-- - returns : a list of errors
checkForUniqueIds :: XmlTrees -> LA XmlTrees XmlTree
checkForUniqueIds idAttrTypes -- idNodeList
= fromSLA [] ( unlistA
>>>
isElem
>>>
(checkForUniqueId $<< getName &&& this)
)
where
checkForUniqueId :: String -> XmlTree -> SLA [String] XmlTree XmlTree
checkForUniqueId name x
= ifA ( getState
>>>
isA (attrValue `elem`)
)
(err ( "Attribute value " ++ show attrValue ++ " of type ID for element " ++
show name ++ " must be unique within the document." ))
(nextState (attrValue:) >>> none)
where
attrValue = getIdValue (isAttlistOfElement name $$ idAttrTypes) x
-- |
-- Validate that all IDREF\/IDREFS values match the value of some ID attribute.
-- Validity constraint: IDREF (3.3.1 \/ p.26 in Spec)
--
-- * 1.parameter idRefEnv : lookup-table which maps element names to their validation function
--
-- - 2.parameter doc : the document to validate
--
-- - returns : a list of errors
checkIdReferences :: IdEnvTable -> LA XmlTree XmlTree
checkIdReferences idRefEnv
= traverseTree idRefEnv
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/RE.hs 0000644 0000000 0000000 00000027730 12752557014 017066 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.RE
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
A module for regular expression matching based on derivatives of regular expressions.
The code was taken from Joe English ().
Tested and extended by Martin Schmidt.
Further references for the algorithm:
Janusz A. Brzozowski.
Derivatives of Regular Expressions. Journal of the ACM, Volume 11, Issue 4, 1964.
Mark Hopkins.
Regular Expression Package. Posted to comp.compilers, 1994.
Available per FTP at .
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DTDValidation.RE
( RE(..)
, re_unit
, re_zero
, re_sym
, re_rep
, re_plus
, re_opt
, re_seq
, re_alt
, re_dot
, checkRE
, matches
, nullable
, printRE
)
where
import Data.List (foldl')
-- |
-- Data type for regular expressions.
data RE a =
RE_ZERO String --' L(0) = {} (empty set)
| RE_UNIT --' L(1) = { [] } (empty sequence)
| RE_SYM a --' L(x) = { [x] }
| RE_DOT --' accept any single symbol
| RE_REP (RE a) --' L(e*) = { [] } `union` L(e+)
| RE_PLUS (RE a) --' L(e+) = { x ++ y | x <- L(e), y <- L(e*) }
| RE_OPT (RE a) --' L(e?) = L(e) `union` { [] }
| RE_SEQ (RE a) (RE a) --' L(e,f) = { x ++ y | x <- L(e), y <- L(f) }
| RE_ALT (RE a) (RE a) --' L(e|f) = L(e) `union` L(f)
deriving (Show, Eq, Ord)
-- ------------------------------------------------------------
-- Constructor functions to simplify regular expressions when constructing them.
-- |
-- Constructs a regular expression for an empty set.
--
-- * 1.parameter errMsg : error message
--
-- - returns : regular expression for an empty set
re_zero :: String -> RE a
re_zero m = RE_ZERO m
-- |
-- Constructs a regular expression for an empty sequence.
--
-- - returns : regular expression for an empty sequence
re_unit :: RE a
re_unit = RE_UNIT
-- |
-- Constructs a regular expression for accepting a symbol
--
-- * 1.parameter sym : the symbol to be accepted
--
-- - returns : regular expression for accepting a symbol
re_sym :: a -> RE a
re_sym x = RE_SYM x
-- |
-- Constructs a regular expression for accepting any singel symbol
--
-- - returns : regular expression for accepting any singel symbol
re_dot :: RE a
re_dot = RE_DOT
-- |
-- Constructs an optional repetition (*) of a regular expression
--
-- * 1.parameter re_a : regular expression to be repeted
--
-- - returns : new regular expression
re_rep :: RE a -> RE a
re_rep RE_UNIT = RE_UNIT
re_rep (RE_ZERO _) = RE_UNIT
re_rep e@(RE_REP _) = RE_REP (rem_rep e) -- remove nested reps
re_rep e@(RE_ALT _ _) = RE_REP (rem_rep e) -- remove nested reps in alternatives
re_rep e = RE_REP e
-- |
-- remove redundant nested *'s in RE
-- theoretically this is unneccessary,
-- but without this simplification the runtime can increase exponentally
-- when computing deltas, e.g. for a** or (a|b*)* which is the same as (a|b)*
rem_rep :: RE a -> RE a
rem_rep (RE_ALT RE_UNIT e2) = e2
rem_rep (RE_ALT e1 e2) = RE_ALT (rem_rep e1) (rem_rep e2)
rem_rep (RE_REP e1) = rem_rep e1
rem_rep e1 = e1
-- |
-- Constructs a repetition (+) of a regular expression
--
-- * 1.parameter re_a : regular expression to be repeted
--
-- - returns : new regular expression
re_plus :: RE a -> RE a
re_plus RE_UNIT = RE_UNIT
re_plus (RE_ZERO m) = RE_ZERO m
re_plus e
| nullable e = re_rep e -- nullable e => e+ == e*
| otherwise = re_seq e (re_rep e)
-- |
-- Constructs an option (?) of a regular expression
--
-- * 1.parameter re_a : regular expression to be optional
--
-- - returns : new regular expression
re_opt :: (Ord a) => RE a -> RE a
re_opt RE_UNIT = RE_UNIT
re_opt (RE_ZERO _) = RE_UNIT
re_opt e = re_alt RE_UNIT e
-- |
-- Constructs a sequence (,) of two regular expressions
--
-- * 1.parameter re_a : first regular expression in sequence
--
-- - 2.parameter re_b : second regular expression in sequence
--
-- - returns : new regular expression
re_seq :: RE a -> RE a -> RE a
re_seq e1@(RE_ZERO _) _ = e1 -- simplification
re_seq RE_UNIT e2 = e2 -- simplification
re_seq _ e2@(RE_ZERO _) = e2 -- simplification
re_seq e1 RE_UNIT = e1 -- simplification
re_seq (RE_SEQ e11 e12) e2 = re_seq e11 (re_seq e12 e2) -- right assoc.
re_seq e1 e2 = RE_SEQ e1 e2
-- |
-- Constructs an alternative (|) of two regular expressions
--
-- * 1.parameter re_a : first regular expression of alternative
--
-- - 2.parameter re_b : second regular expression of alternative
--
-- - returns : new regular expression
re_alt :: (Ord a) => RE a -> RE a -> RE a
re_alt (RE_ZERO _) e2 = e2
re_alt e1 (RE_ZERO _) = e1
re_alt (RE_ALT e11 e12) e2 = re_alt e11 (re_alt e12 e2) -- is right assoc
re_alt e1 e2@(RE_ALT e21 e22)
| e1 == e21 = e2 -- duplicates removed, the effective rule
| e1 > e21 = re_alt e21 (re_alt e1 e22) -- sort alternatives
| otherwise = RE_ALT e1 e2
re_alt e1 e2
| e1 == e2 = e2 -- simplification, the effective rule
| e1 > e2 = re_alt e2 e1 -- sort alts for unique repr.
| otherwise = RE_ALT e1 e2
-- ------------------------------------------------------------
-- |
-- Checks if a regular expression matches the empty sequence.
--
-- nullable e == [] `in` L(e)
--
-- This check indicates if a regular expression fits to a sentence or not.
--
-- * 1.parameter re : regular expression to be checked
--
-- - returns : true if regular expression matches the empty sequence,
-- otherwise false
nullable :: RE a -> Bool
nullable (RE_ZERO _) = False
nullable RE_UNIT = True
nullable (RE_SYM _) = False
nullable (RE_REP _) = True
nullable (RE_PLUS e) = nullable e
nullable (RE_OPT _) = True
nullable (RE_SEQ e f) = nullable e && nullable f
nullable (RE_ALT e f) = nullable e || nullable f
nullable RE_DOT = False
-- |
-- Derives a regular expression with respect to one symbol.
--
-- L(delta e x) = x \ L(e)
--
-- * 1.parameter re : regular expression to be derived
--
-- - 2.parameter sym : the symbol on which the regular expression is applied
--
-- - returns : the derived regular expression
delta :: (Ord a, Show a) => RE a -> a -> RE a
delta re x = case re of
RE_ZERO _ -> re -- re_zero m
RE_UNIT -> re_zero ("Symbol " ++ show x ++ " unexpected.")
RE_SYM sym
| x == sym -> re_unit
| otherwise -> re_zero ("Symbol " ++ show sym ++ " expected, but symbol " ++ show x ++ " found.")
RE_REP e -> re_seq (delta e x) re -- (re_rep e)
RE_PLUS e -> re_seq (delta e x) (re_rep e)
RE_OPT e -> delta e x
RE_SEQ e f
| nullable e -> re_alt (re_seq (delta e x) f) (delta f x)
| otherwise -> re_seq (delta e x) f
RE_ALT e f -> re_alt (delta e x) (delta f x)
RE_DOT -> re_unit
-- |
-- Derives a regular expression with respect to a sentence.
--
-- * 1.parameter re : regular expression
--
-- - 2.parameter s : sentence to which the regular expression is applied
--
-- - returns : the derived regular expression
matches :: (Ord a, Show a) => RE a -> [a] -> RE a
matches e = foldl' delta e
-- |
-- Checks if an input matched a regular expression. The function should be
-- called after matches.
--
-- Was the sentence used in @matches@ in the language of the regular expression?
-- -> matches e s == s `in` L(e)?
--
-- * 1.parameter re : the derived regular expression
--
-- - returns : empty String if input matched the regular expression, otherwise
-- an error message is returned
checkRE :: (Eq a, Show a) => RE a -> String
checkRE (RE_UNIT) = ""
checkRE (RE_ZERO m) = m
checkRE re
| nullable re = ""
| otherwise = "Input must match " ++ printRE re
-- ------------------------------------------------------------
-- |
-- Constructs a string representation of a regular expression.
--
-- * 1.parameter re : a regular expression
--
-- - returns : the string representation of the regular expression
printRE :: (Eq a, Show a) => RE a -> String
printRE re'
= "( " ++ printRE1 re' ++ " )"
where
-- printRE1 :: (Eq a, Show a) => RE a -> String
printRE1 re = case re of
RE_ZERO m -> "ERROR: " ++ m
RE_UNIT -> ""
RE_SYM sym -> show sym
RE_DOT -> "."
RE_REP e
| isSingle e -> printRE1 e ++ "*"
| otherwise -> "(" ++ printRE1 e ++ ")*"
RE_PLUS e
| isSingle e -> printRE1 e ++ "+"
| otherwise -> "(" ++ printRE1 e ++ ")+"
RE_OPT e
| isSingle e -> printRE1 e ++ "?"
| otherwise -> "(" ++ printRE1 e ++ ")?"
RE_SEQ e1 (RE_REP e2)
| e1 == e2 -> printRE1 (RE_PLUS e1)
RE_SEQ e1 (RE_SEQ (RE_REP e2) e3)
| e1 == e2 -> printRE1 (RE_SEQ (RE_PLUS e1) e3)
RE_SEQ e f
| isAlt e && not (isAlt f) -> "(" ++ printRE1 e ++ ") , " ++ printRE1 f
| not (isAlt e) && isAlt f -> printRE1 e ++ " , (" ++ printRE1 f ++ ")"
| isAlt e && isAlt f -> "(" ++ printRE1 e ++ ") , (" ++ printRE1 f ++ ")"
| otherwise -> printRE1 e ++ " , " ++ printRE1 f
RE_ALT RE_UNIT f -> printRE1 (RE_OPT f)
RE_ALT e f
| isSeq e && not (isSeq f) -> "(" ++ printRE1 e ++ ") | " ++ printRE1 f
| not (isSeq e) && isSeq f -> printRE1 e ++ " | (" ++ printRE1 f ++ ")"
| isSeq e && isSeq f -> "(" ++ printRE1 e ++ ") | (" ++ printRE1 f ++ ")"
| otherwise -> printRE1 e ++ " | " ++ printRE1 f
isSingle :: RE a -> Bool
isSingle (RE_ZERO _) = True
isSingle RE_UNIT = True
isSingle (RE_SYM _) = True
isSingle _ = False
isSeq :: (Eq a) => RE a -> Bool
isSeq (RE_SEQ e1 (RE_REP e2))
| e1 == e2 = False -- is transformed back into RE_PLUS
isSeq (RE_SEQ _ _) = True
isSeq _ = False
isAlt :: RE a -> Bool
isAlt (RE_ALT RE_UNIT _)= False -- is transformed back into a RE_OPT
isAlt (RE_ALT _ _) = True
isAlt _ = False
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/TypeDefs.hs 0000644 0000000 0000000 00000012147 12752557014 020277 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.TypeDefs
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
This module provides all datatypes for DTD validation
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DTDValidation.TypeDefs
( module Text.XML.HXT.DTDValidation.TypeDefs
, module Text.XML.HXT.DOM.Interface
, module Text.XML.HXT.Arrow.XmlArrow
, module Control.Arrow
, module Control.Arrow.ArrowList
, module Control.Arrow.ArrowIf
, module Control.Arrow.ArrowState
, module Control.Arrow.ArrowTree
, module Control.Arrow.ListArrow
, module Control.Arrow.StateListArrow
)
where
import Control.Arrow -- classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowState
import Control.Arrow.ArrowTree
import Control.Arrow.ListArrow -- arrow types
import Control.Arrow.StateListArrow
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.DOM.Interface
-- ------------------------------------------------------------
infixr 0 $$
type XmlArrow = LA XmlTree XmlTree
type XmlArrowS = LA XmlTree XmlTrees
-- ------------------------------------------------------------
dtd_name
, dtd_value
, dtd_type
, dtd_kind
, dtd_modifier
, dtd_default :: Attributes -> String
dtd_name = lookup1 a_name
dtd_value = lookup1 a_value
dtd_type = lookup1 a_type
dtd_kind = lookup1 a_kind
dtd_modifier = lookup1 a_modifier
dtd_default = lookup1 a_default
-- ------------------------------------------------------------
isUnparsedEntity :: ArrowDTD a => a XmlTree XmlTree
isUnparsedEntity = filterA $
getDTDAttrl >>> isA (hasEntry k_ndata)
hasDTDAttrValue :: ArrowDTD a => String -> (String -> Bool) -> a XmlTree XmlTree
hasDTDAttrValue an p = filterA $
getDTDAttrl >>> isA (p . lookup1 an)
isRequiredAttrKind :: ArrowDTD a => a XmlTree XmlTree
isRequiredAttrKind = hasDTDAttrValue a_kind (== k_required)
isDefaultAttrKind :: ArrowDTD a => a XmlTree XmlTree
isDefaultAttrKind = hasDTDAttrValue a_kind (== k_default)
isFixedAttrKind :: ArrowDTD a => a XmlTree XmlTree
isFixedAttrKind = hasDTDAttrValue a_kind (== k_fixed)
isMixedContentElement :: ArrowDTD a => a XmlTree XmlTree
isMixedContentElement = hasDTDAttrValue a_type (== v_mixed)
isEmptyElement :: ArrowDTD a => a XmlTree XmlTree
isEmptyElement = hasDTDAttrValue a_type (== k_empty)
isEnumAttrType :: ArrowDTD a => a XmlTree XmlTree
isEnumAttrType = hasDTDAttrValue a_type (== k_enumeration)
isIdAttrType :: ArrowDTD a => a XmlTree XmlTree
isIdAttrType = hasDTDAttrValue a_type (== k_id)
isIdRefAttrType :: ArrowDTD a => a XmlTree XmlTree
isIdRefAttrType = hasDTDAttrValue a_type (`elem` [k_idref, k_idrefs])
isNotationAttrType :: ArrowDTD a => a XmlTree XmlTree
isNotationAttrType = hasDTDAttrValue a_type (== k_notation)
isAttlistOfElement :: ArrowDTD a => String -> a XmlTree XmlTree
isAttlistOfElement el = isDTDAttlist
>>>
hasDTDAttrValue a_name (== el)
valueOfDTD :: String -> XmlTree -> String
valueOfDTD n = concat . runLA ( getDTDAttrl >>^ lookup1 n )
valueOf :: String -> XmlTree -> String
valueOf n = concat . runLA ( getAttrValue n )
getDTDAttributes :: XmlTree -> Attributes
getDTDAttributes = concat . runLA getDTDAttrl
isDTDDoctypeNode :: XmlTree -> Bool
isDTDDoctypeNode = not . null . runLA isDTDDoctype
isDTDElementNode :: XmlTree -> Bool
isDTDElementNode = not . null . runLA isDTDElement
isDTDAttlistNode :: XmlTree -> Bool
isDTDAttlistNode = not . null . runLA isDTDAttlist
isDTDContentNode :: XmlTree -> Bool
isDTDContentNode = not . null . runLA isDTDContent
isDTDNameNode :: XmlTree -> Bool
isDTDNameNode = not . null . runLA isDTDName
isElemNode :: XmlTree -> Bool
isElemNode = not . null . runLA isElem
nameOfAttr :: XmlTree -> String
nameOfAttr = concat . runLA (getAttrName >>^ qualifiedName)
nameOfElem :: XmlTree -> String
nameOfElem = concat . runLA (getElemName >>^ qualifiedName)
-- |
-- infix operator for applying an arrow to a list of trees
--
-- * 1.parameter f : the arrow
--
-- - 2.parameter ts : the list of trees
--
-- - returns : list of results
($$) :: XmlArrow -> XmlTrees -> XmlTrees
f $$ l = runLA (unlistA >>> f) l
-- | create an error message
msgToErr :: (String -> String) -> LA String XmlTree
msgToErr f = mkErr $< this
where
mkErr "" = none
mkErr s = err (f s)
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/Validation.hs 0000644 0000000 0000000 00000011026 12752557014 020641 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.Validation
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
This module provides functions for validating XML documents represented as
XmlTree.
Unlike other popular XML validation tools the validation functions return
a list of errors instead of aborting after the first error was found.
Note: The validation process has been split into validation and transformation!
If @validate@ did not report any errors, @transform@
should be called, to change the document the way a validating parser
is expected to do.
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DTDValidation.Validation
( getDTDSubset
, generalEntitiesDefined
, validate
, validateDTD
, validateDoc
, removeDoublicateDefs
, transform
)
where
import Text.XML.HXT.DTDValidation.TypeDefs
import qualified Text.XML.HXT.DTDValidation.DocTransformation as DocTransformation
import qualified Text.XML.HXT.DTDValidation.DocValidation as DocValidation
import qualified Text.XML.HXT.DTDValidation.DTDValidation as DTDValidation
import qualified Text.XML.HXT.DTDValidation.IdValidation as IdValidation
-- |
-- Main validation filter. Check if the DTD and the document are valid.
--
--
-- - returns : a function which expects a complete document as XmlTree input
-- and returns a list of all errors found.
validate :: XmlArrow
validate = validateDTD <+> validateDoc
-- |
-- Check if the DTD is valid.
--
--
-- - returns : a function which expects an XmlTree from the parser as input
-- and returns a list of all errors found in the DTD.
validateDTD :: XmlArrow
validateDTD = choiceA
[ getDTDSubset :-> DTDValidation.validateDTD
, this :-> err "Can't validate DTD: There is no DOCTYPE declaration in the document."
]
-- |
-- Check if the document corresponds to the given DTD.
--
--
-- - returns : a function which expects a complete document as XmlTree input
-- and returns a list of all errors found in the content part.
validateDoc :: XmlArrow
validateDoc
= validateDoc' $< getDTD
where
validateDoc' [] = err "Can't validate document: There is no DOCTYPE declaration in the document."
validateDoc' (dtdPart:_) = DocValidation.validateDoc dtdPart
<+>
IdValidation.validateIds dtdPart
getDTD :: XmlArrowS
getDTD = listA ( getDTDSubset
>>>
removeDoublicateDefs
)
-- |
-- filter for transforming a document with respect to the given DTD.
--
-- Validating parsers
-- are expected to normalize attribute values and add default values.
-- This function should be called after a successful validation.
--
--
-- - returns : a function which expects a complete XML document tree
-- and returns the transformed XmlTree
transform :: XmlArrow
transform = choiceA
[ isRoot :-> (transformDoc $< getDTD)
, this :-> fatal "Can't transform document: No document root given"
]
where
transformDoc [] = this
transformDoc dtd = DocTransformation.transform (head dtd)
-- |
-- Removes doublicate declarations from the DTD which first declaration is
-- binding. This is the case for ATTLIST and ENTITY declarations.
--
--
-- - returns : A function that replaces the children of DOCTYPE nodes by a list
-- where all multiple declarations are removed.
removeDoublicateDefs :: XmlArrow
removeDoublicateDefs = DTDValidation.removeDoublicateDefs
--
-- selects the DTD part of a document
-- but only, if there is more than the internal part for the 4 predefined XML entities
getDTDSubset :: XmlArrow
getDTDSubset = getChildren
>>>
( filterA $ isDTDDoctype >>> getDTDAttrl >>> isA (hasEntry a_name) )
generalEntitiesDefined :: XmlArrow
generalEntitiesDefined = getDTDSubset
>>>
deep isDTDEntity
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/DTDValidation/XmlRE.hs 0000644 0000000 0000000 00000007140 12752557014 017540 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.DTDValidation.XmlRE
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
A module for regular expression matching, adapted for XML DTDs.
This module is based on the module RE.
-}
-- ------------------------------------------------------------
module Text.XML.HXT.DTDValidation.XmlRE
( RE
, checkRE
, matches
, printRE
, re_unit
, re_zero
, re_sym
, re_rep
, re_plus
, re_opt
, re_seq
, re_alt
, re_dot
)
where
-- import Debug.Trace (trace)
import Data.List (foldl')
import Text.XML.HXT.DTDValidation.RE hiding (matches)
import Text.XML.HXT.Arrow.Edit (removeComment,
removeWhiteSpace)
import qualified Text.XML.HXT.DOM.XmlNode as XN
import Text.XML.HXT.DTDValidation.TypeDefs
-- |
-- Derives a regular expression with respect to a list of elements.
--
-- * 1.parameter re : regular expression
--
-- - 2.parameter list : list of elements to which the regular expression is applied
--
-- - returns : the derived regular expression
matches :: RE String -> XmlTrees -> RE String
matches re list
= foldl' delta re (removeUnimportantStuff $$ list)
where
removeUnimportantStuff :: XmlArrow
removeUnimportantStuff = processBottomUp (removeWhiteSpace >>> removeComment)
-- trace of growth of REs
-- delta' re el = delta (trace (("RE : " ++) . (++ "\n" ) . show $ re) re) el
-- |
-- Derives a regular expression with respect to one element.
--
-- L(delta e x) = x \ L(e)
--
-- * 1.parameter re : regular expression to be derived
--
-- - 2.parameter el : the element on which the regular expression is applied
--
-- - returns : the derived regular expression
delta :: RE String -> XmlTree -> RE String
delta re el
| not (allowed el) = re
| otherwise = case re of
RE_ZERO m -> re_zero m
RE_UNIT -> re_zero (elemName el ++" unexpected.")
RE_SYM sym
| sym == k_pcdata -> if ((XN.isText el) || (XN.isCdata el))
then re_unit
else re_zero ("Character data expected, but "++ elemName el ++" found.")
| expectedNode el sym -> re_unit
| otherwise -> re_zero ("Element "++ show sym ++" expected, but "++ elemName el ++" found.")
RE_REP e -> re_seq (delta e el) (re_rep e)
RE_PLUS e -> re_seq (delta e el) (re_rep e)
RE_OPT e -> delta e el
RE_SEQ e f
| nullable e -> re_alt (re_seq (delta e el) f) (delta f el)
| otherwise -> re_seq (delta e el) f
RE_ALT e f -> re_alt (delta e el) (delta f el)
RE_DOT -> re_unit
where
expectedNode :: XmlTree -> String -> Bool
expectedNode n sym
| XN.isElem n = nameOfElem n == sym
| otherwise = False
elemName :: XmlTree -> String
elemName n
| XN.isElem n = "element "++ show (nameOfElem n)
| otherwise = "character data"
allowed :: XmlTree -> Bool
allowed n = XN.isElem n || XN.isText n || XN.isCdata n
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/IO/GetFILE.hs 0000644 0000000 0000000 00000007361 12752557014 015616 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.IO.GetFILE
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
The GET method for file protocol
-}
-- ------------------------------------------------------------
module Text.XML.HXT.IO.GetFILE
( getStdinCont
, getCont
)
where
import Control.Exception ( try )
import qualified Data.ByteString.Lazy as B
import Network.URI ( unEscapeString
)
import System.IO.Error ( ioeGetErrorString
)
import System.Directory ( doesFileExist
-- , getPermissions
-- , readable
)
import Text.XML.HXT.DOM.XmlKeywords
-- ------------------------------------------------------------
getStdinCont :: Bool -> IO (Either ([(String, String)], String) B.ByteString)
getStdinCont strictInput
= do
c <- try ( do
cb <- B.getContents
if strictInput
then B.length cb `seq` return cb
else return cb
)
return (either readErr Right c)
where
readErr e
= Left ( [ (transferStatus, "999")
, (transferMessage, msg)
]
, msg
)
where
msg = "stdin read error: " ++ es
es = ioeGetErrorString e
getCont :: Bool -> String -> IO (Either ([(String, String)], String) B.ByteString)
getCont strictInput source
= do -- preliminary
source'' <- checkFile source'
case source'' of
Nothing -> return $ fileErr "file not found"
Just fn -> do
-- perm <- getPermissions fn -- getPermission may fail
-- if not (readable perm)
if False
then return $ fileErr "file not readable"
else do
c <- try $
do
cb <- B.readFile fn
if strictInput
then B.length `seq` return cb
else return cb
return (either readErr Right c)
where
source' = drivePath $ source
readErr e
= fileErr (ioeGetErrorString e)
fileErr msg0
= Left ( [ (transferStatus, "999")
, (transferMessage, msg)
]
, msg
)
where
msg = "file read error: " ++ show msg0 ++ " when accessing " ++ show source'
-- remove leading / if file starts with windows drive letter, e.g. /c:/windows -> c:/windows
drivePath ('/' : file@(d : ':' : _more))
| d `elem` ['A'..'Z'] || d `elem` ['a'..'z']
= file
drivePath file
= file
-- | check whether file exists, if not
-- try to unescape filename and check again
-- return the existing filename
checkFile :: String -> IO (Maybe String)
checkFile fn
= do
exists <- doesFileExist fn
if exists
then return (Just fn)
else do
exists' <- doesFileExist fn'
return ( if exists'
then Just fn'
else Nothing
)
where
fn' = unEscapeString fn
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/HtmlParsec.hs 0000644 0000000 0000000 00000052756 13001373351 017422 0 ustar 00 0000000 0000000 {-# LANGUAGE CPP #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Parser.HtmlParsec
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
This parser tries to interprete everything as HTML
no errors are emitted during parsing. If something looks
weired, warning messages are inserted in the document tree.
All filter are pure XmlFilter,
errror handling and IO is done in 'Text.XML.HXT.Parser.HtmlParser'
or other modules
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Parser.HtmlParsec
( parseHtmlText
, parseHtmlDocument
, parseHtmlContent
, isEmptyHtmlTag
, isInnerHtmlTagOf
, closesHtmlTag
, emptyHtmlTags
)
where
#if MIN_VERSION_base(4,8,2)
#else
import Control.Applicative ((<$>))
#endif
import Data.Char ( toLower
, toUpper
)
import Data.Char.Properties.XMLCharProps ( isXmlChar
)
import Data.Maybe ( fromMaybe
, fromJust
)
import qualified Data.Map as M
import Text.ParserCombinators.Parsec ( SourcePos
, anyChar
, between
-- , char
, eof
, getPosition
, many
, many1
, noneOf
, option
, runParser
, satisfy
, string
, try
, (<|>)
)
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.XmlNode ( mkText'
, mkError'
, mkCdata'
, mkCmt'
, mkCharRef'
, mkElement'
, mkAttr'
, mkDTDElem'
, mkPi'
, isEntityRef
, getEntityRef
)
import Text.XML.HXT.Parser.XmlTokenParser ( allBut
, amp
, dq
, eq
, gt
, lt
, name
, pubidLiteral
, skipS
, skipS0
, sPace
, sq
, systemLiteral
, checkString
, singleCharsT
, referenceT
, mergeTextNodes
)
import Text.XML.HXT.Parser.XmlParsec ( misc
, parseXmlText
, xMLDecl'
)
import Text.XML.HXT.Parser.XmlCharParser ( xmlChar
, SimpleXParser
, withNormNewline
)
import Text.XML.HXT.Parser.XhtmlEntities ( xhtmlEntities
)
-- ------------------------------------------------------------
parseHtmlText :: String -> XmlTree -> XmlTrees
parseHtmlText loc t = parseXmlText htmlDocument (withNormNewline ()) loc $ t
-- ------------------------------------------------------------
parseHtmlFromString :: SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString parser loc
= either ((:[]) . mkError' c_err . (++ "\n") . show) id . runParser parser (withNormNewline ()) loc
parseHtmlDocument :: String -> String -> XmlTrees
parseHtmlDocument = parseHtmlFromString htmlDocument
parseHtmlContent :: String -> XmlTrees
parseHtmlContent = parseHtmlFromString htmlContent "string"
-- ------------------------------------------------------------
type Context = (XmlTreeFl, OpenTags)
type XmlTreeFl = XmlTrees -> XmlTrees
type OpenTags = [(String, XmlTrees, XmlTreeFl)]
-- ------------------------------------------------------------
htmlDocument :: SimpleXParser XmlTrees
htmlDocument
= do
pl <- htmlProlog
el <- htmlContent
eof
return (pl ++ el)
htmlProlog :: SimpleXParser XmlTrees
htmlProlog
= do
xml <- option []
( try xMLDecl'
<|>
( do
pos <- getPosition
checkString ""
return $ [mkError' c_warn (show pos ++ " wrong XML declaration")]
)
)
misc1 <- many misc
dtdPart <- option []
( try doctypedecl
<|>
( do
pos <- getPosition
upperCaseString " htmlContent'
htmlContent' :: SimpleXParser XmlTrees
htmlContent'
= option []
( do
context <- hContent (id, [])
pos <- getPosition
return $ closeTags pos context
)
where
closeTags _pos (body, [])
= body []
closeTags pos' (body, ((tn, al, body1) : restOpen))
= closeTags pos'
( addHtmlWarn (show pos' ++ ": no closing tag found for \"<" ++ tn ++ " ...>\"")
.
addHtmlTag tn al body
$
(body1, restOpen)
)
-- ------------------------------------------------------------
hElement :: Context -> SimpleXParser Context
hElement context
= ( do
t <- hSimpleData
return (addHtmlElem t context)
)
<|>
hCloseTag context
<|>
hOpenTag context
<|>
( do -- wrong tag, take it as text
pos <- getPosition
c <- xmlChar
return ( addHtmlWarn (show pos ++ " markup char " ++ show c ++ " not allowed in this context")
.
addHtmlElem (mkText' [c])
$
context
)
)
<|>
( do
pos <- getPosition
c <- anyChar
return ( addHtmlWarn ( show pos
++ " illegal data in input or illegal XML char "
++ show c
++ " found and ignored, possibly wrong encoding scheme used")
$
context
)
)
hSimpleData :: SimpleXParser XmlTree
hSimpleData
= charData''
<|>
hReference'
<|>
hComment
<|>
hpI
<|>
hcDSect
where
charData''
= do
t <- many1 (satisfy (\ x -> isXmlChar x && not (x == '<' || x == '&')))
return (mkText' t)
hCloseTag :: Context -> SimpleXParser Context
hCloseTag context
= do
checkString ""
n <- lowerCaseName
skipS0
pos <- getPosition
checkSymbol gt ("closing > in tag \"" ++ n ++ "\" expected") (closeTag pos n context)
hOpenTag :: Context -> SimpleXParser Context
hOpenTag context
= ( do
e <- hOpenTagStart
hOpenTagRest e context
)
hOpenTagStart :: SimpleXParser ((SourcePos, String), XmlTrees)
hOpenTagStart
= do
np <- try ( do
lt
pos <- getPosition
n <- lowerCaseName
return (pos, n)
)
skipS0
as <- hAttrList
return (np, as)
hOpenTagRest :: ((SourcePos, String), XmlTrees) -> Context -> SimpleXParser Context
hOpenTagRest ((pos, tn), al) context
= ( do
checkString "/>"
return (addHtmlTag tn al id context)
)
<|>
( do
context1 <- checkSymbol gt ("closing > in tag \"<" ++ tn ++ "...\" expected") context
return ( let context2 = closePrevTag pos tn context1
in
( if isEmptyHtmlTag tn
then addHtmlTag tn al id
else openTag tn al
) context2
)
)
hAttrList :: SimpleXParser XmlTrees
hAttrList
= many (try hAttribute)
where
hAttribute
= do
n <- lowerCaseName
v <- hAttrValue
skipS0
return $ mkAttr' (mkName n) v
hAttrValue :: SimpleXParser XmlTrees
hAttrValue
= option []
( eq >> hAttrValue' )
hAttrValue' :: SimpleXParser XmlTrees
hAttrValue'
= try ( between dq dq (hAttrValue'' "&\"") )
<|>
try ( between sq sq (hAttrValue'' "&\'") )
<|>
( do -- HTML allows unquoted attribute values
cs <- many (noneOf " \r\t\n>\"\'")
return [mkText' cs]
)
hAttrValue'' :: String -> SimpleXParser XmlTrees
hAttrValue'' notAllowed
= many ( hReference' <|> singleCharsT notAllowed)
hReference' :: SimpleXParser XmlTree
hReference'
= try hReferenceT
<|>
( do
amp
return (mkText' "&")
)
hReferenceT :: SimpleXParser XmlTree
hReferenceT
= do
r <- referenceT
return ( if isEntityRef r
then substRef r
else r
)
where
-- optimization: HTML entity refs are substituted by char refs, so a later entity ref substituion isn't required
substRef r
= case (lookup en xhtmlEntities) of
Just i -> mkCharRef' i
Nothing -> r -- not found: the entity ref remains as it is
-- this is also done in the XML parser
{- alternative def
Nothing -> mkText' ("&" ++ en ++ ";") -- not found: the entity ref is taken as text
-}
where
en = fromJust . getEntityRef $ r
hContent :: Context -> SimpleXParser Context
hContent context
= option context
( hElement context
>>=
hContent
)
-- ------------------------------------------------------------
-- hComment allows "--" in comments
-- comment from XML spec does not
hComment :: SimpleXParser XmlTree
hComment
= do
checkString ""
closeCmt pos c
where
closeCmt pos c
= ( do
checkString "-->"
return (mkCmt' c)
)
<|>
( return $
mkError' c_warn (show pos ++ " no closing comment sequence \"-->\" found")
)
-- ------------------------------------------------------------
hpI :: SimpleXParser XmlTree
hpI = checkString ""
>>
( try ( do
n <- name
p <- sPace >> allBut many "?>"
string "?>" >>
return (mkPi' (mkName n) [mkAttr' (mkName a_value) [mkText' p]])
)
<|>
( do
pos <- getPosition
return $
mkError' c_warn (show pos ++ " illegal PI found")
)
)
-- ------------------------------------------------------------
hcDSect :: SimpleXParser XmlTree
hcDSect
= do
checkString ""
closeCD pos t
where
closeCD pos t
= ( do
checkString "]]>"
return (mkCdata' t)
)
<|>
( return $
mkError' c_warn (show pos ++ " no closing CDATA sequence \"]]>\" found")
)
-- ------------------------------------------------------------
checkSymbol :: SimpleXParser () -> String -> Context -> SimpleXParser Context
checkSymbol p msg context
= ( p
>>
return context
)
<|>
( do
pos <- getPosition
return $ addHtmlWarn (show pos ++ " " ++ msg) context
)
lowerCaseName :: SimpleXParser String
lowerCaseName
= do
n <- name
return (map toLower n)
upperCaseString :: String -> SimpleXParser ()
upperCaseString s
= try (sequence (map (\ c -> satisfy (( == c) . toUpper)) s)) >> return ()
-- ------------------------------------------------------------
addHtmlTag :: String -> XmlTrees -> XmlTreeFl -> Context -> Context
addHtmlTag tn al body context
= e `seq`
addHtmlElem e context
where
e = mkElement' (mkName tn) al (body [])
addHtmlWarn :: String -> Context -> Context
addHtmlWarn msg
= addHtmlElem (mkError' c_warn msg)
addHtmlElem :: XmlTree -> Context -> Context
addHtmlElem elem' (body, openTags)
= (body . (elem' :), openTags)
openTag :: String -> XmlTrees -> Context -> Context
openTag tn al (body, openTags)
= (id, (tn, al, body) : openTags)
closeTag :: SourcePos -> String -> Context -> Context
closeTag pos n context
| n `elem` (map ( \ (n1, _, _) -> n1) $ snd context)
= closeTag' n context
| otherwise
= addHtmlWarn (show pos ++ " no opening tag found for " ++ n ++ ">")
.
addHtmlTag n [] id
$
context
where
closeTag' n' (body', (n1, al1, body1) : restOpen)
= close context1
where
context1
= addHtmlTag n1 al1 body' (body1, restOpen)
close
| n' == n1
= id
| n1 `isInnerHtmlTagOf` n'
= closeTag pos n'
| otherwise
= addHtmlWarn (show pos ++ " no closing tag found for \"<" ++ n1 ++ " ...>\"")
.
closeTag' n'
closeTag' _ _
= error "illegal argument for closeTag'"
closePrevTag :: SourcePos -> String -> Context -> Context
closePrevTag _pos _n context@(_body, [])
= context
closePrevTag pos n context@(body, (n1, al1, body1) : restOpen)
| n `closesHtmlTag` n1
= closePrevTag pos n
( addHtmlWarn (show pos ++ " tag \"<" ++ n1 ++ " ...>\" implicitly closed by opening tag \"<" ++ n ++ " ...>\"")
.
addHtmlTag n1 al1 body
$
(body1, restOpen)
)
| otherwise
= context
-- ------------------------------------------------------------
--
-- taken from HaXml and extended
isEmptyHtmlTag :: String -> Bool
isEmptyHtmlTag n
= n `elem`
emptyHtmlTags
emptyHtmlTags :: [String]
emptyHtmlTags
= [ "area"
, "base"
, "br"
, "col"
, "frame"
, "hr"
, "img"
, "input"
, "link"
, "meta"
, "param"
]
{-# INLINE emptyHtmlTags #-}
isInnerHtmlTagOf :: String -> String -> Bool
n `isInnerHtmlTagOf` tn
= n `elem`
( fromMaybe [] . lookup tn
$ [ ("body", ["p"])
, ("caption", ["p"])
, ("dd", ["p"])
, ("div", ["p"])
, ("dl", ["dt","dd"])
, ("dt", ["p"])
, ("li", ["p"])
, ("map", ["p"])
, ("object", ["p"])
, ("ol", ["li"])
, ("table", ["th","tr","td","thead","tfoot","tbody"])
, ("tbody", ["th","tr","td"])
, ("td", ["p"])
, ("tfoot", ["th","tr","td"])
, ("th", ["p"])
, ("thead", ["th","tr","td"])
, ("tr", ["th","td"])
, ("ul", ["li"])
]
)
-- a bit more efficient implementation of closes
closesHtmlTag :: String -> String -> Bool
closesHtmlTag t t2
= fromMaybe False . fmap ($ t) . M.lookup t2 $ closedByTable
{-# INLINE closesHtmlTag #-}
closedByTable :: M.Map String (String -> Bool)
closedByTable
= M.fromList $
[ ("a", (== "a"))
, ("li", (== "li" ))
, ("th", (`elem` ["th", "td", "tr"] ))
, ("td", (`elem` ["th", "td", "tr"] ))
, ("tr", (== "tr"))
, ("dt", (`elem` ["dt", "dd"] ))
, ("dd", (`elem` ["dt", "dd"] ))
, ("p", (`elem` ["hr"
, "h1", "h2", "h3", "h4", "h5", "h6", "dl", "ol", "ul", "table", "div", "p"] ))
, ("colgroup", (`elem` ["colgroup", "thead", "tfoot", "tbody"] ))
, ("form", (`elem` ["form"] ))
, ("label", (`elem` ["label"] ))
, ("map", (`elem` ["map"] ))
, ("option", const True)
, ("script", const True)
, ("style", const True)
, ("textarea", const True)
, ("title", const True)
, ("select", ( /= "option"))
, ("thead", (`elem` ["tfoot","tbody"] ))
, ("tbody", (== "tbody" ))
, ("tfoot", (== "tbody" ))
, ("h1", (`elem` ["h1", "h2", "h3", "h4", "h5", "h6", "dl", "ol", "ul", "table", "div", "p"] ))
, ("h2", (`elem` ["h1", "h2", "h3", "h4", "h5", "h6", "dl", "ol", "ul", "table", "div", "p"] ))
, ("h3", (`elem` ["h1", "h2", "h3", "h4", "h5", "h6", "dl", "ol", "ul", "table", "div", "p"] ))
, ("h4", (`elem` ["h1", "h2", "h3", "h4", "h5", "h6", "dl", "ol", "ul", "table", "div", "p"] ))
, ("h5", (`elem` ["h1", "h2", "h3", "h4", "h5", "h6", "dl", "ol", "ul", "table", "div", "p"] ))
, ("h6", (`elem` ["h1", "h2", "h3", "h4", "h5", "h6", "dl", "ol", "ul", "table", "div", "p"] ))
]
{-
closesHtmlTag :: String -> String -> Bool
closesHtmlTag = closes
closes :: String -> String -> Bool
"a" `closes` "a" = True
"li" `closes` "li" = True
"th" `closes` t | t `elem` ["th","td"] = True
"td" `closes` t | t `elem` ["th","td"] = True
"tr" `closes` t | t `elem` ["th","td","tr"] = True
"dt" `closes` t | t `elem` ["dt","dd"] = True
"dd" `closes` t | t `elem` ["dt","dd"] = True
"hr" `closes` "p" = True
"colgroup"
`closes` "colgroup" = True
"form" `closes` "form" = True
"label" `closes` "label" = True
"map" `closes` "map" = True
"object"
`closes` "object" = True
_ `closes` t | t `elem` ["option"
,"script"
,"style"
,"textarea"
,"title"
] = True
t `closes` "select" | t /= "option" = True
"thead" `closes` t | t `elem` ["colgroup"] = True
"tfoot" `closes` t | t `elem` ["thead"
,"colgroup"] = True
"tbody" `closes` t | t `elem` ["tbody"
,"tfoot"
,"thead"
,"colgroup"] = True
t `closes` t2 | t `elem` ["h1","h2","h3"
,"h4","h5","h6"
,"dl","ol","ul"
,"table"
,"div","p"
]
&&
t2 `elem` ["h1","h2","h3"
,"h4","h5","h6"
,"p" -- not "div"
] = True
_ `closes` _ = False
-}
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/ProtocolHandlerUtil.hs 0000644 0000000 0000000 00000003524 12752557014 021316 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{-
Module : Text.XML.HXT.Parser.ProtocolHandlerUtil
Copyright : Copyright (C) 2008 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
Protocol handler utility functions
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Parser.ProtocolHandlerUtil
( parseContentType
)
where
import Text.XML.HXT.DOM.XmlKeywords
import Text.XML.HXT.DOM.Util ( stringToUpper
, stringTrim
)
import qualified Text.ParserCombinators.Parsec as P
-- ------------------------------------------------------------
-- |
-- Try to extract charset spec from Content-Type header
-- e.g. \"text\/html; charset=ISO-8859-1\"
--
-- Sometimes the server deliver the charset spec in quotes
-- these are removed
parseContentType :: P.Parser [(String, String)]
parseContentType
= P.try ( do
mimeType <- ( do
mt <- P.many (P.noneOf ";")
rtMT mt
)
charset <- ( do
_ <- P.char ';'
_ <- P.many (P.oneOf " \t'")
_ <- P.string "charset="
_ <- P.option '"' (P.oneOf "\"'")
cs <- P.many1 (P.noneOf "\"'")
return [ (transferEncoding, stringToUpper cs) ]
)
return (mimeType ++ charset)
)
P.<|>
( do
mt <- P.many (P.noneOf ";")
rtMT mt
)
where
rtMT mt = return [ (transferMimeType, stringTrim mt) ]
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/XhtmlEntities.hs 0000644 0000000 0000000 00000023271 12752557014 020163 0 ustar 00 0000000 0000000 -- |
-- XHTML Entity References
--
-- This module defines a table of all
-- predefined XHTML entity references
-- for special or none ASCII chars including the
-- predefined XML entity refs
module Text.XML.HXT.Parser.XhtmlEntities
( xhtmlEntities
)
where
import Text.XML.HXT.Parser.XmlEntities
( xmlEntities )
-- ------------------------------------------------------------
-- | table with all XHTML entity refs and corresponding unicode values
xhtmlEntities :: [(String, Int)]
xhtmlEntities = xmlEntities
++
[ ("nbsp", 160)
, ("iexcl", 161)
, ("cent", 162)
, ("pound", 163)
, ("curren", 164)
, ("yen", 165)
, ("brvbar", 166)
, ("sect", 167)
, ("uml", 168)
, ("copy", 169)
, ("ordf", 170)
, ("laquo", 171)
, ("not", 172)
, ("shy", 173)
, ("reg", 174)
, ("macr", 175)
, ("deg", 176)
, ("plusmn", 177)
, ("sup2", 178)
, ("sup3", 179)
, ("acute", 180)
, ("micro", 181)
, ("para", 182)
, ("middot", 183)
, ("cedil", 184)
, ("sup1", 185)
, ("ordm", 186)
, ("raquo", 187)
, ("frac14", 188)
, ("frac12", 189)
, ("frac34", 190)
, ("iquest", 191)
, ("Agrave", 192)
, ("Aacute", 193)
, ("Acirc", 194)
, ("Atilde", 195)
, ("Auml", 196)
, ("Aring", 197)
, ("AElig", 198)
, ("Ccedil", 199)
, ("Egrave", 200)
, ("Eacute", 201)
, ("Ecirc", 202)
, ("Euml", 203)
, ("Igrave", 204)
, ("Iacute", 205)
, ("Icirc", 206)
, ("Iuml", 207)
, ("ETH", 208)
, ("Ntilde", 209)
, ("Ograve", 210)
, ("Oacute", 211)
, ("Ocirc", 212)
, ("Otilde", 213)
, ("Ouml", 214)
, ("times", 215)
, ("Oslash", 216)
, ("Ugrave", 217)
, ("Uacute", 218)
, ("Ucirc", 219)
, ("Uuml", 220)
, ("Yacute", 221)
, ("THORN", 222)
, ("szlig", 223)
, ("agrave", 224)
, ("aacute", 225)
, ("acirc", 226)
, ("atilde", 227)
, ("auml", 228)
, ("aring", 229)
, ("aelig", 230)
, ("ccedil", 231)
, ("egrave", 232)
, ("eacute", 233)
, ("ecirc", 234)
, ("euml", 235)
, ("igrave", 236)
, ("iacute", 237)
, ("icirc", 238)
, ("iuml", 239)
, ("eth", 240)
, ("ntilde", 241)
, ("ograve", 242)
, ("oacute", 243)
, ("ocirc", 244)
, ("otilde", 245)
, ("ouml", 246)
, ("divide", 247)
, ("oslash", 248)
, ("ugrave", 249)
, ("uacute", 250)
, ("ucirc", 251)
, ("uuml", 252)
, ("yacute", 253)
, ("thorn", 254)
, ("yuml", 255)
, ("OElig", 338)
, ("oelig", 339)
, ("Scaron", 352)
, ("scaron", 353)
, ("Yuml", 376)
, ("circ", 710)
, ("tilde", 732)
, ("ensp", 8194)
, ("emsp", 8195)
, ("thinsp", 8201)
, ("zwnj", 8204)
, ("zwj", 8205)
, ("lrm", 8206)
, ("rlm", 8207)
, ("ndash", 8211)
, ("mdash", 8212)
, ("lsquo", 8216)
, ("rsquo", 8217)
, ("sbquo", 8218)
, ("ldquo", 8220)
, ("rdquo", 8221)
, ("bdquo", 8222)
, ("dagger", 8224)
, ("Dagger", 8225)
, ("permil", 8240)
, ("lsaquo", 8249)
, ("rsaquo", 8250)
, ("euro", 8364)
, ("fnof", 402)
, ("Alpha", 913)
, ("Beta", 914)
, ("Gamma", 915)
, ("Delta", 916)
, ("Epsilon", 917)
, ("Zeta", 918)
, ("Eta", 919)
, ("Theta", 920)
, ("Iota", 921)
, ("Kappa", 922)
, ("Lambda", 923)
, ("Mu", 924)
, ("Nu", 925)
, ("Xi", 926)
, ("Omicron", 927)
, ("Pi", 928)
, ("Rho", 929)
, ("Sigma", 931)
, ("Tau", 932)
, ("Upsilon", 933)
, ("Phi", 934)
, ("Chi", 935)
, ("Psi", 936)
, ("Omega", 937)
, ("alpha", 945)
, ("beta", 946)
, ("gamma", 947)
, ("delta", 948)
, ("epsilon", 949)
, ("zeta", 950)
, ("eta", 951)
, ("theta", 952)
, ("iota", 953)
, ("kappa", 954)
, ("lambda", 955)
, ("mu", 956)
, ("nu", 957)
, ("xi", 958)
, ("omicron", 959)
, ("pi", 960)
, ("rho", 961)
, ("sigmaf", 962)
, ("sigma", 963)
, ("tau", 964)
, ("upsilon", 965)
, ("phi", 966)
, ("chi", 967)
, ("psi", 968)
, ("omega", 969)
, ("thetasym", 977)
, ("upsih", 978)
, ("piv", 982)
, ("bull", 8226)
, ("hellip", 8230)
, ("prime", 8242)
, ("Prime", 8243)
, ("oline", 8254)
, ("frasl", 8260)
, ("weierp", 8472)
, ("image", 8465)
, ("real", 8476)
, ("trade", 8482)
, ("alefsym", 8501)
, ("larr", 8592)
, ("uarr", 8593)
, ("rarr", 8594)
, ("darr", 8595)
, ("harr", 8596)
, ("crarr", 8629)
, ("lArr", 8656)
, ("uArr", 8657)
, ("rArr", 8658)
, ("dArr", 8659)
, ("hArr", 8660)
, ("forall", 8704)
, ("part", 8706)
, ("exist", 8707)
, ("empty", 8709)
, ("nabla", 8711)
, ("isin", 8712)
, ("notin", 8713)
, ("ni", 8715)
, ("prod", 8719)
, ("sum", 8721)
, ("minus", 8722)
, ("lowast", 8727)
, ("radic", 8730)
, ("prop", 8733)
, ("infin", 8734)
, ("ang", 8736)
, ("and", 8743)
, ("or", 8744)
, ("cap", 8745)
, ("cup", 8746)
, ("int", 8747)
, ("there4", 8756)
, ("sim", 8764)
, ("cong", 8773)
, ("asymp", 8776)
, ("ne", 8800)
, ("equiv", 8801)
, ("le", 8804)
, ("ge", 8805)
, ("sub", 8834)
, ("sup", 8835)
, ("nsub", 8836)
, ("sube", 8838)
, ("supe", 8839)
, ("oplus", 8853)
, ("otimes", 8855)
, ("perp", 8869)
, ("sdot", 8901)
, ("lceil", 8968)
, ("rceil", 8969)
, ("lfloor", 8970)
, ("rfloor", 8971)
, ("lang", 9001)
, ("rang", 9002)
, ("loz", 9674)
, ("spades", 9824)
, ("clubs", 9827)
, ("hearts", 9829)
, ("diams", 9830)
]
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/XmlCharParser.hs 0000644 0000000 0000000 00000007675 14025461263 020102 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Parser.XmlCharParser
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
UTF-8 character parser and simple XML token parsers
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Parser.XmlCharParser
( XParser
, SimpleXParser
, XPState(..)
, withNormNewline
, withoutNormNewline
, xmlChar -- xml char parsers
, xmlNameChar
, xmlNameStartChar
, xmlNCNameChar
, xmlNCNameStartChar
, xmlLetter
, xmlSpaceChar
, xmlCRLFChar
)
where
import Data.Char.Properties.XMLCharProps (isXmlCharCR, isXmlLetter,
isXmlNCNameChar,
isXmlNCNameStartChar,
isXmlNameChar,
isXmlNameStartChar,
isXmlSpaceCharCR)
import Data.String.Unicode
import Text.ParserCombinators.Parsec
-- ------------------------------------------------------------
type XParser s a = GenParser Char (XPState s) a
type SimpleXParser a = XParser () a
data XPState s = XPState
{ xps_normalizeNewline :: !Bool
, xps_userState :: s
}
withNormNewline :: a -> XPState a
withNormNewline x = XPState True x
withoutNormNewline :: a -> XPState a
withoutNormNewline x = XPState False x
-- ------------------------------------------------------------
--
-- Char (2.2)
--
-- |
-- parse a single Unicode character
xmlChar :: XParser s Unicode
xmlChar = ( satisfy isXmlCharCR
<|>
xmlCRLFChar
)
> "legal XML character"
{-# INLINE xmlChar #-}
-- |
-- parse a XML name character
xmlNameChar :: XParser s Unicode
xmlNameChar = satisfy isXmlNameChar > "legal XML name character"
{-# INLINE xmlNameChar #-}
-- |
-- parse a XML name start character
xmlNameStartChar :: XParser s Unicode
xmlNameStartChar = satisfy isXmlNameStartChar > "legal XML name start character"
{-# INLINE xmlNameStartChar #-}
-- |
-- parse a XML NCName character
xmlNCNameChar :: XParser s Unicode
xmlNCNameChar = satisfy isXmlNCNameChar > "legal XML NCName character"
{-# INLINE xmlNCNameChar #-}
-- |
-- parse a XML NCName start character
xmlNCNameStartChar :: XParser s Unicode
xmlNCNameStartChar = satisfy isXmlNCNameStartChar > "legal XML NCName start character"
{-# INLINE xmlNCNameStartChar #-}
-- |
-- parse a XML letter character
xmlLetter :: XParser s Unicode
xmlLetter = satisfy isXmlLetter > "legal XML letter"
{-# INLINE xmlLetter #-}
-- |
-- White Space (2.3)
--
-- end of line handling (2.11) will be done before or with 'xmlCRLFChar' parser
xmlSpaceChar :: XParser s Char
xmlSpaceChar = ( satisfy isXmlSpaceCharCR
<|>
xmlCRLFChar
)
> "white space"
{-# INLINE xmlSpaceChar #-}
-- |
-- White Space Normalization
--
-- end of line handling (2.11)
-- \#x0D and \#x0D\#x0A are mapped to \#x0A
xmlCRLFChar :: XParser s Char
xmlCRLFChar = ( do
_ <- char '\r'
s <- getState
if xps_normalizeNewline s
then option '\n' (char '\n')
else return '\r'
)
> "newline"
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/XmlDTDParser.hs 0000644 0000000 0000000 00000045241 12752557014 017634 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Parser.XmlDTDParser
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Parsec parser for DTD declarations for ELEMENT, ATTLIST, ENTITY and NOTATION declarations
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Parser.XmlDTDParser
( parseXmlDTDdecl
, parseXmlDTDdeclPart
, parseXmlDTDEntityValue
, elementDecl
, attlistDecl
, entityDecl
, notationDecl
)
where
import Data.Maybe
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.ShowXml
( xshow
)
import Text.XML.HXT.DOM.XmlNode ( mkDTDElem'
, mkText'
, mkError'
, isText
, isDTD
, getText
, getDTDPart
, getDTDAttrl
, getChildren
, setChildren
)
import qualified Text.XML.HXT.Parser.XmlTokenParser as XT
import Text.XML.HXT.Parser.XmlCharParser ( XParser
, XPState(..)
, withoutNormNewline
)
import qualified Text.XML.HXT.Parser.XmlCharParser as XC
( xmlSpaceChar )
import qualified Text.XML.HXT.Parser.XmlDTDTokenParser as XD
( dtdToken )
-----------------------------------------------------------
--
-- all parsers dealing with whitespace will be redefined
-- to handle parameter entity substitution
type LocalState = (Int, [(Int, String, SourcePos)])
type SParser a = XParser LocalState a
initialState :: SourcePos -> XPState LocalState
initialState p = withoutNormNewline (0, [(0, sourceName p, p)])
updateLocalState :: (LocalState -> LocalState) -> SParser ()
updateLocalState upd
= updateState $ \ xps -> xps { xps_userState = upd $ xps_userState xps }
pushPar :: String -> SParser ()
pushPar n = do
p <- getPosition
updateLocalState (\ (i, s) -> (i+1, (i+1, n, p) : s))
setPosition ( newPos (sourceName p ++ " (line " ++ show (sourceLine p) ++ ", column " ++ show (sourceColumn p) ++ ") in content of parameter entity ref %" ++ n ++ ";") 1 1)
popPar :: SParser ()
popPar = do
oldPos <- getPos
updateLocalState pop
setPosition oldPos
where
pop (i, [(_, s, p)]) = (i+1, [(i+1, s, p)]) -- if param entity substitution is correctly implemented, this case does not occur
pop (i, _t:s) = (i, s)
pop (_i, []) = undefined -- stack is never empty
getParNo :: SParser Int
getParNo = do
s <- getState
let (_i, (top, _n, _p) : _s) = xps_userState s
return top
getPos :: SParser SourcePos
getPos = do
s <- getState
let (_i, (_top, _n, p) : _s) = xps_userState s
return p
delPE :: SParser ()
delPE = do
_ <- char '\0'
return ()
startPE :: SParser ()
startPE
= do
try ( do
delPE
n <- many1 (satisfy (/= '\0'))
delPE
pushPar n
)
endPE :: SParser ()
endPE
= do
try (do
delPE
delPE
popPar
)
inSamePE :: SParser a -> SParser a
inSamePE p
= do
i <- getParNo
r <- p
j <- getParNo
if (i == j)
then return r
else fail $ "parameter entity contents does not fit into the structure of a DTD declarations"
-- ------------------------------------------------------------
xmlSpaceChar :: SParser ()
xmlSpaceChar = ( XC.xmlSpaceChar
>>
return ()
)
<|>
startPE
<|>
endPE
> "white space"
skipS :: SParser ()
skipS
= skipMany1 xmlSpaceChar
>>
return ()
skipS0 :: SParser ()
skipS0
= skipMany xmlSpaceChar
>>
return ()
name :: SParser XmlTree
name
= do
n <- XT.name
return (mkDTDElem' NAME [(a_name, n)] [])
nmtoken :: SParser XmlTree
nmtoken
= do
n <- XT.nmtoken
return (mkDTDElem' NAME [(a_name, n)] [])
-- ------------------------------------------------------------
--
-- Element Type Declarations (3.2)
elementDecl :: SParser XmlTrees
elementDecl
= between (try $ string "') elementDeclBody
elementDeclBody :: SParser XmlTrees
elementDeclBody
= do
skipS
n <- XT.name
skipS
(al, cl) <- contentspec
skipS0
return [mkDTDElem' ELEMENT ((a_name, n) : al) cl]
contentspec :: SParser (Attributes, XmlTrees)
contentspec
= simplespec k_empty v_empty
<|>
simplespec k_any v_any
<|>
inSamePE mixed
<|>
inSamePE children
> "content specification"
where
simplespec kw v
= do
_ <- XT.keyword kw
return ([(a_type, v)], [])
-- ------------------------------------------------------------
--
-- Element Content (3.2.1)
children :: SParser (Attributes, XmlTrees)
children
= ( do
(al, cl) <- choiceOrSeq
modifier <- optOrRep
return ([(a_type, v_children)], [mkDTDElem' CONTENT (modifier ++ al) cl])
)
> "element content"
optOrRep :: SParser Attributes
optOrRep
= do
m <- option "" (XT.mkList (oneOf "?*+"))
return [(a_modifier, m)]
choiceOrSeq :: SParser (Attributes, XmlTrees)
choiceOrSeq
= inSamePE $
do
cl <- try ( do
lpar
choiceOrSeqBody
)
rpar
return cl
choiceOrSeqBody :: SParser (Attributes, XmlTrees)
choiceOrSeqBody
= do
cp1 <- cp
choiceOrSeq1 cp1
where
choiceOrSeq1 :: XmlTree -> SParser (Attributes, XmlTrees)
choiceOrSeq1 c1
= ( do
bar
c2 <- cp
cl <- many ( do
bar
cp
)
return ([(a_kind, v_choice)], (c1 : c2 : cl))
)
<|>
( do
cl <- many ( do
comma
cp
)
return ([(a_kind, v_seq)], (c1 : cl))
)
> "sequence or choice"
cp :: SParser XmlTree
cp
= ( do
n <- name
m <- optOrRep
return ( case m of
[(_, "")] -> n
_ -> mkDTDElem' CONTENT (m ++ [(a_kind, v_seq)]) [n]
)
)
<|>
( do
(al, cl) <- choiceOrSeq
m <- optOrRep
return (mkDTDElem' CONTENT (m ++ al) cl)
)
-- ------------------------------------------------------------
--
-- Mixed Content (3.2.2)
mixed :: SParser (Attributes, XmlTrees)
mixed
= ( do
_ <- try ( do
lpar
string k_pcdata
)
nl <- many ( do
bar
name
)
rpar
if null nl
then do
_ <- option ' ' (char '*') -- (#PCDATA) or (#PCDATA)* , both are legal
return ( [ (a_type, v_pcdata) ]
, []
)
else do
_ <- char '*' > "closing parent for mixed content (\")*\")"
return ( [ (a_type, v_mixed) ]
, [ mkDTDElem' CONTENT [ (a_modifier, "*")
, (a_kind, v_choice)
] nl
]
)
)
> "mixed content"
-- ------------------------------------------------------------
--
-- Attribute-List Declarations (3.3)
attlistDecl :: SParser XmlTrees
attlistDecl
= between (try $ string "') attlistDeclBody
attlistDeclBody :: SParser XmlTrees
attlistDeclBody
= do
skipS
n <- XT.name
al <- many attDef
skipS0
return (map (mkDTree n) al)
where
mkDTree n' (al, cl)
= mkDTDElem' ATTLIST ((a_name, n') : al) cl
attDef :: SParser (Attributes, XmlTrees)
attDef
= do
n <- try ( do
skipS
XT.name
) > "attribute name"
skipS
(t, cl) <- attType
skipS
d <- defaultDecl
return (((a_value, n) : d) ++ t, cl)
attType :: SParser (Attributes, XmlTrees)
attType
= tokenizedOrStringType
<|>
enumeration
<|>
notationType
> "attribute type"
tokenizedOrStringType :: SParser (Attributes, XmlTrees)
tokenizedOrStringType
= do
n <- choice $ map XT.keyword typl
return ([(a_type, n)], [])
where
typl = [ k_cdata
, k_idrefs
, k_idref
, k_id
, k_entity
, k_entities
, k_nmtokens
, k_nmtoken
]
enumeration :: SParser (Attributes, XmlTrees)
enumeration
= do
nl <- inSamePE (between lpar rpar (sepBy1 nmtoken bar))
return ([(a_type, k_enumeration)], nl)
notationType :: SParser (Attributes, XmlTrees)
notationType
= do
_ <- XT.keyword k_notation
skipS
nl <- inSamePE (between lpar rpar ( sepBy1 name bar ))
return ([(a_type, k_notation)], nl)
defaultDecl :: SParser Attributes
defaultDecl
= ( do
str <- try $ string k_required
return [(a_kind, str)]
)
<|>
( do
str <- try $ string k_implied
return [(a_kind, str)]
)
<|>
( do
l <- fixed
v <- XT.attrValueT
return ((a_default, xshow v) : l)
)
> "default declaration"
where
fixed = option [(a_kind, k_default)]
( do
_ <- try $ string k_fixed
skipS
return [(a_kind, k_fixed)]
)
-- ------------------------------------------------------------
--
-- Entity Declarations (4.2)
entityDecl :: SParser XmlTrees
entityDecl
= between ( try $ string "') entityDeclBody
entityDeclBody :: SParser XmlTrees
entityDeclBody
= do
skipS
( peDecl
<|>
geDecl
> "entity declaration" ) -- don't move the ) to the next line
geDecl :: SParser XmlTrees
geDecl
= do
n <- XT.name
skipS
(al, cl) <- entityDef
skipS0
return [mkDTDElem' ENTITY ((a_name, n) : al) cl]
entityDef :: SParser (Attributes, XmlTrees)
entityDef
= entityValue
<|>
externalEntitySpec
externalEntitySpec :: SParser (Attributes, XmlTrees)
externalEntitySpec
= do
al <- externalID
nd <- option [] nDataDecl
return ((al ++ nd), [])
peDecl :: SParser XmlTrees
peDecl
= do
_ <- char '%'
skipS
n <- XT.name
skipS
(al, cs) <- peDef
skipS0
return [mkDTDElem' PENTITY ((a_name, n) : al) cs]
peDef :: SParser (Attributes, XmlTrees)
peDef
= entityValue
<|>
do
al <- externalID
return (al, [])
entityValue :: XParser s (Attributes, XmlTrees)
entityValue
= do
v <- XT.entityValueT
return ([], v)
-- ------------------------------------------------------------
--
-- External Entities (4.2.2)
externalID :: SParser Attributes
externalID
= ( do
_ <- XT.keyword k_system
skipS
lit <- XT.systemLiteral
return [(k_system, lit)]
)
<|>
( do
_ <- XT.keyword k_public
skipS
pl <- XT.pubidLiteral
skipS
sl <- XT.systemLiteral
return [ (k_system, sl)
, (k_public, pl) ]
)
> "SYSTEM or PUBLIC declaration"
nDataDecl :: SParser Attributes
nDataDecl
= do
_ <- try ( do
skipS
XT.keyword k_ndata
)
skipS
n <- XT.name
return [(k_ndata, n)]
-- ------------------------------------------------------------
--
-- Notation Declarations (4.7)
notationDecl :: SParser XmlTrees
notationDecl
= between (try $ string "' > "notation declaration") notationDeclBody
notationDeclBody :: SParser XmlTrees
notationDeclBody
= do
skipS
n <- XT.name
skipS
eid <- ( try externalID
<|>
publicID
)
skipS0
return [mkDTDElem' NOTATION ((a_name, n) : eid) []]
publicID :: SParser Attributes
publicID
= do
_ <- XT.keyword k_public
skipS
l <- XT.pubidLiteral
return [(k_public, l)]
-- ------------------------------------------------------------
condSectCondBody :: SParser XmlTrees
condSectCondBody
= do
skipS0
n <- XT.name
skipS0
let n' = stringToUpper n
if n' `elem` [k_include, k_ignore]
then return [mkText' n']
else fail $ "INCLUDE or IGNORE expected in conditional section"
-- ------------------------------------------------------------
separator :: Char -> SParser ()
separator c
= do
_ <- try ( do
skipS0
char c
)
skipS0
> [c]
bar, comma, lpar, rpar :: SParser ()
bar = separator '|'
comma = separator ','
lpar
= do
_ <- char '('
skipS0
rpar
= do
skipS0
_ <- char ')'
return ()
-- ------------------------------------------------------------
parseXmlDTDEntityValue :: XmlTree -> XmlTrees
parseXmlDTDEntityValue t -- (NTree (XDTD PEREF al) cl)
| isDTDPEref t
= ( either
( (:[]) . mkError' c_err . (++ "\n") . show )
( \cl' -> if null cl'
then [mkText' ""]
else cl'
)
.
runParser parser (withoutNormNewline ()) source
) input
| otherwise
= []
where
al = fromMaybe [] . getDTDAttrl $ t
cl = getChildren t
parser = XT.entityTokensT "%&"
source = "value of parameter entity " ++ lookupDef "" a_peref al
input = xshow cl
{-
parseXmlDTDEntityValue n
= error ("parseXmlDTDEntityValue: illegal argument: " ++ show n)
-}
-- ------------------------------------------------------------
parseXmlDTDdeclPart :: XmlTree -> XmlTrees
parseXmlDTDdeclPart t -- @(NTree (XDTD PEREF al) cl)
| isDTDPEref t
= ( (:[])
.
either
( mkError' c_err . (++ "\n") . show )
( flip setChildren $ t ) -- \ cl' -> setChildren cl' t)
.
runParser parser (withoutNormNewline ()) source
) input
| otherwise
= []
where
al = fromMaybe [] . getDTDAttrl $ t
cl = getChildren t
parser = many XD.dtdToken
source = "value of parameter entity " ++ lookupDef "" a_peref al
input = xshow cl
{-
parseXmlDTDdeclPart n
= error ("parseXmlDTDdeclPart: illegal argument: " ++ show n)
-}
-- ------------------------------------------------------------
--
-- the main entry point
-- | parse a tokenized DTD declaration represented by a DTD tree.
-- The content is represented by the children containing text and parameter entity reference nodes.
-- The parameter entity reference nodes contain their value in the children list, consisting of text
-- and possibly again parameter entity reference nodes. This structure is build by the parameter entity
-- substitution.
-- Output is again a DTD declaration node, but this time completely parsed and ready for further DTD processing
parseXmlDTDdecl :: XmlTree -> XmlTrees
parseXmlDTDdecl t -- (NTree (XDTD dtdElem al) cl)
| isDTD t
= ( either ((:[]) . mkError' c_err . (++ "\n") . show) id
.
runParser parser (initialState pos) source
) input
| otherwise
= []
where
dtdElem = fromJust . getDTDPart $ t
al = fromMaybe [] . getDTDAttrl $ t
cl = getChildren t
dtdParsers
= [ (ELEMENT, elementDeclBody)
, (ATTLIST, attlistDeclBody)
, (ENTITY, entityDeclBody)
, (NOTATION, notationDeclBody)
, (CONDSECT, condSectCondBody)
]
source = lookupDef "DTD declaration" a_source al
line = lookupDef "1" a_line al
column = lookupDef "1" a_column al
pos = newPos source (read line) (read column)
parser = do
setPosition pos
res <- fromJust . lookup dtdElem $ dtdParsers
eof
return res
input = concatMap collectText cl
{-
parseXmlDTDdecl _
= []
-}
-- | collect the tokens of a DTD declaration body and build
-- a string ready for parsing. The structure of the parameter entity values
-- is stll stored in this string for checking the scope of the parameter values
collectText :: XmlTree -> String
collectText t
| isText t
= fromMaybe "" . getText $ t
| isDTDPEref t
= prefixPe ++ concatMap collectText (getChildren t) ++ suffixPe
| otherwise
= ""
where
al = fromMaybe [] . getDTDAttrl $ t
delPe = "\0"
prefixPe = delPe ++ lookupDef "???" a_peref al ++ delPe
suffixPe = delPe ++ delPe
{-
collectText (NTree n _)
| isXTextNode n
= textOfXNode n
collectText (NTree (XDTD PEREF al) cl)
= prefixPe ++ concatMap collectText cl ++ suffixPe
where
delPe = "\0"
prefixPe = delPe ++ lookupDef "???" a_peref al ++ delPe
suffixPe = delPe ++ delPe
collectText _
= ""
-}
isDTDPEref :: XmlTree -> Bool
isDTDPEref
= maybe False (== PEREF) . getDTDPart
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/XmlDTDTokenParser.hs 0000644 0000000 0000000 00000006054 12752557014 020634 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Parser.XmlDTDTokenParser
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Parsec parser for tokenizing DTD declarations for ELEMENT, ATTLIST, ENTITY and NOTATION
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Parser.XmlDTDTokenParser where
import Text.ParserCombinators.Parsec
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.XmlNode ( mkDTDElem'
, mkText'
)
import qualified Text.XML.HXT.Parser.XmlTokenParser as XT
import Text.XML.HXT.Parser.XmlCharParser ( XParser )
-- ------------------------------------------------------------
--
-- DTD declaration tokenizer
dtdDeclTokenizer :: XParser s XmlTree
dtdDeclTokenizer
= do
(dcl, al) <- dtdDeclStart
content <- many1 dtdToken
dtdDeclEnd
return $ mkDTDElem' dcl al content
dtdDeclStart :: XParser s (DTDElem, Attributes)
dtdDeclStart
= foldr1 (<|>) $
map (uncurry dtdStart) $
[ ("ELEMENT", ELEMENT )
, ("ATTLIST", ATTLIST )
, ("ENTITY", ENTITY )
, ("NOTATION", NOTATION)
]
where
dtdStart :: String -> DTDElem -> XParser s (DTDElem, Attributes)
dtdStart dcl element
= try ( do
_ <- string "
entityValue
<|>
try peReference -- first try parameter entity ref %xxx;
<|>
percent -- else % may be indicator for parameter entity declaration
> "DTD token"
peReference :: XParser s XmlTree
peReference
= do
r <- XT.peReference
return $! (mkDTDElem' PEREF [(a_peref, r)] [])
entityValue :: XParser s XmlTree
entityValue
= do
v <- XT.entityValue
return $ mkText' v
dtdChars :: XParser s XmlTree
dtdChars
= do
v <- many1 (XT.singleChar "%\"'<>[]") -- everything except string constants, < and >, [ and ] (for cond sections)
return $ mkText' v -- all illegal chars will be detected later during declaration parsing
percent :: XParser s XmlTree
percent
= do
c <- char '%'
return $ mkText' [c]
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/XmlEntities.hs 0000644 0000000 0000000 00000001161 12752557014 017621 0 ustar 00 0000000 0000000 -- |
-- Predefined XML Entity References
--
-- This module defines a table of all
-- predefined XML entity references
module Text.XML.HXT.Parser.XmlEntities
( xmlEntities
)
where
-- ------------------------------------------------------------
-- |
-- list of predefined XML entity names and their unicode values
xmlEntities :: [(String, Int)]
xmlEntities = [ ("quot", 34)
, ("amp", 38)
, ("lt", 60)
, ("gt", 62)
, ("apos", 39)
]
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/XmlParsec.hs 0000644 0000000 0000000 00000050163 13001373304 017242 0 ustar 00 0000000 0000000 {-# LANGUAGE CPP #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Parser.XmlParsec
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Xml Parsec parser with pure filter interface
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Parser.XmlParsec
( charData
, charData'
, comment
, pI
, cDSect
, document
, document'
, prolog
, xMLDecl
, xMLDecl'
, versionInfo
, misc
, doctypedecl
, markupdecl
, sDDecl
, element
, content
, contentWithTextDecl
, textDecl
, encodingDecl
, xread
, xreadDoc
, parseXmlContent
, parseXmlDocEncodingSpec
, parseXmlDocument
, parseXmlDTDPart
, parseXmlEncodingSpec
, parseXmlEntityEncodingSpec
, parseXmlEntityValueAsAttrValue
, parseXmlEntityValueAsContent
, parseXmlPart
, parseXmlText
, parseNMToken
, parseName
, removeEncodingSpec
)
where
#if MIN_VERSION_base(4,8,2)
#else
import Control.Applicative ((<$>))
#endif
import Text.ParserCombinators.Parsec (between, char, eof,
getInput, getPosition,
many, many1,
notFollowedBy, option,
runParser, sourceName,
string, try, unexpected,
(>), (<|>))
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.ShowXml (xshow)
import Text.XML.HXT.DOM.XmlNode (changeAttrl,
getAttrName, getAttrl,
getChildren, getText,
isRoot, isText,
mergeAttrl, mkAttr',
mkCdata', mkCmt',
mkDTDElem', mkElement',
mkError', mkPi',
mkRoot', mkText')
import Text.XML.HXT.Parser.XmlCharParser (SimpleXParser, XPState,
XParser,
withNormNewline,
withoutNormNewline,
xmlChar)
import qualified Text.XML.HXT.Parser.XmlDTDTokenParser as XD
import qualified Text.XML.HXT.Parser.XmlTokenParser as XT
import Control.FlatSeq
import Data.Char (toLower)
import Data.Maybe
-- import Debug.Trace
-- ------------------------------------------------------------
--
-- Character Data (2.4)
charData :: XParser s XmlTrees
charData
= many (charData' <|> XT.referenceT)
charData' :: XParser s XmlTree
charData'
= do
t <- XT.allBut1 many1 (\ c -> not (c `elem` "<&")) "]]>"
return (mkText' t)
-- ------------------------------------------------------------
--
-- Comments (2.5)
comment :: XParser s XmlTree
comment
= comment'' $ XT.checkString "")) (XT.allBut many "--")
return (mkCmt' c)
) > "comment"
-- ------------------------------------------------------------
--
-- Processing Instructions
pI :: XParser s XmlTree
pI = pI'' $ XT.checkString ""
-- the leading < is already parsed
pI' :: XParser s XmlTree
pI' = pI'' (char '?' >> return ())
pI'' :: XParser s () -> XParser s XmlTree
pI'' op
= between op (string "?>")
( do
n <- pITarget
p <- option "" (XT.sPace
>>
XT.allBut many "?>"
)
return (mkPi' (mkName n) [mkAttr' (mkName a_value) [mkText' p]])
) > "processing instruction"
where
pITarget :: XParser s String
pITarget = ( do
n <- XT.name
if map toLower n == t_xml
then unexpected n
else return n
)
-- ------------------------------------------------------------
--
-- CDATA Sections (2.7)
cDSect :: XParser s XmlTree
cDSect
= cDSect'' $ XT.checkString "> return ())
cDSect'' :: XParser s () -> XParser s XmlTree
cDSect'' op
= do
t <- between op (string "]]>") (XT.allBut many "]]>")
return (mkCdata' t)
> "CDATA section"
-- ------------------------------------------------------------
--
-- Document (2.1) and Prolog (2.8)
document :: XParser s XmlTree
document
= do
pos <- getPosition
dl <- document'
return (mkRoot' [ mkAttr' (mkName a_source) [mkText' (sourceName pos)]
, mkAttr' (mkName a_status) [mkText' (show c_ok)]
] dl
)
document' :: XParser s XmlTrees
document'
= do
pl <- prolog
el <- element
ml <- many misc
eof
return (pl ++ [el] ++ ml)
prolog :: XParser s XmlTrees
prolog
= do
xml <- option [] xMLDecl'
misc1 <- many misc
dtdPart <- option [] doctypedecl
misc2 <- many misc
return (xml ++ misc1 ++ dtdPart ++ misc2)
xMLDecl :: XParser s XmlTrees
xMLDecl
= between (try $ string "")
( do
vi <- versionInfo
ed <- option [] encodingDecl
sd <- option [] sDDecl
XT.skipS0
return (vi ++ ed ++ sd)
)
> "xml declaration"
xMLDecl' :: XParser s XmlTrees
xMLDecl'
= do
al <- xMLDecl
return [mkPi' (mkName t_xml) al]
xMLDecl'' :: XParser s XmlTree
xMLDecl''
= do
al <- option [] (try xMLDecl)
return (mkRoot' al [])
versionInfo :: XParser s XmlTrees
versionInfo
= ( do
try ( XT.skipS
>>
XT.keyword a_version
>>
return ()
)
XT.eq
vi <- XT.quoted XT.versionNum
return [mkAttr' (mkName a_version) [mkText' vi]]
)
> "version info (with quoted version number)"
misc :: XParser s XmlTree
misc
= comment
<|>
pI
<|>
( ( do
ws <- XT.sPace
return (mkText' ws)
) > ""
)
-- ------------------------------------------------------------
--
-- Document Type definition (2.8)
doctypedecl :: XParser s XmlTrees
doctypedecl
= between (try $ string "')
( do
XT.skipS
n <- XT.name
exId <- option [] ( try ( do
XT.skipS
externalID
)
)
XT.skipS0
markup <- option []
( do
m <- between (char '[' ) (char ']') markupOrDeclSep
XT.skipS0
return m
)
return [mkDTDElem' DOCTYPE ((a_name, n) : exId) markup]
)
markupOrDeclSep :: XParser s XmlTrees
markupOrDeclSep
= ( do
ll <- many ( markupdecl
<|>
declSep
<|>
XT.mkList conditionalSect
)
return (concat ll)
)
declSep :: XParser s XmlTrees
declSep
= XT.mkList XT.peReferenceT
<|>
( do
XT.skipS
return []
)
markupdecl :: XParser s XmlTrees
markupdecl
= XT.mkList
( pI
<|>
comment
<|>
XD.dtdDeclTokenizer
)
-- ------------------------------------------------------------
--
-- Standalone Document Declaration (2.9)
sDDecl :: XParser s XmlTrees
sDDecl
= do
try ( XT.skipS
>>
XT.keyword a_standalone
>>
return ()
)
XT.eq
sd <- XT.quoted (XT.keywords [v_yes, v_no])
return [mkAttr' (mkName a_standalone) [mkText' sd]]
-- ------------------------------------------------------------
--
-- element, tags and content (3, 3.1)
element :: XParser s XmlTree
element
= char '<'
>>
element'
element' :: XParser s XmlTree
element'
= ( do
e <- elementStart
rwnf e `seq` elementRest e -- evaluate name and attribute list before parsing contents
) > "element"
elementStart :: XParser s (QName, XmlTrees)
elementStart
= do
n <- XT.name
al <- attrList
XT.skipS0
return (mkName n, al)
where
attrList
= option [] ( do
XT.skipS
attrList'
)
attrList'
= option [] ( do
a1 <- attribute
al <- attrList
let n = fromJust . getAttrName $ a1
if n `elem` map (fromJust . getAttrName) al
then unexpected
( "attribute name " ++
show (qualifiedName n) ++
" occurs twice in attribute list"
)
else return (a1 : al)
)
elementRest :: (QName, XmlTrees) -> XParser s XmlTree
elementRest (n, al)
= ( do
XT.checkString "/>"
return $ mkElement' n al []
)
<|>
( do
XT.gt
c <- content
eTag n
return $ mkElement' n al c
)
> "proper attribute list followed by \"/>\" or \">\""
eTag :: QName -> XParser s ()
eTag n'
= do
XT.checkString "" > ""
n <- XT.name
XT.skipS0
XT.gt
if n == qualifiedName n'
then return ()
else unexpected ("illegal end tag " ++ n ++ "> found, " ++ qualifiedName n' ++ "> expected")
attribute :: XParser s XmlTree
attribute
= do
n <- XT.name
XT.eq
v <- XT.attrValueT
return $ mkAttr' (mkName n) v
{- this parser corresponds to the XML spec but it's inefficent because of more than 1 char lookahead
content :: XParser s XmlTrees
content
= do
c1 <- charData
cl <- many
( do
l <- ( element
<|>
cDSect
<|>
pI
<|>
comment
)
c <- charData
return (l : c)
)
return (c1 ++ concat cl)
-}
-- this simpler content parser does not need more than a single lookahead
-- so no try parsers (inefficient) are neccessary
content :: XParser s XmlTrees
content
= XT.mergeTextNodes <$>
many
( ( do -- parse markup but no closing tags
try ( XT.lt
>>
notFollowedBy (char '/')
>>
return ()
)
markup
)
<|>
charData'
<|>
XT.referenceT
)
where
markup
= element'
<|>
pI'
<|>
( char '!'
>>
( comment'
<|>
cDSect'
)
)
contentWithTextDecl :: XParser s XmlTrees
contentWithTextDecl
= option [] textDecl
>>
content
-- ------------------------------------------------------------
--
-- Conditional Sections (3.4)
--
-- conditional sections are parsed in two steps,
-- first the whole content is detected,
-- and then, after PE substitution include sections are parsed again
conditionalSect :: XParser s XmlTree
conditionalSect
= do
XT.checkString ""
>>
return ""
)
<|>
( do
XT.checkString "" ++ cs2)
)
<|>
( do
c <- xmlChar
cs <- condSectCont
return (c : cs)
)
-- ------------------------------------------------------------
--
-- External Entities (4.2.2)
externalID :: XParser s Attributes
externalID
= ( do
_ <- XT.keyword k_system
XT.skipS
lit <- XT.systemLiteral
return [(k_system, lit)]
)
<|>
( do
_ <- XT.keyword k_public
XT.skipS
pl <- XT.pubidLiteral
XT.skipS
sl <- XT.systemLiteral
return [ (k_system, sl)
, (k_public, pl) ]
)
> "SYSTEM or PUBLIC declaration"
-- ------------------------------------------------------------
--
-- Text Declaration (4.3.1)
textDecl :: XParser s XmlTrees
textDecl
= between (try $ string "")
( do
vi <- option [] versionInfo
ed <- encodingDecl
XT.skipS0
return (vi ++ ed)
)
> "text declaration"
textDecl'' :: XParser s XmlTree
textDecl''
= do
al <- option [] (try textDecl)
return (mkRoot' al [])
-- ------------------------------------------------------------
--
-- Encoding Declaration (4.3.3)
encodingDecl :: XParser s XmlTrees
encodingDecl
= do
try ( XT.skipS
>>
XT.keyword a_encoding
>>
return ()
)
XT.eq
ed <- XT.quoted XT.encName
return [mkAttr' (mkName a_encoding) [mkText' ed]]
-- ------------------------------------------------------------
--
-- the main entry points:
-- parsing the content of a text node
-- or parsing the text children from a tag node
-- |
-- the inverse function to 'xshow', (for XML content).
--
-- the string parameter is parsed with the XML content parser.
-- result is the list of trees or in case of an error a single element list with the
-- error message as node. No entity or character subtitution is done here,
-- but the XML parser can do this for the predefined XML or the char references for performance reasons
--
-- see also: 'parseXmlContent'
xread :: String -> XmlTrees
xread = xread' content -- take the content parser for parsing the string
xreadDoc :: String -> XmlTrees
xreadDoc = xread' document' -- take the document' parser for parsing the string
xread' :: XParser () XmlTrees -> String -> XmlTrees
xread' content' str
= parseXmlFromString parser (withNormNewline ()) loc str
where
loc = "string: " ++ show (if length str > 40 then take 40 str ++ "..." else str)
parser = do
res <- content'
eof -- test on everything consumed
return res
-- |
-- the filter version of 'xread'
parseXmlContent :: XmlTree -> XmlTrees
parseXmlContent
= xread . xshow . (:[])
-- |
-- a more general version of 'parseXmlContent'.
-- The parser to be used and the context are extra parameter
parseXmlText :: SimpleXParser XmlTrees -> XPState () -> String -> XmlTree -> XmlTrees
parseXmlText p s0 loc = parseXmlFromString p s0 loc . xshow . (:[])
parseXmlDocument :: String -> String -> XmlTrees
parseXmlDocument = parseXmlFromString document' (withNormNewline ())
parseXmlFromString :: SimpleXParser XmlTrees -> XPState () -> String -> String -> XmlTrees
parseXmlFromString parser s0 loc
= either ((:[]) . mkError' c_err . (++ "\n") . show) id
. runParser parser s0 loc
-- ------------------------------------------------------------
--
removeEncodingSpec :: XmlTree -> XmlTrees
removeEncodingSpec t
| isText t
= ( either ((:[]) . mkError' c_err . (++ "\n") . show) ((:[]) . mkText')
. runParser parser (withNormNewline ()) "remove encoding spec"
. fromMaybe ""
. getText
) t
| otherwise
= [t]
where
parser :: XParser s String
parser = option [] textDecl
>>
getInput
-- ------------------------------------------------------------
-- |
-- general parser for parsing arbitray parts of a XML document
parseXmlPart :: SimpleXParser XmlTrees -> String -> String -> XmlTree -> XmlTrees
parseXmlPart parser expected context t
= parseXmlText
( do
res <- parser
eof > expected
return res
) (withoutNormNewline ()) context
$ t
-- ------------------------------------------------------------
-- |
-- Parser for parts of a DTD
parseXmlDTDPart :: String -> XmlTree -> XmlTrees
parseXmlDTDPart
= parseXmlPart markupOrDeclSep "markup declaration"
-- ------------------------------------------------------------
-- |
-- Parser for general entites
parseXmlEntityValueAsContent :: String -> XmlTree -> XmlTrees
parseXmlEntityValueAsContent
= parseXmlPart content "general entity value"
-- ------------------------------------------------------------
-- |
-- Parser for entity substitution within attribute values
parseXmlEntityValueAsAttrValue :: String -> XmlTree -> XmlTrees
parseXmlEntityValueAsAttrValue
= parseXmlPart (XT.attrValueT' "<&") "attribute value"
-- ------------------------------------------------------------
-- |
-- Parser for NMTOKENs
parseNMToken :: String -> XmlTree -> XmlTrees
parseNMToken
= parseXmlPart (many1 XT.nmtokenT) "nmtoken"
-- ------------------------------------------------------------
-- |
-- Parser for XML names
parseName :: String -> XmlTree -> XmlTrees
parseName
= parseXmlPart (many1 XT.nameT) "name"
-- ------------------------------------------------------------
-- |
-- try to parse a xml encoding spec.
--
--
-- * 1.parameter encParse : the parser for the encoding decl
--
-- - 2.parameter root : a document root
--
-- - returns : the same tree, but with an additional
-- attribute \"encoding\" in the root node
-- in case of a valid encoding spec
-- else the unchanged tree
parseXmlEncodingSpec :: SimpleXParser XmlTree -> XmlTree -> XmlTrees
parseXmlEncodingSpec encDecl x
= (:[]) .
( if isRoot x
then parseEncSpec
else id
) $ x
where
parseEncSpec r
= case ( runParser encDecl (withNormNewline ()) source
. xshow
. getChildren
$ r
) of
Right t
-> changeAttrl (mergeAttrl . fromMaybe [] . getAttrl $ t) r
Left _
-> r
where
-- arrow \"getAttrValue a_source\" programmed on the tree level (oops!)
source = xshow
. concat
. map getChildren
. filter ((== a_source)
. maybe "" qualifiedName . getAttrName)
. fromMaybe []
. getAttrl $ r
parseXmlEntityEncodingSpec :: XmlTree -> XmlTrees
parseXmlEntityEncodingSpec = parseXmlEncodingSpec textDecl''
parseXmlDocEncodingSpec :: XmlTree -> XmlTrees
parseXmlDocEncodingSpec = parseXmlEncodingSpec xMLDecl''
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Parser/XmlTokenParser.hs 0000644 0000000 0000000 00000033271 13001373075 020270 0 ustar 00 0000000 0000000 {-# LANGUAGE CPP #-}
-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Parser.XmlTokenParser
Copyright : Copyright (C) 2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
Parsec parser for XML tokens
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Parser.XmlTokenParser
( allBut
, allBut1
, amp
, asciiLetter
, attrChar
, attrValue
, bar
, charRef
, checkString
, comma
, dq
, encName
, entityRef
, entityValue
, eq
, gt
, keyword
, keywords
, lpar
, lt
, name
, names
, ncName
, nmtoken
, nmtokens
, peReference
, pubidLiteral
, qName
, quoted
, reference
, rpar
, semi
, separator
, singleChar
, singleChars
, skipS
, skipS0
, sPace
, sPace0
, sq
, systemLiteral
, versionNum
, concRes
, mkList
, nameT
, nmtokenT
, entityValueT
, entityTokensT
, entityCharT
, attrValueT
, attrValueT'
, referenceT
, charRefT
, entityRefT
, peReferenceT
, singleCharsT
, mergeTextNodes
)
where
#if MIN_VERSION_base(4,8,2)
#else
import Control.Applicative ((<$>))
#endif
import Data.Char.Properties.XMLCharProps ( isXmlChar
, isXmlCharCR
)
import Data.String.Unicode ( intToCharRef
, intToCharRefHex
)
import Text.ParserCombinators.Parsec
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.XmlNode ( mkDTDElem'
, mkText'
, mkCharRef'
, mkEntityRef'
, mergeText
)
import Text.XML.HXT.Parser.XmlCharParser ( xmlNameChar
, xmlNameStartChar
, xmlNCNameChar
, xmlNCNameStartChar
, xmlSpaceChar
, xmlCRLFChar
, XParser
)
-- ------------------------------------------------------------
--
-- Character (2.2) and White Space (2.3)
--
-- Unicode parsers in module XmlCharParser
-- ------------------------------------------------------------
sPace :: XParser s String
sPace
= many1 xmlSpaceChar
sPace0 :: XParser s String
sPace0
= many xmlSpaceChar
skipS :: XParser s ()
skipS
= skipMany1 xmlSpaceChar
skipS0 :: XParser s ()
skipS0
= skipMany xmlSpaceChar
-- ------------------------------------------------------------
--
-- Names and Tokens (2.3)
asciiLetter :: XParser s Char
asciiLetter
= satisfy (\ c -> ( c >= 'A' && c <= 'Z' ||
c >= 'a' && c <= 'z' )
)
> "ASCII letter"
name :: XParser s String
name
= do
s1 <- xmlNameStartChar
sl <- many xmlNameChar
return (s1 : sl)
> "Name"
-- Namespaces in XML: Rules [4-5] NCName:
ncName :: XParser s String
ncName
= do
s1 <- xmlNCNameStartChar
sl <- many xmlNCNameChar
return (s1 : sl)
> "NCName"
-- Namespaces in XML: Rules [6-8] QName:
qName :: XParser s (String, String)
qName
= do
s1 <- ncName
s2 <- option "" (char ':' >> ncName)
return ( if null s2
then (s2, s1)
else (s1, s2)
)
nmtoken :: XParser s String
nmtoken
= try (many1 xmlNameChar)
> "Nmtoken"
names :: XParser s [String]
names
= sepBy1 name sPace
nmtokens :: XParser s [String]
nmtokens
= sepBy1 nmtoken sPace
-- ------------------------------------------------------------
--
-- Literals (2.3)
singleChar :: String -> XParser s Char
singleChar notAllowed
= satisfy (\ c -> isXmlCharCR c && c `notElem` notAllowed)
<|>
xmlCRLFChar
singleChars :: String -> XParser s String
singleChars notAllowed
= many1 (singleChar notAllowed)
entityValue :: XParser s String
entityValue
= ( do
v <- entityValueDQ
return ("\"" ++ v ++ "\"")
)
<|>
( do
v <- entityValueSQ
return ("'" ++ v ++ "'")
)
> "entity value (in quotes)"
entityValueDQ :: XParser s String
entityValueDQ
= between dq dq (concRes $ many $ attrChar "&\"")
entityValueSQ :: XParser s String
entityValueSQ
= between sq sq (concRes $ many $ attrChar "&\'")
attrValue :: XParser s String
attrValue
= ( do
v <- attrValueDQ
return ("\"" ++ v ++ "\"")
)
<|>
( do
v <- attrValueSQ
return ("'" ++ v ++ "'")
)
> "attribute value (in quotes)"
attrValueDQ :: XParser s String
attrValueDQ
= between dq dq (concRes $ many $ attrChar "<&\"")
attrValueSQ :: XParser s String
attrValueSQ
= between sq sq (concRes $ many $ attrChar "<&\'")
attrChar :: String -> XParser s String
attrChar notAllowed
= reference
<|>
mkList (singleChar notAllowed)
> ("legal attribute or entity character or reference (not allowed: " ++ show notAllowed ++ " )")
systemLiteral :: XParser s String
systemLiteral
= between dq dq (many $ noneOf "\"")
<|>
between sq sq (many $ noneOf "\'")
> "system literal (in quotes)"
pubidLiteral :: XParser s String
pubidLiteral
= between dq dq (many $ pubidChar "\'")
<|>
between sq sq (many $ pubidChar "")
> "pubid literal (in quotes)"
where
pubidChar :: String -> XParser s Char
pubidChar quoteChars
= asciiLetter
<|>
digit
<|>
oneOf " \r\n" -- no "\t" allowed, so xmlSpaceChar parser not used
<|>
oneOf "-()+,./:=?;!*#@$_%"
<|>
oneOf quoteChars
-- ------------------------------------------------------------
--
-- Character and Entity References (4.1)
reference :: XParser s String
reference
= ( do
i <- charRef
return ("" ++ show i ++ ";")
)
<|>
( do
n <- entityRef
return ("&" ++ n ++ ";")
)
checkCharRef :: Int -> XParser s Int
checkCharRef i
= if ( i <= fromEnum (maxBound::Char)
&& isXmlChar (toEnum i)
)
then return i
else unexpected ("illegal value in character reference: " ++ intToCharRef i ++ " , in hex: " ++ intToCharRefHex i)
charRef :: XParser s Int
charRef
= do
checkString ""
d <- many1 hexDigit
semi
checkCharRef (hexStringToInt d)
<|>
do
checkString ""
d <- many1 digit
semi
checkCharRef (decimalStringToInt d)
> "character reference"
entityRef :: XParser s String
entityRef
= do
amp
n <- name
semi
return n
> "entity reference"
peReference :: XParser s String
peReference
= try ( do
_ <- char '%'
n <- name
semi
return n
)
> "parameter-entity reference"
-- ------------------------------------------------------------
--
-- 4.3
encName :: XParser s String
encName
= do
c <- asciiLetter
r <- many (asciiLetter <|> digit <|> oneOf "._-")
return (c:r)
versionNum :: XParser s String
versionNum
= many1 xmlNameChar
-- ------------------------------------------------------------
--
-- keywords
keyword :: String -> XParser s String
keyword kw
= try ( do
n <- name
if n == kw
then return n
else unexpected n
)
> kw
keywords :: [String] -> XParser s String
keywords
= foldr1 (<|>) . map keyword
-- ------------------------------------------------------------
--
-- parser for quoted attribute values
quoted :: XParser s a -> XParser s a
quoted p
= between dq dq p
<|>
between sq sq p
-- ------------------------------------------------------------
--
-- simple char parsers
dq, sq, lt, gt, semi, amp :: XParser s ()
dq = char '\"' >> return ()
sq = char '\'' >> return ()
lt = char '<' >> return ()
gt = char '>' >> return ()
semi = char ';' >> return ()
amp = char '&' >> return ()
{-# INLINE dq #-}
{-# INLINE sq #-}
{-# INLINE lt #-}
{-# INLINE gt #-}
{-# INLINE semi #-}
{-# INLINE amp #-}
separator :: Char -> XParser s ()
separator c
= do
_ <- try ( do
skipS0
char c
)
skipS0
> [c]
bar, comma, eq, lpar, rpar :: XParser s ()
bar = separator '|'
comma = separator ','
eq = separator '='
{-# INLINE bar #-}
{-# INLINE comma #-}
{-# INLINE eq #-}
lpar = char '(' >> skipS0
{-# INLINE lpar #-}
rpar = skipS0 >> char ')' >> return ()
{-# INLINE rpar #-}
checkString :: String -> XParser s ()
checkString s
= try $ string s >> return ()
{-# INLINE checkString #-}
-- ------------------------------------------------------------
--
-- all chars but not a special substring
allBut :: (XParser s Char -> XParser s String) -> String -> XParser s String
allBut p str
= allBut1 p (const True) str
allBut1 :: (XParser s Char -> XParser s String) -> (Char -> Bool) -> String -> XParser s String
allBut1 p prd (c:rest)
= p ( satisfy (\ x -> isXmlCharCR x && prd x && not (x == c) )
<|>
xmlCRLFChar
<|>
try ( char c
>>
notFollowedBy (try (string rest) >> return c)
>>
return c
)
)
allBut1 _p _prd str
= error ("allBut1 _ _ " ++ show str ++ " is undefined")
-- ------------------------------------------------------------
--
-- concatenate parse results
concRes :: XParser s [[a]] -> XParser s [a]
concRes p
= do
sl <- p
return (concat sl)
mkList :: XParser s a -> XParser s [a]
mkList p
= do
r <- p
return [r]
-- ------------------------------------------------------------
--
-- token parsers returning XmlTrees
--
-- ------------------------------------------------------------
--
-- Literals (2.3)
nameT :: XParser s XmlTree
nameT
= do
n <- name
return (mkDTDElem' NAME [(a_name, n)] [])
nmtokenT :: XParser s XmlTree
nmtokenT
= do
n <- nmtoken
return (mkDTDElem' NAME [(a_name, n)] [])
entityValueT :: XParser s XmlTrees
entityValueT
= do
sl <- between dq dq (entityTokensT "%&\"")
return sl
<|>
do
sl <- between sq sq (entityTokensT "%&\'")
return sl
> "entity value (in quotes)"
entityTokensT :: String -> XParser s XmlTrees
entityTokensT notAllowed
= many (entityCharT notAllowed)
entityCharT :: String -> XParser s XmlTree
entityCharT notAllowed
= peReferenceT
<|>
charRefT
<|>
bypassedEntityRefT
<|>
( do
cs <- many1 (singleChar notAllowed)
return (mkText' cs)
)
attrValueT :: XParser s XmlTrees
attrValueT
= between dq dq (attrValueT' "<&\"")
<|>
between sq sq (attrValueT' "<&\'")
> "attribute value (in quotes)"
attrValueT' :: String -> XParser s XmlTrees
attrValueT' notAllowed
= mergeTextNodes <$> many ( referenceT <|> singleCharsT notAllowed)
singleCharsT :: String -> XParser s XmlTree
singleCharsT notAllowed
= do
cs <- singleChars notAllowed
return (mkText' cs)
-- ------------------------------------------------------------
--
-- Character and Entity References (4.1)
referenceT :: XParser s XmlTree
referenceT
= charRefT
<|>
entityRefT
charRefT :: XParser s XmlTree
charRefT
= do
i <- charRef
return (mkCharRef' i)
entityRefT :: XParser s XmlTree
entityRefT
= do
n <- entityRef
return $! (maybe (mkEntityRef' n) mkCharRef' . lookup n $ predefinedXmlEntities)
-- optimization: predefined XML entity refs are converted into equivalent char refs
-- so there is no need for an entitiy substitution phase, if there is no DTD
-- Attention: entityRefT must only be called from within XML/HTML content
-- in DTD parsing this optimization is not allowed because of different semantics
-- of charRefs and entityRefs during substitution of entites in ENTITY definitions
predefinedXmlEntities :: [(String, Int)]
predefinedXmlEntities
= [ ("lt", 60)
, ("gt", 62)
, ("amp", 38)
, ("apos", 39)
, ("quot", 34)
]
bypassedEntityRefT :: XParser s XmlTree
bypassedEntityRefT
= do
n <- entityRef
return $! (mkText' ("&" ++ n ++ ";"))
peReferenceT :: XParser s XmlTree
peReferenceT
= do
r <- peReference
return $! (mkDTDElem' PEREF [(a_peref, r)] [])
-- ------------------------------------------------------------
mergeTextNodes :: XmlTrees -> XmlTrees
mergeTextNodes
= foldr addText []
where
addText :: XmlTree -> XmlTrees -> XmlTrees
addText t []
= [t]
addText t (t1 : ts)
= mergeText t t1 ++ ts
-- ------------------------------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/XMLSchema/DataTypeLibW3CNames.hs 0000644 0000000 0000000 00000007301 12752557014 021346 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.XMLSchema.DataTypeLibW3C
Copyright : Copyright (C) 2005-2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Version : $Id$
Datatype library for the W3C XML schema datatypes
-}
-- ------------------------------------------------------------
module Text.XML.HXT.XMLSchema.DataTypeLibW3CNames
where
-- ------------------------------------------------------------
-- | Namespace of the W3C XML schema datatype library
w3cNS :: String
w3cNS = "http://www.w3.org/2001/XMLSchema-datatypes"
xsd_string
, xsd_normalizedString
, xsd_token
, xsd_language
, xsd_NMTOKEN
, xsd_NMTOKENS
, xsd_Name
, xsd_NCName
, xsd_ID
, xsd_IDREF
, xsd_IDREFS
, xsd_ENTITY
, xsd_ENTITIES
, xsd_anyURI
, xsd_QName
, xsd_NOTATION
, xsd_hexBinary
, xsd_base64Binary
, xsd_decimal
, xsd_integer
, xsd_nonPositiveInteger
, xsd_negativeInteger
, xsd_nonNegativeInteger
, xsd_positiveInteger
, xsd_long
, xsd_int
, xsd_short
, xsd_byte
, xsd_unsignedLong
, xsd_unsignedInt
, xsd_unsignedShort
, xsd_unsignedByte
, xsd_boolean
, xsd_float
, xsd_double
, xsd_time
, xsd_duration
, xsd_date
, xsd_dateTime
, xsd_gDay
, xsd_gMonth
, xsd_gMonthDay
, xsd_gYear
, xsd_gYearMonth :: String
xsd_string = "string"
xsd_normalizedString = "normalizedString"
xsd_token = "token"
xsd_language = "language"
xsd_NMTOKEN = "NMTOKEN"
xsd_NMTOKENS = "NMTOKENS"
xsd_Name = "Name"
xsd_NCName = "NCName"
xsd_ID = "ID"
xsd_IDREF = "IDREF"
xsd_IDREFS = "IDREFS"
xsd_ENTITY = "ENTITY"
xsd_ENTITIES = "ENTITIES"
xsd_anyURI = "anyURI"
xsd_QName = "QName"
xsd_NOTATION = "NOTATION"
xsd_hexBinary = "hexBinary"
xsd_base64Binary = "base64Binary"
xsd_decimal = "decimal"
xsd_integer = "integer"
xsd_nonPositiveInteger = "nonPositiveInteger"
xsd_negativeInteger = "negativeInteger"
xsd_nonNegativeInteger = "nonNegativeInteger"
xsd_positiveInteger = "positiveInteger"
xsd_long = "long"
xsd_int = "int"
xsd_short = "short"
xsd_byte = "byte"
xsd_unsignedLong = "unsignedLong"
xsd_unsignedInt = "unsignedInt"
xsd_unsignedShort = "unsignedShort"
xsd_unsignedByte = "unsignedByte"
xsd_boolean = "boolean"
xsd_float = "float"
xsd_double = "double"
xsd_time = "time"
xsd_duration = "duration"
xsd_date = "date"
xsd_dateTime = "dateTime"
xsd_gDay = "gDay"
xsd_gMonth = "gMonth"
xsd_gMonthDay = "gMonthDay"
xsd_gYear = "gYear"
xsd_gYearMonth = "gYearMonth"
xsd_length
, xsd_maxLength
, xsd_minLength
, xsd_maxExclusive
, xsd_minExclusive
, xsd_maxInclusive
, xsd_minInclusive
, xsd_totalDigits
, xsd_fractionDigits
, xsd_pattern
, xsd_enumeration
, xsd_whiteSpace :: String
xsd_length = "length"
xsd_maxLength = "maxLength"
xsd_minLength = "minLength"
xsd_maxExclusive = "maxExclusive"
xsd_minExclusive = "minExclusive"
xsd_maxInclusive = "maxInclusive"
xsd_minInclusive = "minInclusive"
xsd_totalDigits = "totalDigits"
xsd_fractionDigits = "fractionDigits"
xsd_pattern = "pattern"
xsd_enumeration = "enumeration"
xsd_whiteSpace = "whiteSpace"
-- ----------------------------------------
hxt-9.3.1.22/src/Text/XML/HXT/Version.hs 0000644 0000000 0000000 00000000121 12752557014 015540 0 ustar 00 0000000 0000000 module Text.XML.HXT.Version
where
hxt_version :: String
hxt_version = "9.3.1.15"
hxt-9.3.1.22/LICENSE 0000644 0000000 0000000 00000002120 12752557013 011746 0 ustar 00 0000000 0000000 The MIT License
Copyright (c) 2005 Uwe Schmidt, Martin Schmidt, Torben Kuseler
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
hxt-9.3.1.22/Setup.lhs 0000755 0000000 0000000 00000000157 12752557013 012564 0 ustar 00 0000000 0000000 #!/usr/bin/runhaskell
> module Main where
> import Distribution.Simple
> main :: IO ()
> main = defaultMain
hxt-9.3.1.22/hxt.cabal 0000644 0000000 0000000 00000023277 14025464044 012544 0 ustar 00 0000000 0000000 -- arch-tag: Haskell XML Toolbox main description file
Name: hxt
Version: 9.3.1.22
Synopsis: A collection of tools for processing XML with Haskell.
Description: The Haskell XML Toolbox bases on the ideas of HaXml and HXML,
but introduces a more general approach for processing XML with Haskell.
The Haskell XML Toolbox uses a generic data model for representing XML documents,
including the DTD subset and the document subset, in Haskell.
It contains a validating XML parser, a HTML parser, namespace support,
an XPath expression evaluator, an XSLT library, a RelaxNG schema validator
and funtions for serialization and deserialization of user defined data.
The library makes extensive use of the arrow approach for processing XML.
Since version 9 the toolbox is partitioned into various (sub-)packages.
This package contains the core functionality,
hxt-curl, hxt-tagsoup, hxt-relaxng, hxt-xpath, hxt-xslt,
hxt-regex-xmlschema contain the extensions.
hxt-unicode contains encoding and decoding functions,
hxt-charproperties char properties for unicode and XML.
Changes from 9.3.1.21: ghc-9.0 compatibility
.
Changes from 9.3.1.20: ghc 8.10 and 9.0 compatibility, tuple picker up to 24-tuples, Either instance for xpickle
.
Changes from 9.3.1.19: ghc-8.8.2 compatibility
.
Changes from 9.3.1.15: Bug in quoting PI instructions in showXmlTrees fixed
.
Changes from 9.3.1.14: For ghc-7.10 network-uri is automatically selected
.
Changes from 9.3.1.13: ghc-7.10 compatibility
.
Changes from 9.3.1.12: Bug when unpickling an empty attribute value removed
.
Changes from 9.3.1.11: Bug fix in haddock comments
.
Changes from 9.3.1.10: Bug in DTD validation, space and time leak in delta removed
.
Changes from 9.3.1.9: lower bound of mtl dependency lowered to 2.0.1
.
Changes from 9.3.1.8: Bug in hread removed
.
Changes from 9.3.1.7: Foldable and Traversable instances for NTree added
Control.Except used instead of deprecated Control.Error
.
Changes from 9.3.1.6: canonicalize added in hread and hreadDoc
.
Changes from 9.3.1.4: conditionally (no default)
dependency from networt changed to network-uri with flag "network-uri"
.
Changes from 9.3.1.3: warnings from ghc-7.8.1 removed
.
Changes from 9.3.1.2: https as protocol added
.
Changes from 9.3.1.1: new parser xreadDoc
.
Changes from 9.3.1.0: in readString all input decoding switched off
.
Changes from 9.3.0.1: lower bound for network set to be >= 2.4
.
Changes from 9.3.0: upper bound for network set to be < 2.4
(URI signatures changed in 2.4)
.
Changes from 9.2.2: XMLSchema validation integrated
.
Changes from 9.2.1: user defined mime type handlers added
.
Changes from 9.2.0: New warnings from ghc-7.4 removed
License: MIT
License-file: LICENSE
Author: Uwe Schmidt, Martin Schmidt, Torben Kuseler
Maintainer: Uwe Schmidt
Stability: Stable
Category: XML
Homepage: https://github.com/UweSchmidt/hxt
Copyright: Copyright (c) 2005-2019 Uwe Schmidt
Build-type: Simple
Cabal-version: >=1.10
extra-source-files:
examples/arrows/absurls/AbsURIs.hs
examples/arrows/absurls/lousy.html
examples/arrows/absurls/Makefile
examples/arrows/absurls/ProcessDocument.hs
examples/arrows/AGentleIntroductionToHXT/.ghci
examples/arrows/AGentleIntroductionToHXT/Makefile
examples/arrows/AGentleIntroductionToHXT/PicklerExample/Baseball.hs
examples/arrows/AGentleIntroductionToHXT/PicklerExample/Makefile
examples/arrows/AGentleIntroductionToHXT/PicklerExample/new-simple2.xml
examples/arrows/AGentleIntroductionToHXT/PicklerExample/simple2.xml
examples/arrows/AGentleIntroductionToHXT/SimpleExamples.hs
examples/arrows/dtd2hxt/DTDtoHXT.hs
examples/arrows/dtd2hxt/.ghci
examples/arrows/dtd2hxt/Makefile
examples/arrows/HelloWorld/bye.xml
examples/arrows/HelloWorld/HelloWorld.hs
examples/arrows/HelloWorld/hello.xml
examples/arrows/HelloWorld/Makefile
examples/arrows/HelloWorld/Mini.hs
examples/arrows/hparser/emptyElements.html
examples/arrows/hparser/example1.xml
examples/arrows/hparser/example1CRLF.xml
examples/arrows/hparser/HXmlParser.hs
examples/arrows/hparser/invalid1.xml
examples/arrows/hparser/invalid2.rng
examples/arrows/hparser/invalid3.rng
examples/arrows/hparser/invalid.xml
examples/arrows/hparser/lousy.html
examples/arrows/hparser/Makefile
examples/arrows/hparser/namespace0.xml
examples/arrows/hparser/namespace1.xml
examples/arrows/hparser/valid1.rng
examples/arrows/hparser/valid1.xml
examples/arrows/performance/GenDoc.hs
examples/arrows/performance/Makefile
examples/arrows/pickle/Makefile
examples/arrows/pickle/PickleTest.hs
examples/xhtml/tmp.xml
examples/xhtml/xhtml1-frameset.dtd
examples/xhtml/xhtml1-strict.dtd
examples/xhtml/xhtml1-transitional.dtd
examples/xhtml/xhtml-lat1.ent
examples/xhtml/xhtml-special.ent
examples/xhtml/xhtml-symbol.ent
examples/xhtml/xhtml.xml
flag network-uri
description: Get Network.URI from the network-uri package,
with ghc < 7.10 default is False,
with ghc >= 7.10 default is True
default: False
flag profile
description: turn profiling on
default: False
library
exposed-modules:
Control.Arrow.ArrowExc,
Control.Arrow.ArrowIO,
Control.Arrow.ArrowIf,
Control.Arrow.ArrowList,
Control.Arrow.ArrowNF,
Control.Arrow.ArrowNavigatableTree,
Control.Arrow.ArrowState,
Control.Arrow.ArrowTree,
Control.Arrow.IOListArrow,
Control.Arrow.IOStateListArrow,
Control.Arrow.ListArrow,
Control.Arrow.ListArrows,
Control.Arrow.NTreeEdit,
Control.Arrow.StateListArrow,
Control.FlatSeq,
Data.AssocList,
Data.Atom,
Data.Function.Selector,
Data.Tree.Class,
Data.Tree.NTree.TypeDefs,
Data.Tree.NTree.Edit,
Data.Tree.NTree.Zipper.TypeDefs,
Data.Tree.NavigatableTree.Class,
Data.Tree.NavigatableTree.XPathAxis,
Text.XML.HXT.Arrow.Binary,
Text.XML.HXT.Arrow.DTDProcessing,
Text.XML.HXT.Arrow.DocumentInput,
Text.XML.HXT.Arrow.DocumentOutput,
Text.XML.HXT.Arrow.Edit,
Text.XML.HXT.Arrow.GeneralEntitySubstitution,
Text.XML.HXT.Arrow.Namespace,
Text.XML.HXT.Arrow.ParserInterface,
Text.XML.HXT.Arrow.Pickle,
Text.XML.HXT.Arrow.Pickle.DTD,
Text.XML.HXT.Arrow.Pickle.Schema,
Text.XML.HXT.Arrow.Pickle.Xml,
Text.XML.HXT.Arrow.ProcessDocument,
Text.XML.HXT.Arrow.ReadDocument,
Text.XML.HXT.Arrow.WriteDocument,
Text.XML.HXT.Arrow.XmlArrow,
Text.XML.HXT.Arrow.XmlOptions,
Text.XML.HXT.Arrow.XmlRegex,
Text.XML.HXT.Arrow.XmlState,
Text.XML.HXT.Arrow.XmlState.ErrorHandling,
Text.XML.HXT.Arrow.XmlState.MimeTypeTable,
Text.XML.HXT.Arrow.XmlState.RunIOStateArrow,
Text.XML.HXT.Arrow.XmlState.TraceHandling,
Text.XML.HXT.Arrow.XmlState.TypeDefs,
Text.XML.HXT.Arrow.XmlState.URIHandling,
Text.XML.HXT.Arrow.XmlState.SystemConfig,
Text.XML.HXT.Core,
Text.XML.HXT.DOM.FormatXmlTree,
Text.XML.HXT.DOM.Interface,
Text.XML.HXT.DOM.MimeTypeDefaults,
Text.XML.HXT.DOM.MimeTypes,
Text.XML.HXT.DOM.QualifiedName,
Text.XML.HXT.DOM.ShowXml,
Text.XML.HXT.DOM.TypeDefs,
Text.XML.HXT.DOM.Util,
Text.XML.HXT.DOM.XmlKeywords,
Text.XML.HXT.DOM.XmlNode,
Text.XML.HXT.DTDValidation.AttributeValueValidation,
Text.XML.HXT.DTDValidation.DTDValidation,
Text.XML.HXT.DTDValidation.DocTransformation,
Text.XML.HXT.DTDValidation.DocValidation,
Text.XML.HXT.DTDValidation.IdValidation,
Text.XML.HXT.DTDValidation.RE,
Text.XML.HXT.DTDValidation.TypeDefs,
Text.XML.HXT.DTDValidation.Validation,
Text.XML.HXT.DTDValidation.XmlRE,
Text.XML.HXT.IO.GetFILE,
Text.XML.HXT.Parser.HtmlParsec,
Text.XML.HXT.Parser.ProtocolHandlerUtil,
Text.XML.HXT.Parser.XhtmlEntities,
Text.XML.HXT.Parser.XmlCharParser,
Text.XML.HXT.Parser.XmlDTDParser,
Text.XML.HXT.Parser.XmlDTDTokenParser,
Text.XML.HXT.Parser.XmlEntities,
Text.XML.HXT.Parser.XmlParsec,
Text.XML.HXT.Parser.XmlTokenParser,
Text.XML.HXT.XMLSchema.DataTypeLibW3CNames,
Text.XML.HXT.Version
default-language: Haskell2010
hs-source-dirs: src
ghc-options: -Wall -fwarn-tabs
if flag(profile)
ghc-prof-options: -caf-all
default-extensions: MultiParamTypeClasses DeriveDataTypeable FunctionalDependencies FlexibleInstances CPP
build-depends: base >= 4 && < 5,
containers >= 0.2,
directory >= 1,
filepath >= 1,
parsec >= 2.1 && < 4,
mtl >= 2.0.1 && < 3,
deepseq >= 1.1,
bytestring >= 0.9,
binary >= 0.5,
hxt-charproperties >= 9.1,
hxt-unicode >= 9.0.1,
hxt-regex-xmlschema >= 9.2
if flag(network-uri)
build-depends: network-uri >= 2.6
else
if impl(ghc >= 7.10)
build-depends: network-uri >= 2.6
else
build-depends: network >= 2.4 && < 2.6
Source-Repository head
Type: git
Location: git://github.com/UweSchmidt/hxt.git
hxt-9.3.1.22/examples/arrows/absurls/AbsURIs.hs 0000644 0000000 0000000 00000007251 12752557013 017365 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : AbsURIs
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt
Maintainer : uwe@fh-wedel.de
Stability : experimental
Portability: portable
AbsURIs - Conversion references into absolute URIs in HTML pages
The commandline interface
-}
-- ------------------------------------------------------------
module Main
where
import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML
import System.IO -- import the IO and commandline option stuff
import System.Environment
import System.Console.GetOpt
import System.Exit
import ProcessDocument
-- ------------------------------------------------------------
-- |
-- the main program of the Haskell XML Validating Parser
main :: IO ()
main
= do
argv <- getArgs -- get the commandline arguments
(al, src) <- cmdlineOpts argv -- and evaluate them, return a key-value list
[rc] <- runX (parser al src) -- run the parser arrow
exitProg (rc >= c_err) -- set return code and terminate
-- ------------------------------------------------------------
exitProg :: Bool -> IO a
exitProg True = exitWith (ExitFailure 1)
exitProg False = exitWith ExitSuccess
-- ------------------------------------------------------------
-- |
-- the /real/ main program
--
-- get wellformed document, validates document, propagates and check namespaces
-- and controls output
parser :: SysConfigList -> String -> IOSArrow b Int
parser config src
= configSysVars config -- set all global config options
>>>
readDocument [withParseHTML yes] src -- use HTML parser
>>>
traceMsg 1 "start processing"
>>>
processDocument
>>>
traceMsg 1 "processing finished"
>>>
traceSource
>>>
traceTree
>>>
( writeDocument [] $< getSysAttr "output-file" )
>>>
getErrStatus
-- ------------------------------------------------------------
--
-- the options definition part
-- see doc for System.Console.GetOpt
progName :: String
progName = "AbsURIs"
options :: [OptDescr SysConfig]
options
= generalOptions
++
inputOptions
++
[ Option "f" ["output-file"] (ReqArg (withSysAttr "output-file") "FILE")
"output file for resulting document (default: stdout)"
]
++
outputOptions
++
showOptions
usage :: [String] -> IO a
usage errl
| null errl
= do
hPutStrLn stdout use
exitProg False
| otherwise
= do
hPutStrLn stderr (concat errl ++ "\n" ++ use)
exitProg True
where
header = progName ++ " - Convert all references in an HTML document into absolute URIs\n\n" ++
"Usage: " ++ progName ++ " [OPTION...] [URI or FILE]"
use = usageInfo header options
cmdlineOpts :: [String] -> IO (SysConfigList, String)
cmdlineOpts argv
= case (getOpt Permute options argv) of
(scfg,n,[])
-> do
sa <- src n
help (getConfigAttr a_help scfg) sa
return (scfg, sa)
(_,_,errs)
-> usage errs
where
src [] = return []
src [uri] = return uri
src _ = usage ["only one input uri or file allowed\n"]
help "1" _ = usage []
help _ [] = usage ["no input uri or file given\n"]
help _ _ = return ()
-- ------------------------------------------------------------
hxt-9.3.1.22/examples/arrows/absurls/lousy.html 0000644 0000000 0000000 00000001206 12752557013 017614 0 ustar 00 0000000 0000000
A HTML Documents with some Errors
HTML
html
some text and a &xxx;
1 2 3
Uwe
Schmidt
Last modified: Mon May 12 13:11:29 CEST 2003
hxt-9.3.1.22/examples/arrows/absurls/Makefile 0000644 0000000 0000000 00000001700 12752557013 017212 0 ustar 00 0000000 0000000 # $Id: Makefile,v 1.1 2005/05/12 16:41:38 hxml Exp $
HXT_HOME = ../../..
PKGFLAGS =
GHCFLAGS = -Wall -O2
GHC = ghc $(GHCFLAGS) $(PKGFLAGS)
DIST = $(HXT_HOME)/dist/examples/arrows
DIST_DIR = $(DIST)/absurls
prog = ./AbsURIs
all : $(prog)
AbsURIs : AbsURIs.hs ProcessDocument.hs
$(GHC) --make -o $@ $<
force :
$(GHC) --make -o $(prog) $(prog).hs
test : $(prog)
@echo "===> run a few simple test cases"
$(MAKE) test0
EX = ./lousy.html
test0 :
@echo "===> the source of a lousy html document" ; echo ; sleep 2
cat $(EX)
@sleep 2 ; echo ; echo "===> all refs (href, src attributes) are transformed into absolute URIs with respect to the base element" ; echo ; sleep 2
$(prog) --trace=0 --encoding=ISO-8859-1 --output-encoding=ISO-8859-1 --indent --do-not-issue-warnings $(EX)
@echo
dist :
[ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR)
cp $(EX) Makefile $(prog).hs ProcessDocument.hs $(DIST_DIR)
clean :
rm -f $(prog) *.o *.hi
hxt-9.3.1.22/examples/arrows/absurls/ProcessDocument.hs 0000644 0000000 0000000 00000004416 12752557013 021232 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : ProcessDocument
Copyright : Copyright (C) 2005 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt
Maintainer : uwe@fh-wedel.de
Stability : experimental
Portability: portable
AbsURIs - Conversion references into absolute URIs in HTML pages
The REAL processing functions
-}
-- ------------------------------------------------------------
module ProcessDocument
( processDocument )
where
import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML
import Data.Maybe
-- ------------------------------------------------------------
-- simple example of a processing arrow
processDocument :: IOSArrow XmlTree XmlTree
processDocument
= processChildren (mkAbs `when` isElem)
where
mkAbs = mkAbsURIs $< compBase
compBase :: IOSArrow XmlTree String
compBase
= single searchBaseElem -- search in element (only for wrong input: make the arrow deterministic)
`orElse`
getBaseURI -- use document base
where
searchBaseElem
= hasName "html"
>>> getChildren
>>> hasName "head"
>>> getChildren
>>> hasName "base"
>>> getAttrValue "href"
>>> mkAbsURI
mkAbsURIs :: String -> IOSArrow XmlTree XmlTree
mkAbsURIs base
= processTopDown editURIs -- edit all refs in documnt
where
-- build the edit filter from the list of element-attribute names
editURIs
= seqA . map (uncurry mkAbs) $ hrefAttrs
-- HTML elements and attributes, that contain references (possibly not yet complete)
hrefAttrs
= [ ("a", "href" )
, ("img", "src" )
, ("frame", "src" )
, ("iframe", "src" )
, ("link", "href" )
, ("script", "src" )
]
-- change the reference in attribute attrName of element elemName
mkAbs elemName attrName
= processAttrl ( changeAttrValue (mkAbsURIString base)
`when`
hasName attrName
)
`when`
hasName elemName
-- | compute an absolute URI, if not possible leave URI unchanged
mkAbsURIString :: String -> String -> String
mkAbsURIString base uri
= fromMaybe uri . expandURIString uri $ base
-- ------------------------------------------------------------
hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/.ghci 0000644 0000000 0000000 00000000103 12752557013 021616 0 ustar 00 0000000 0000000 :set -i../../../src
:set -Wall -fglasgow-exts
:load SimpleExamples
hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/Makefile 0000644 0000000 0000000 00000002422 12752557013 022351 0 ustar 00 0000000 0000000 # $Id: Makefile,v 1.2 2006/11/17 17:16:24 hxml Exp $
#
# hello world application of Haskell XML Toolbox
EXAMPLES = PicklerExample
HXT_HOME = ../../..
PKGFLAGS =
GHCFLAGS = -W -O2
GHC = ghc $(GHCFLAGS) $(PKGFLAGS)
DIST = $(HXT_HOME)/dist/examples/arrows
DIST_DIR = $(DIST)/AGentleIntroductionToHXT
src = SimpleExamples.hs
prog = ./SimpleExamples
tests = \
selectAllText \
selectAllTextAndAltValues \
selectAllTextAndRealAltValues \
addRefIcon \
helloWorld \
helloWorld2 \
imageTable \
imageTable0 \
imageTable1 \
imageTable2 \
imageTable3 \
toAbsHRefs \
toAbsRefs \
toAbsRefs1
all :
$(MAKE) $(prog)
$(foreach i,$(EXAMPLES),$(MAKE) -C $i PKGFLAGS="$(PKGFLAGS)" $@ ;)
force :
$(MAKE) distclean all
test : $(prog)
$(foreach op,$(tests),echo $(prog) $(op) "http://www.haskell.org/" "-" ; $(prog) $(op) "http://www.haskell.org/" "-" ;)
$(foreach i,$(EXAMPLES),$(MAKE) -C $i $@ ;)
dist :
[ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR)
$(foreach i,$(EXAMPLES),$(MAKE) -C $i dist DIST=../$(DIST_DIR) ;)
cp $(src) Makefile $(DIST_DIR)
clean :
$(foreach i,$(EXAMPLES),$(MAKE) -C $i $@ ;)
rm -f *.o *.hi
distclean :
$(MAKE) clean
rm -f mini hello
.PHONY : all test dist clean distclean force
$(prog) : $(src)
$(GHC) --make -o $@ $<
hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/PicklerExample/Baseball.hs 0000644 0000000 0000000 00000025616 12752557013 025671 0 ustar 00 0000000 0000000 {- |
Example for usage of pickler functions
to de-/serialise from/to XML
Example data is taken from haskell wiki
http://www.haskell.org/haskellwiki/HXT/Practical/Simple2
-}
module Main
where
import Data.Map (Map, fromList, toList)
import Text.XML.HXT.Core
-- Example data taken from:
-- http://www.ibiblio.org/xml/books/bible/examples/05/5-1.xml
-- ------------------------------------------------------------
-- the data modell
data Season = Season
{ sYear :: Int
, sLeagues :: Leagues
}
deriving (Show, Eq)
type Leagues = Map String Divisions
type Divisions = Map String [Team]
data Team = Team
{ teamName :: String
, city :: String
, players :: [Player]
}
deriving (Show, Eq)
data Player = Player
{ firstName :: String
, lastName :: String
, position :: String
, atBats :: Maybe Int
, hits :: Maybe Int
, era :: Maybe Float
}
deriving (Show, Eq)
-- ------------------------------------------------------------
-- the pickler instance declarations
-- in this case just for uniform naming
instance XmlPickler Season where
xpickle = xpSeason
instance XmlPickler Team where
xpickle = xpTeam
instance XmlPickler Player where
xpickle = xpPlayer
-- ------------------------------------------------------------
-- for every data type there is a pickler
-- the XML root element
xpSeason :: PU Season
xpSeason
= xpElem "SEASON" $
xpWrap ( uncurry Season
, \ s -> (sYear s, sLeagues s)) $
xpPair (xpAttr "YEAR" xpickle) xpLeagues
xpLeagues :: PU Leagues
xpLeagues
= xpWrap ( fromList
, toList ) $
xpList $
xpElem "LEAGUE" $
xpPair (xpAttr "NAME" xpText) xpDivisions
xpDivisions :: PU Divisions
xpDivisions
= xpWrap ( fromList
, toList
) $
xpList $
xpElem "DIVISION" $
xpPair (xpAttr "NAME" xpText)
xpickle
xpTeam :: PU Team
xpTeam
= xpElem "TEAM" $
xpWrap ( uncurry3 Team
, \ t -> (teamName t, city t, players t)
) $
xpTriple (xpAttr "NAME" xpText)
(xpAttr "CITY" xpText)
(xpList xpickle)
xpPlayer :: PU Player
xpPlayer
= xpElem "PLAYER" $
xpWrap ( \ ((f,l,p,a,h,e)) -> Player f l p a h e
, \ t -> (firstName t, lastName t
, position t, atBats t
, hits t, era t
)
) $
xp6Tuple (xpAttr "GIVEN_NAME" xpText)
(xpAttr "SURNAME" xpText)
(xpAttr "POSITION" xpText)
(xpOption (xpAttr "AT_BATS" xpickle))
(xpOption (xpAttr "HITS" xpickle))
(xpOption (xpAttr "ERA" xpPrim ))
-- ------------------------------------------------------------
-- a simple pickle/unpickle application
main :: IO ()
main
= do
runX ( xunpickleDocument xpSeason [ withValidate no
, withTrace 1
, withRemoveWS yes
, withPreserveComment no
] "simple2.xml"
>>>
processSeason
>>>
xpickleDocument xpSeason [ withIndent yes
] "new-simple2.xml"
)
return ()
-- the dummy for processing the unpickled data
processSeason :: IOSArrow Season Season
processSeason
= arrIO ( \ x -> do {print x ; return x})
-- ------------------------------------------------------------
-- the internal data of "simple2.xml"
season1998 :: Season
season1998
= Season
{ sYear = 1998
, sLeagues = fromList
[ ( "American League"
, fromList
[ ( "Central"
, [ Team { teamName = "White Sox"
, city = "Chicago"
, players = []}
, Team { teamName = "Royals"
, city = "Kansas City"
, players = []}
, Team { teamName = "Tigers"
, city = "Detroit"
, players = []}
, Team { teamName = "Indians"
, city = "Cleveland"
, players = []}
, Team { teamName = "Twins"
, city = "Minnesota"
, players = []}
])
, ( "East"
, [ Team { teamName = "Orioles"
, city = "Baltimore"
, players = []}
, Team { teamName = "Red Sox"
, city = "Boston"
, players = []}
, Team { teamName = "Yankees"
, city = "New York"
, players = []}
, Team { teamName = "Devil Rays"
, city = "Tampa Bay"
, players = []}
, Team { teamName = "Blue Jays"
, city = "Toronto"
, players = []}
])
, ( "West"
, [ Team { teamName = "Angels"
, city = "Anaheim"
, players = []}
, Team { teamName = "Athletics"
, city = "Oakland"
, players = []}
, Team { teamName = "Mariners"
, city = "Seattle"
, players = []}
, Team { teamName = "Rangers"
, city = "Texas"
, players = []}
])
])
, ( "National League"
, fromList
[ ( "Central"
, [ Team { teamName = "Cubs"
, city = "Chicago"
, players = []}
, Team { teamName = "Reds"
, city = "Cincinnati"
, players = []}
, Team { teamName = "Astros"
, city = "Houston"
, players = []}
, Team { teamName = "Brewers"
, city = "Milwaukee"
, players = []}
, Team { teamName = "Pirates"
, city = "Pittsburgh"
, players = []}
, Team { teamName = "Cardinals"
, city = "St. Louis"
, players = []}
])
, ( "East"
, [ Team { teamName = "Braves"
, city = "Atlanta"
, players =
[ Player { firstName = "Marty"
, lastName = "Malloy"
, position = "Second Base"
, atBats = Just 28
, hits = Just 5
, era = Nothing}
, Player { firstName = "Ozzie"
, lastName = "Guillen"
, position = "Shortstop"
, atBats = Just 264
, hits = Just 73
, era = Nothing}
, Player { firstName = "Danny"
, lastName = "Bautista"
, position = "Outfield"
, atBats = Just 144
, hits = Just 36
, era = Nothing}
, Player { firstName = "Gerald"
, lastName = "Williams"
, position = "Outfield"
, atBats = Just 266
, hits = Just 81
, era = Nothing}
, Player { firstName = "Tom"
, lastName = "Glavine"
, position = "Starting Pitcher"
, atBats = Nothing
, hits = Nothing
, era = Just 2.47}
, Player { firstName = "Javier"
, lastName = "Lopez"
, position = "Catcher"
, atBats = Just 489
, hits = Just 139
, era = Nothing}
, Player { firstName = "Ryan"
, lastName = "Klesko"
, position = "Outfield"
, atBats = Just 427
, hits = Just 117
, era = Nothing}
, Player { firstName = "Andres"
, lastName = "Galarraga"
, position = "First Base"
, atBats = Just 555
, hits = Just 169
, era = Nothing}
, Player { firstName = "Wes"
, lastName = "Helms"
, position = "Third Base"
, atBats = Just 13
, hits = Just 4
, era = Nothing}
]}
, Team { teamName = "Marlins"
, city = "Florida"
, players = []}
, Team { teamName = "Expos"
, city = "Montreal"
, players = []}
, Team { teamName = "Mets"
, city = "New York"
, players = []}
, Team { teamName = "Phillies"
, city = "Philadelphia"
, players = []}
])
, ( "West"
, [ Team { teamName = "Diamondbacks"
, city = "Arizona"
, players = []}
, Team { teamName = "Rockies"
, city = "Colorado"
, players = []}
, Team { teamName = "Dodgers"
, city = "Los Angeles"
, players = []}
, Team { teamName = "Padres"
, city = "San Diego"
, players = []}
, Team { teamName = "Giants"
, city = "San Francisco"
, players = []}
])
])
]
}
-- ------------------------------------------------------------
hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/PicklerExample/Makefile 0000644 0000000 0000000 00000001274 12752557013 025262 0 ustar 00 0000000 0000000 # $Id: Makefile,v 1.2 2006/11/17 17:16:24 hxml Exp $
#
# hello world application of Haskell XML Toolbox
HXT_HOME = ../../../..
PKGFLAGS =
GHCFLAGS = -W -O2
GHC = ghc $(GHCFLAGS) $(PKGFLAGS)
DIST = $(HXT_HOME)/dist/examples/arrows/AGentleIntroductionToHXT
DIST_DIR = $(DIST)/PicklerExample
src = Baseball.hs
prog = ./Baseball
all :
$(MAKE) $(prog)
force :
$(MAKE) distclean all
test : $(prog)
$(prog)
dist :
[ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR)
cp $(src) simple2.xml new-simple2.xml Makefile $(DIST_DIR)
clean :
rm -f *.o *.hi
distclean :
$(MAKE) clean
rm -f $(prog)
.PHONY : all test dist clean distclean force
$(prog) : $(src)
$(GHC) --make -o $@ $<
hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/PicklerExample/new-simple2.xml 0000644 0000000 0000000 00000005176 12752557013 026513 0 ustar 00 0000000 0000000
hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/PicklerExample/simple2.xml 0000644 0000000 0000000 00000011355 12752557013 025720 0 ustar 00 0000000 0000000
hxt-9.3.1.22/examples/arrows/AGentleIntroductionToHXT/SimpleExamples.hs 0000644 0000000 0000000 00000024353 12752557013 024204 0 ustar 00 0000000 0000000 {- |
The examples from the HXT tutorial at haskell.org "http://www.haskell.org/haskellwiki/HXT"
-}
module Main
where
import Text.XML.HXT.Core -- basic HXT stuff
import Text.XML.HXT.XPath -- additional XPath functions
import Text.XML.HXT.Curl -- Curl HTTP handler
import Data.List -- auxiliary functions
import Data.Maybe
import System.Environment
import System.Console.GetOpt()
import System.Exit
-- | call this program with 3 arguments,
-- the function name, see list of examples,
-- the input URL or file
-- and the output file, - for stdout
--
-- example: SimpleExamples selectAllText http://www.haskell.org/ -
main :: IO ()
main
= do
argv <- getArgs
(al, fct, src, dst) <- cmdlineOpts argv
[rc] <- runX (application al fct src dst)
if rc >= c_err
then exitWith (ExitFailure 1)
else exitWith ExitSuccess
application :: SysConfigList -> String -> String -> String -> IOSArrow b Int
application config fct src dst
= configSysVars config -- set all global config options
>>>
readDocument [] src
>>>
processChildren (processRootElement fct `when` isElem)
>>>
writeDocument [ withIndent yes,
withOutputEncoding isoLatin1
]
dst
>>>
getErrStatus
-- | the dummy for the boring stuff of option evaluation,
-- usually done with 'System.Console.GetOpt'
cmdlineOpts :: [String] -> IO (SysConfigList, String, String, String)
cmdlineOpts argv
= return ( [ withValidate no
, withParseHTML yes
, withCurl []
]
, argv!!0
, argv!!1
, argv!!2
)
-- | the processing examples
examples :: [ (String, IOSArrow XmlTree XmlTree) ]
examples
= [ ( "selectAllText", selectAllText )
, ( "selectAllTextAndAltValues", selectAllTextAndAltValues )
, ( "selectAllTextAndRealAltValues", selectAllTextAndRealAltValues )
, ( "addRefIcon", addRefIcon )
, ( "helloWorld", helloWorld )
, ( "helloWorld2", helloWorld2 )
, ( "imageTable", imageTable )
, ( "imageTable0", imageTable0 )
, ( "imageTable1", imageTable1 )
, ( "imageTable2", imageTable2 )
, ( "imageTable3", imageTable3 )
, ( "toAbsHRefs", toAbsHRefs )
, ( "toAbsRefs", toAbsRefs )
, ( "toAbsRefs1", toAbsRefs1 )
]
processRootElement :: String -> IOSArrow XmlTree XmlTree
processRootElement fct
= fromMaybe this . lookup fct $ examples
-- | selection arrows
selectAllText :: ArrowXml a => a XmlTree XmlTree
selectAllText
= selem "the-plain-text" [ deep isText ] -- create a root element, neccessary for wellformed XML output
selectAllTextAndAltValues :: ArrowXml a => a XmlTree XmlTree
selectAllTextAndAltValues
= selem "the-plain-text"
[ deep
( isText
<+>
( isElem >>> hasName "img"
>>>
getAttrValue "alt"
>>>
mkText
)
)
]
selectAllTextAndRealAltValues :: ArrowXml a => a XmlTree XmlTree
selectAllTextAndRealAltValues
= selem "the-plain-text"
[ deep
( isText
<+>
( isElem >>> hasName "img"
>>>
getAttrValue "alt"
>>>
isA significant
>>>
arr addBrackets
>>>
mkText
)
)
]
where
significant :: String -> Bool
significant = not . all (`elem` " \n\r\t")
addBrackets :: String -> String
addBrackets s
= " [[ " ++ s ++ " ]] "
-- | transformation arrows
addRefIcon :: ArrowXml a => a XmlTree XmlTree
addRefIcon
= processTopDown
( addImg
`when`
isExternalRef
)
where
isExternalRef
= isElem
>>>
hasName "a"
>>>
hasAttr "href"
>>>
getAttrValue "href"
>>>
isA isExtRef
where
isExtRef
= isPrefixOf "http:"
addImg
= replaceChildren
( getChildren
<+>
imgElement
)
imgElement
= mkelem "img"
[ sattr "src" "/icons/ref.png"
, sattr "alt" "external ref"
] []
-- | construction examples
helloWorld :: ArrowXml a => a XmlTree XmlTree
helloWorld
= mkelem "html" []
[ mkelem "head" []
[ mkelem "title" []
[ txt "Hello World" ]
]
, mkelem "body"
[ sattr "class" "haskell" ]
[ mkelem "h1" []
[ txt "Hello World" ]
]
]
helloWorld2 :: ArrowXml a => a XmlTree XmlTree
helloWorld2
= selem "html"
[ selem "head"
[ selem "title"
[ txt "Hello World" ]
]
, mkelem "body"
[ sattr "class" "haskell" ]
[ selem "h1"
[ txt "Hello World" ]
]
]
imageTable :: ArrowXml a => a XmlTree XmlTree
imageTable
= selem "html"
[ selem "head"
[ selem "title"
[ txt "Images in Page" ]
]
, selem "body"
[ selem "h1"
[ txt "Images in Page" ]
, selem "table"
[ collectImages
>>>
genTableRows
]
]
]
where
genTableRows
= selem "tr"
[ selem "td"
[ getAttrValue "src" >>> mkText ]
]
imageTable0 :: ArrowXml a => a XmlTree XmlTree
imageTable0
= selem "html"
[ pageHeader
, selem "body"
[ selem "h1"
[ txt "Images in Page" ]
, selem "table"
[ collectImages
>>>
genTableRows
]
]
]
where
pageHeader
= constA "Images in Page "
>>>
xread
genTableRows
= selem "tr"
[ selem "td"
[ getAttrValue "src" >>> mkText ]
]
imageTable1 :: ArrowXml a => a XmlTree XmlTree
imageTable1
= selem "html"
[ selem "head"
[ selem "title"
[ txt "Images in Page" ]
]
, selem "body"
[ selem "h1"
[ txt "Images in Page" ]
, selem "table"
[ collectImages
>>>
genTableRows
]
]
]
imageTable2 :: IOStateArrow s XmlTree XmlTree
imageTable2
= selem "html"
[ selem "head"
[ selem "title"
[ txt "Images in Page" ]
]
, selem "body"
[ selem "h1"
[ txt "Images in Page" ]
, selem "table"
[ collectImages
>>>
mkAbsImageRef
>>>
genTableRows
]
]
]
imageTable3 :: IOStateArrow s XmlTree XmlTree
imageTable3
= insertTreeTemplate
pageTemplate -- the page template
[ hasText (=="ImageList") :-> images] -- fill hole "ImageList" with image descriptions
where
images
= collectImages
>>>
mkAbsImageRef
>>>
genTableRows
pageTemplate
= constA "Images in Page Images in Page
ImageList
"
>>>
xread
collectImages :: ArrowXml a => a XmlTree XmlTree
collectImages
= deep ( isElem >>> hasName "img" )
genTableRows :: ArrowXml a => a XmlTree XmlTree
genTableRows
= selem "tr"
[ selem "td" -- (1)
[ this -- (1.1)
]
, selem "td" -- (2)
[ getAttrValue "src"
>>>
mkText
>>>
mkelem "a" -- (2.1)
[ attr "href" this ]
[ this ]
]
, selem "td" -- (3)
[ ( getAttrValue "width"
&&& -- (3.1)
getAttrValue "height"
)
>>>
arr2 geometry -- (3.2)
>>>
mkText
]
, selem "td" -- (4)
[ getAttrValue "alt"
>>>
mkText
]
]
where
geometry :: String -> String -> String
geometry "" ""
= ""
geometry w h
= w ++ "x" ++ h
mkAbsImageRef :: IOStateArrow s XmlTree XmlTree
mkAbsImageRef
= processAttrl (mkAbsRef `when` hasName "src")
where
mkAbsRef
= replaceChildren
( xshow getChildren
>>>
( mkAbsURI `orElse` this )
>>>
mkText
)
toAbsHRefs :: IOStateArrow s XmlTree XmlTree
toAbsHRefs
= ( mkAbsHRefs $< computeBaseRef )
>>>
removeBaseElement
removeBaseElement :: ArrowXml a => a XmlTree XmlTree
removeBaseElement
= processChildren
( processChildren ( none
`when`
( isElem >>> hasName "base" )
)
`when`
( isElem >>> hasName "head" )
)
mkAbsHRefs :: ArrowXml a => String -> a XmlTree XmlTree
mkAbsHRefs base
= processTopDown editHRef
where
editHRef
= processAttrl ( changeAttrValue (absHRef base)
`when`
hasName "href"
)
`when`
( isElem >>> hasName "a" )
where
absHRef :: String -> String -> String
absHRef base url
= fromMaybe url . expandURIString url $ base
toAbsRefs :: IOStateArrow s XmlTree XmlTree
toAbsRefs
= ( mkAbsRefs $< computeBaseRef )
>>>
removeBaseElement
mkAbsRefs0 :: ArrowXml a => String -> a XmlTree XmlTree
mkAbsRefs0 base
= processTopDown ( editRef "a" "href"
>>>
editRef "img" "src"
>>>
editRef "link" "href"
>>>
editRef "script" "src"
)
where
editRef en an
= processAttrl ( changeAttrValue (absHRef base)
`when`
hasName an
)
`when`
( isElem >>> hasName en )
where
absHRef :: String -> String -> String
absHRef base url
= fromMaybe url . expandURIString url $ base
mkAbsRefs :: ArrowXml a => String -> a XmlTree XmlTree
mkAbsRefs base
= processTopDown editRefs
where
editRefs
= seqA . map (uncurry editRef)
$
[ ("a", "href")
, ("img", "src")
, ("link", "href")
, ("script", "src") -- and more
]
editRef en an
= processAttrl ( changeAttrValue (absHRef base)
`when`
hasName an
)
`when`
( isElem >>> hasName en )
where
absHRef :: String -> String -> String
absHRef base url
= fromMaybe url . expandURIString url $ base
computeBaseRef :: IOStateArrow s XmlTree String
computeBaseRef
= ( ( ( isElem >>> hasName "html"
>>>
getChildren
>>>
isElem >>> hasName "head"
>>>
getChildren
>>>
isElem >>> hasName "base"
>>>
getAttrValue "href"
)
&&&
getBaseURI
)
>>> expandURI
)
`orElse` getBaseURI
getDescendends :: ArrowXml a => [String] -> a XmlTree XmlTree
getDescendends
= foldl1 (\ x y -> x >>> getChildren >>> y)
.
map (\ n -> isElem >>> hasName n)
computeBaseRef1 :: IOStateArrow s XmlTree String
computeBaseRef1
= ( ( ( getDescendends ["html","head","base"]
>>>
getAttrValue "href"
)
&&&
getBaseURI
)
>>> expandURI
)
`orElse` getBaseURI
computeBaseRef2 :: IOStateArrow s XmlTree String
computeBaseRef2
= ( ( xshow (getXPathTrees "/html/head/base@href")
&&&
getBaseURI
)
>>> expandURI
)
`orElse` getBaseURI
toAbsRefs1 :: IOStateArrow s XmlTree XmlTree
toAbsRefs1
= ( mkAbsRefs $< computeBaseRef1 )
>>>
removeBaseElement
hxt-9.3.1.22/examples/arrows/dtd2hxt/DTDtoHXT.hs 0000644 0000000 0000000 00000026212 12752557013 017363 0 ustar 00 0000000 0000000 -- |
-- DTDtoHXT - A program for generating access functions for the Haskell XML Toolbox
-- from a DTD (Arrow Version)
--
-- Author : Uwe Schmidt
--
-- this program may be used as example main program for the
-- Haskell XML Toolbox
module Main
where
import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML
import Text.XML.HXT.Curl
import System.IO -- import the IO and commandline option stuff
import System.Environment
import System.Console.GetOpt
import System.Exit
import Data.Char
import Data.List
-- ------------------------------------------------------------
-- |
-- the main program
main :: IO ()
main
= do
argv <- getArgs -- get the commandline arguments
(al, src) <- cmdlineOpts argv -- and evaluate them, return a key-value list
[rc] <- runX (dtd2hxt al src)
exitProg (rc >= c_err)
-- ------------------------------------------------------------
exitProg :: Bool -> IO a
exitProg True = exitWith (ExitFailure 1)
exitProg False = exitWith ExitSuccess
-- ------------------------------------------------------------
--
-- options
uppercaseInitials, namespaceAware, prefixUnderline :: String
uppercaseInitials = "uppercase-initials"
namespaceAware = "namespace-aware"
prefixUnderline = "prefix-underline"
-- name prefixes
tagPrefix, attPrefix, nsPrefix, isPrefix, mkPrefix, hasPrefix, getPrefix
, mkAttPrefix, mkSAttPrefix
, nsDefault :: String
tagPrefix = "tag"
attPrefix = "attr"
nsPrefix = "ns"
isPrefix = "is"
mkPrefix = "e"
hasPrefix = "has"
getPrefix = "get"
mkAttPrefix = "a"
mkSAttPrefix = "sa"
nsDefault = "default"
-- ------------------------------------------------------------
-- |
-- the /real/ main program
--
-- get wellformed document, validates document, but not canonicalize
-- (this would remove the DTD),
-- and controls output
dtd2hxt :: SysConfigList -> String -> IOSArrow b Int
dtd2hxt config src
= configSysVars config -- set all global config options
>>>
readDocument [withCanonicalize no
,withCurl []
] src
>>>
traceMsg 1 "start processing DTD"
>>>
processChildren (isDTD `guards` genHXT)
>>>
traceMsg 1 "processing finished"
>>>
traceSource
>>>
traceTree
>>>
( writeDocument [withOutputPLAIN] $< getSysAttr "output-file" )
>>>
getErrStatus
where
genHXT
= catA $ map (>>> mkText) $
[ getModuleName -- the module header
>>>
arr genModHead
, constA $ comm "namespace declarations"
, getNSAttr -- namespace constants
>>> -- declared as "xmlns" or "xmlns:" attribute with #FIXED values
arr2 genNSCode
, constA $ comm "element arrows"
, getElems >>. sort -- element processing
>>>
arr genElemCode
, getAttrs >>. ( sort . nub ) -- attribute processing
>>>
arr genAttrCode
, getModuleName -- module footer
>>> arr genModFoot
]
-- auxiliary arrows --------------------------------------------------
getModuleName :: (ArrowXml a, ArrowDTD a) => a XmlTree String
getModuleName
= isDTDDoctype
>>>
getDTDAttrValue a_name
>>>
arr moduleName
-- filter namespace attributes ----------------------------------------
getNSAttr :: (ArrowXml a, ArrowDTD a) => a XmlTree (String, String)
getNSAttr
= deep isDTDAttlist
>>>
( ( getDTDAttrValue a_value >>> isA (\ s -> s == "xmlns" || "xmlns:" `isPrefixOf` s)
)
`guards`
( ( getDTDAttrValue a_kind >>> isA (== k_fixed)
)
`guards`
( ( getDTDAttrValue a_value >>> arr (drop 6) ) -- remove "xmlns:" prefix
&&&
getDTDAttrValue a_default
)
)
)
getElems :: (ArrowXml a, ArrowDTD a) => a XmlTree String
getElems
= deep isDTDElement
>>>
getDTDAttrValue a_name
getAttrs :: (ArrowXml a, ArrowDTD a) => a XmlTree String
getAttrs
= deep isDTDAttlist
>>>
getDTDAttrValue a_value
-- code generation ------------------------------------------------------------
genModHead :: String -> String
genModHead rootElem
= code [ sepl
, "--"
, "-- don't edit this module"
, "-- generated with " ++ progName
, "-- simple access function for Haskell XML Toolbox"
, "-- generated from DTD of document: " ++ show src
, ""
, "module " ++ rootElem ++ " ( module " ++ rootElem ++ " )"
, "where"
, ""
, "import Text.XML.HXT.Core (XmlTree, ArrowXml, (>>>))"
, "import qualified Text.XML.HXT.Core as X (attr, eelem, getAttrValue, hasAttr, hasName, isElem, sattr)"
]
genNSCode :: String -> String -> String
genNSCode prefix ns
= code [ ns' ++ "\t:: String"
, ns' ++ "\t= " ++ show ns
]
where
ns' = nsPrefix ++ nn (if null prefix then nsDefault else prefix)
genElemCode :: String -> String
genElemCode n
= code [ comm ("arrows for element " ++ show n)
, tagN ++ "\t:: String"
, tagN ++ "\t= " ++ show n
, ""
, isN ++ "\t:: ArrowXml a => a XmlTree XmlTree"
, isN ++ "\t= X.isElem >>> X.hasName " ++ tagN
, ""
, mkN ++ "\t:: ArrowXml a => a n XmlTree"
, mkN ++ "\t= X.eelem " ++ tagN
]
where
tagN = tagPrefix ++ nn n
isN = isPrefix ++ nn n
mkN = mkPrefix ++ nn n
genAttrCode :: String -> String
genAttrCode n
= code [ comm ("arrows for attribute " ++ show n)
, attN ++ "\t:: String"
, attN ++ "\t= " ++ show n
, ""
, hasN ++ "\t:: ArrowXml a => a XmlTree XmlTree"
, hasN ++ "\t= X.hasAttr " ++ attN
, ""
, getN ++ "\t:: ArrowXml a => a XmlTree String"
, getN ++ "\t= X.getAttrValue " ++ attN
, ""
, mkN ++ "\t:: ArrowXml a => a n XmlTree -> a n XmlTree"
, mkN ++ "\t= X.attr " ++ attN
, ""
, mksN ++ "\t:: ArrowXml a => String -> a n XmlTree"
, mksN ++ "\t= X.sattr " ++ attN
]
where
attN = attPrefix ++ nn n
hasN = hasPrefix ++ nn n
getN = getPrefix ++ nn n ++ nn "value"
mkN = mkAttPrefix ++ nn n
mksN = mkSAttPrefix ++ nn n
genModFoot :: String -> String
genModFoot rootElem
= comm ( "end of module " ++ rootElem)
-- string manipulation --------------------------------------------------
code :: [String] -> String
code = concatMap (++ "\n")
comm :: String -> String
comm cm = code [ "", sepl, "--", "-- " ++ cm, ""]
sepl :: String
sepl = "-- ----------------------------------------"
moduleName :: String -> String
moduleName rootElem
= modname . (\ x -> if null x then rootElem else x) . getConfigAttr "output_file" $ config
modname
= (\ x -> toUpper (head x) : tail x)
. reverse
. (\ n -> if '.' `elem` n -- remove extension
then drop 1 . dropWhile (/= '.') $ n
else n
)
. takeWhile (/= '/') -- remove dir path
. reverse
nn :: String -> String
nn
= trInitial . concatMap nc -- normalize names
nc :: Char -> String
nc c
| c `elem` ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ "_" = [c]
| c == ':' || c == '-' = "_"
| otherwise = ("_" ++) . show . fromEnum $ c
trInitial :: String -> String
trInitial str
| null str = str
| underLn = '_' : str
| upperCs = toUpper (head str) : tail str
| otherwise = str
upperCs, underLn {-, nsAware -} :: Bool
upperCs = (== "1") . getConfigAttr uppercaseInitials $ config
underLn = (== "1") . getConfigAttr prefixUnderline $ config
_nsAware = (== "1") . getConfigAttr namespaceAware $ config
-- ------------------------------------------------------------
--
-- the boring option definition and evaluation part
--
-- see doc for System.Console.GetOpt
progName :: String
progName = "DTDtoHXT"
options :: [OptDescr SysConfig]
options
= selectOptions [ a_help
] generalOptions
++
selectOptions [ a_trace
, a_proxy
, a_encoding
, a_validate
, a_check_namespaces
] inputOptions
++
selectOptions [ "output-file"
] outputOptions
++
[ Option "u" [prefixUnderline] (NoArg $ withSysAttr prefixUnderline "1") "separate tag and attribute names with a '_'"
, Option "U" [uppercaseInitials] (NoArg $ withSysAttr uppercaseInitials "1") "transform the first char of tag and attribute names to uppercase"
, Option "N" [namespaceAware] (NoArg $ withSysAttr namespaceAware "1") "filter are namespace aware, if namespace attributes occur in the DTD"
]
++
showOptions
usage :: [String] -> IO a
usage errl
| null errl
= do
hPutStrLn stdout use
exitProg False
| otherwise
= do
hPutStrLn stderr (concat errl ++ "\n" ++ use)
exitProg True
where
header = "DTDtoHXml - Generation of access function for the Haskell XML Toolbox from a DTD\n" ++
"Usage: " ++ progName ++ " [OPTION...] [URI or FILE]"
use = usageInfo header options
cmdlineOpts :: [String] -> IO (SysConfigList, String)
cmdlineOpts argv
= case (getOpt Permute options argv) of
(ol,n,[] )
-> do
sa <- src n
help (getConfigAttr a_help ol)
return (ol, sa)
(_,_,errs)
-> usage errs
where
src [uri] = return uri
src [] = usage ["input file/uri missing"]
src _ = usage ["only one input url or file allowed\n"]
help "1" = usage []
help _ = return ()
-- ------------------------------------------------------------
hxt-9.3.1.22/examples/arrows/dtd2hxt/.ghci 0000644 0000000 0000000 00000000132 12752557013 016371 0 ustar 00 0000000 0000000 :set -package-conf ../../lib/hxt/package.conf
:set -package hxt
:set -Wall
:load DTDtoHXT
hxt-9.3.1.22/examples/arrows/dtd2hxt/Makefile 0000644 0000000 0000000 00000002370 12752557013 017124 0 ustar 00 0000000 0000000 # $Id: Makefile,v 1.4 2005/05/15 17:01:04 hxml Exp $
HXT_HOME = ../../..
PKGFLAGS =
GHCFLAGS = -Wall -O2
GHC = ghc $(GHCFLAGS) $(PKGFLAGS)
prog = ./DTDtoHXT
all : $(prog)
$(prog) : $(prog).hs
$(GHC) --make -o $@ $<
force :
$(GHC) --make -o $(prog) $(prog).hs
test :
@echo "===> run a few generation examples"
$(MAKE) XHTML.o XHTML2.o
@echo "===> the generated modules"
ls -l XHTML*.hs XHTML*.o
EX1 = ../../xhtml/xhtml.xml
EX2 = ../../photoalbum/photos.xml
XHTML.o : $(prog) $(EX1)
@echo "===> generate a module for XHTML access function from the XHTML DTD with naming convention is, get, ..."
$(prog) --output-file XHTML.hs --uppercase-initials $(EX1)
$(GHC) -c XHTML.hs
XHTML2.o : $(prog) $(EX1)
@echo "===> generate a module for XHTML access function from the XHTML DTD with naming convention is_, get_, ..."
$(prog) --output-file XHTML2.hs --prefix-underline $(EX1)
$(GHC) -c XHTML2.hs
Photo.hs : $(prog) $(EX2)
$(prog) --output-file $@ --uppercase-initials $(EX2)
DIST = $(HXT_HOME)/dist/examples/arrows
DIST_DIR = $(DIST)/dtd2hxt
DIST_FILES = $(prog).hs Makefile
dist :
[ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR)
cp $(DIST_FILES) $(DIST_DIR)
clean :
rm -f $(prog) XHTML*.hs *.o *.hi
hxt-9.3.1.22/examples/arrows/HelloWorld/bye.xml 0000644 0000000 0000000 00000000107 12752557013 017453 0 ustar 00 0000000 0000000
Hello World!
hxt-9.3.1.22/examples/arrows/HelloWorld/HelloWorld.hs 0000644 0000000 0000000 00000000577 12752557013 020574 0 ustar 00 0000000 0000000 module Main
where
import Text.XML.HXT.Core
import System.Exit
main :: IO()
main
= do
[rc] <- runX ( readDocument [ withTrace 1
, withValidate no
] "hello.xml"
>>>
writeDocument [ withOutputEncoding utf8
] "-"
>>>
getErrStatus
)
exitWith ( if rc >= c_err
then ExitFailure 1
else ExitSuccess
)
hxt-9.3.1.22/examples/arrows/HelloWorld/hello.xml 0000644 0000000 0000000 00000000066 12752557013 020003 0 ustar 00 0000000 0000000
Hello World!
hxt-9.3.1.22/examples/arrows/HelloWorld/Makefile 0000644 0000000 0000000 00000001240 12752557013 017611 0 ustar 00 0000000 0000000 # $Id: Makefile,v 1.3 2005/04/14 12:52:50 hxml Exp $
#
# hello world application of Haskell XML Toolbox
HXT_HOME = ../../..
PKGFLAGS =
GHCFLAGS = -Wall -O2
GHC = ghc $(GHCFLAGS) $(PKGFLAGS)
DIST = $(HXT_HOME)/dist/examples/arrows
DIST_DIR = $(DIST)/HelloWorld
all : mini hello
force :
$(MAKE) distclean all
test :
./mini
./hello
dist :
[ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR)
cp Mini.hs HelloWorld.hs hello.xml Makefile $(DIST_DIR)
clean :
rm -f *.o *.hi
distclean :
$(MAKE) clean
rm -f mini hello
.PHONY : all test dist clean distclean force
hello : HelloWorld.hs
$(GHC) --make -o $@ $<
mini : Mini.hs
$(GHC) --make -o $@ $<
hxt-9.3.1.22/examples/arrows/HelloWorld/Mini.hs 0000644 0000000 0000000 00000000370 12752557013 017404 0 ustar 00 0000000 0000000 module Main
where
import Text.XML.HXT.Core
main :: IO()
main
= runX ( configSysVars [ withTrace 1 ]
>>>
readDocument [ withValidate no ] "hello.xml"
>>>
writeDocument [ ] "bye.xml"
)
>> return ()
hxt-9.3.1.22/examples/arrows/hparser/emptyElements.html 0000644 0000000 0000000 00000000477 12752557013 021276 0 ustar 00 0000000 0000000
hxt-9.3.1.22/examples/arrows/hparser/example1.xml 0000644 0000000 0000000 00000000710 12752557013 020001 0 ustar 00 0000000 0000000
]>
hello world äöüß test
hxt-9.3.1.22/examples/arrows/hparser/example1CRLF.xml 0000644 0000000 0000000 00000000736 12752557013 020460 0 ustar 00 0000000 0000000
]>
hello world äöüß test
hxt-9.3.1.22/examples/arrows/hparser/HXmlParser.hs 0000644 0000000 0000000 00000012275 12752557013 020135 0 ustar 00 0000000 0000000 -- ------------------------------------------------------------
{- |
Module : HXmlParser
Copyright : Copyright (C) 2005-2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt
Maintainer : uwe@fh-wedel.de
Stability : experimental
Portability: portable
HXmlParser - Minimal Validating XML Parser of the Haskell XML Toolbox, no HTTP supported
XML well-formed checker and validator.
this program may be used as example main program for the
arrow API of the Haskell XML Toolbox
commandline parameter evaluation and
and return code is the most complicated part
of this example application
-}
-- ------------------------------------------------------------
module Main
where
import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML
import System.IO -- import the IO and commandline option stuff
import System.Environment
import System.Console.GetOpt
import System.Exit
-- ------------------------------------------------------------
-- |
-- the main program of the Haskell XML Validating Parser
main :: IO ()
main
= do
argv <- getArgs -- get the commandline arguments
(al, src) <- cmdlineOpts argv -- and evaluate them, return a key-value list
[rc] <- runX (parser al src) -- run the parser arrow
exitProg (rc >= c_err) -- set return code and terminate
-- ------------------------------------------------------------
exitProg :: Bool -> IO a
exitProg True = exitWith (ExitFailure 1)
exitProg False = exitWith ExitSuccess
-- ------------------------------------------------------------
-- |
-- the /real/ main program
--
-- get wellformed document, validates document, propagates and check namespaces
-- and controls output
parser :: SysConfigList -> String -> IOSArrow b Int
parser config src
= configSysVars config -- set all global config options, the output file and the
>>> -- other user options are stored as key-value pairs in the stystem state
readDocument [] src -- no more special read options needed
>>>
( ( traceMsg 1 "start processing document"
>>>
( processDocument $< getSysAttr "action" ) -- ask for the action stored in the key-value list of user defined values
>>>
traceMsg 1 "document processing finished"
)
`when`
documentStatusOk
)
>>>
traceSource
>>>
traceTree
>>>
( (writeDocument [] $< getSysAttr "output-file") -- ask for the output file stored in the system configuration
`whenNot`
( getSysAttr "no-output" >>> isA (== "1") ) -- ask for the no-output attr value in the system key-value list
)
>>>
getErrStatus
-- simple example of a processing arrow, selected by a command line option
processDocument :: String -> IOSArrow XmlTree XmlTree
processDocument "only-text"
= traceMsg 1 "selecting plain text"
>>>
processChildren (deep isText)
processDocument "indent"
= traceMsg 1 "indent document"
>>>
indentDoc
processDocument _action
= traceMsg 1 "default action: do nothing"
>>>
this
-- ------------------------------------------------------------
--
-- the options definition part
-- see doc for System.Console.GetOpt
progName :: String
progName = "HXmlParser"
options :: [OptDescr SysConfig]
options
= generalOptions
++
inputOptions
++
outputOptions
++
showOptions
++
[ Option "q" ["no-output"] (NoArg $ withSysAttr "no-output" "1") "no output of resulting document"
, Option "x" ["action"] (ReqArg (withSysAttr "action") "ACTION") "actions are: only-text, indent, no-op"
]
-- the last 2 option values will be stored by withAttr in the system key-value list
-- and can be read by getSysAttr key
usage :: [String] -> IO a
usage errl
| null errl
= do
hPutStrLn stdout use
exitProg False
| otherwise
= do
hPutStrLn stderr (concat errl ++ "\n" ++ use)
exitProg True
where
header = "HXmlParser - Validating XML Parser of the Haskell XML Toolbox with Arrow Interface\n" ++
"XML well-formed checker, DTD validator, HTML parser.\n\n" ++
"Usage: " ++ progName ++ " [OPTION...] [URI or FILE]"
use = usageInfo header options
cmdlineOpts :: [String] -> IO (SysConfigList, String)
cmdlineOpts argv
= case (getOpt Permute options argv) of
(scfg,n,[])
-> do
sa <- src n
help (getConfigAttr a_help scfg) sa
return (scfg, sa)
(_,_,errs)
-> usage errs
where
src [] = return []
src [uri] = return uri
src _ = usage ["only one input uri or file allowed\n"]
help "1" _ = usage []
help _ [] = usage ["no input uri or file given\n"]
help _ _ = return ()
-- ------------------------------------------------------------
hxt-9.3.1.22/examples/arrows/hparser/invalid1.xml 0000644 0000000 0000000 00000000006 12752557013 017772 0 ustar 00 0000000 0000000