Class XML Parser
inputs = String XML
attributes
String token_type {'atomic'|'open'|'close'}
String token_name yString
String token_parent yString
String token_full_name yString
String token_attributes[] xString
String token_value yString
String token_property_names[] ::= ('token_type', 'token_name', 'token_parent', 'token_full_name', 'token_attributes', 'token_value', 'token_property_names', 'token_attribute_names')
String token_attribute_names[]
definitions
tChar ::= (>,<,/)
yString ::= any number of printable characters excluding tChar
xString ::= yString, excluding spaces
methods (interface)
get_token (String XML) :String
get_attribute (String hashArray[]) :String hashArray[]
get_value (String hashArray[]) :String hashArray[]
get_name (String hashArray[]) :String hashArray[]
methods (definition)
get_token ::= get_atomic_token ^ get_open_token ^ get_close_token
get_name ::= FIND (^[ ]*'<'xString([ ]+|'>'|'/>'))
SET this.token_name = xString
REMOVE MATCH(ed) String from XML String
get_attribute ::= WHILE token String still has more attributes
// find key="value", update attributes hash and attribute names array
FIND ^[ ]+xString[ ]*'='[ ]*'"'yString'"'
SET this.token_attributes.xString = yString
SET this.token_attribute_names[#LAST] = xString
remove current attribute from token String
IF this.token_type = ('atomic'|'close') THEN END parse
get_value ::= FIND (^yString)
SET this.token_value = yString
REMOVE MATCH(ed) String from XML String
get_atomic_token ::= FIND (^[ ]*'<'yString'/>')
SET this.token_type = 'atomic'
get_name
get_attribute
REMOVE MATCH(ed) String from XML String
get_open_token ::= FIND (^[ ]*'<'yString'>')
SET this.token_type = 'open'
get_name
get_attribute
get_value
REMOVE MATCH(ed) String from XML String
get_close_token ::= FIND (^[ ]*'</'yString'>')
SET this.token_type = 'close'
get_name
REMOVE MATCH(ed) String from XML String
logic
(ARRAY APPROACH) // Implementation using hash arrays.
Stack parents
// A Stack is an Array in which the first item in
// is always the last item out.
// The parents Stack is an Array that holds the list
// of parent XML tokens. When an Opening tag is
// encountered, a token is added to the Stack. After
// the Closing tags have been found the element is
// POP(ed) from the Stack. The Stack is not affected
// by Atomic tokens since they Open/Close immediately.
String parents_list = 'root_array'
// parents_list is a String that records the current parent element
// starting from root_array to root_array.elm1.elmx and back to root_array
// the String grows as you descend into nested elements and it
// shrinks as you ascend back towards the root, after each element
// closing tag.
Array array
array['root_array'] = parse
IF array['root_array']['token_type'] = 'atomic'
RETURN array
// root array is atomic, return now, no more work to do
ELSE
current_array = parse
// get next token
IF current_array['token_type'] = 'close'
RETURN array
// closing tags found for root array, no more work to do
END IF
DO // Loop to scroll through all elements of XML document
parents_list = parents_list + '.' + current_array['token_name']
IF current_array['token_type'] = 'atomic'
// current item is an Atomic tag, e.g. <tag/>
current_array['parent'] = parents[#LAST]
// Set this token's parent to be the last entry in the Stack(Array) of parents
current_array['full_name'] = parents_list + current_array['token_name']
// Set this token's full name to the String parents_list plus its own name
array[current_array['full_name']] = current_array
// Add this array to Super Array of all tokens, Hashed by its fully qualified name, e.g. "a.b.c.d"
ELSIF current_array['token_type'] = 'close'
// current item is a closing tag, e.g. </tag>
parents_list = parents_list minus this token's name
// Remove current token from parents_list String
POP parents
// Remove the name of this token from the end of the Stack(Array) of parents
ELSE
// current item is an opening tag, e.g. <tag>
current_array['parent'] = parents[#LAST]
// Set this token's parent to be the last entry in the Stack(Array) of parents
current_array['full_name'] = parents_list + current_array['token_name']
// Set this token's full name to the String parents_list plus its own name
array[current_array['full_name']] = current_array
// Add this array to Super Array of all tokens, Hashed by its fully qualified name, e.g. "a.b.c.d"
PUSH parents current_array['token_name']
// Add the name of this token to the end of the Stack(Array) of parents
END IF
WHILE current_array = parse
END IF
RETURN array
notes
1). After each method call, the Match(ed) text is returned and at the
same time, removed from the original XML string.
2). Each call to parse the XML string RETURNs an Array which contains
either the entire Hash of tokens or the current token only.
3). Arrays RETURN(ed) will be composite, i.e. nested Arrays will be
expressed as attributes of the parent Array
5). Arrays will be hashed by name rather than number and nested Arrays
will be expressed as individual Array items with a parent attribute
that points at the parent Array item. As such all tokens are
elements/items in the Super Array of all tokens. However, every
token will have a property, PARENT. If this property is 'root_array'
then this is the first element/token in the XML document. If it is
not 'root_array' then it will indicate the full name of the element/token
that is its parent.
6). By Hashing on fully qualified name rather than simple name, it becomes
possible to maintain several identical tokens and/or attributes that
occur at different nodes within the XML document in the same Hash Array.