23 : m_tagHandler( ph ), m_current( 0 ), m_root( 0 ), m_xmlnss( 0 ), m_state( Initial ),
24 m_preamble( 0 ), m_quote( false ), m_haveTagPrefix( false ), m_haveAttribPrefix( false ),
25 m_attribIsXmlns( false ), m_deleteRoot( deleteRoot )
// Decodes a single XML entity / character reference found at data[pos] and
// builds its replacement text in `rep`.
// The reference is taken to end at the first ';' at or after pos; `diff` is the
// distance from pos to that ';'.
// NOTE(review): this view of the function is fragmentary (case labels, several
// returns and the final bookkeeping are not visible), so the notes below only
// describe the statements that are shown.
35 Parser::DecodeState Parser::decode( std::string::size_type& pos,
const std::string& data )
37 std::string::size_type p = data.find(
';', pos );
// Computed before the npos check; if p is npos the branch below returns.
38 std::string::size_type diff = p - pos;
// No terminating ';' in this chunk: keep the unparsed tail in m_backBuffer
// and report DecodeInsufficient.
40 if( p == std::string::npos )
42 m_backBuffer = data.substr( pos );
43 return DecodeInsufficient;
// Length sanity check on the reference (distance from pos to ';').
// The guarded statement is not visible here -- presumably a DecodeInvalid return.
46 if( diff < 3 || diff > 9 )
// Dispatch on the character following data[pos] (presumably data[pos] is '&').
50 switch( data[pos + 1] )
// Numeric reference: an 'x'/'X' at pos + 2 presumably selects hexadecimal
// (the assignments to `base`/`idx` are not visible here).
57 if( data[pos + 2] ==
'x' || data[pos + 2] ==
'X' )
// Parse the number; `end` points at the first character strtol did not consume.
64 const long int val = std::strtol( data.data() + pos + idx, &end, base );
// The number must run right up to the ';' and must not be negative.
65 if( *end !=
';' || val < 0 )
// Tab, LF, CR and 0x20..0x7F (guarded statement not visible; presumably appends
// the value as a single byte).
68 if( val == 0x9 || val == 0xA || val == 0xD || ( val >= 0x20 && val <= 0x7F ) )
// 0x80..0x7FF: two bytes, lead 0xC0 | top bits, then one 0x80 | low-6-bits byte.
72 else if( val >= 0x80 && val <= 0x7FF )
74 rep += char( 192 + ( val >> 6 ) );
75 rep += char( 128 + ( val % 64 ) );
// 0x800..0xFFFD without 0xD800..0xDFFF: three bytes, lead 0xE0 | top bits.
77 else if( ( val >= 0x800 && val <= 0xD7FF ) || ( val >= 0xE000 && val <= 0xFFFD ) )
79 rep += char( 224 + ( val >> 12 ) );
80 rep += char( 128 + ( ( val >> 6 ) % 64 ) );
81 rep += char( 128 + ( val % 64 ) );
// Four bytes, lead 0xF0 | top bits.
// NOTE(review): as written this branch only accepts 0x100000..0x10FFFE, so
// 0x10000..0xFFFFF and 0x10FFFF match none of the visible branches. The bounds
// look like a typo for `val >= 0x10000 && val <= 0x10FFFF` -- confirm and fix.
83 else if( val >= 0x100000 && val < 0x10FFFF )
85 rep += char( 240 + ( val >> 18 ) );
86 rep += char( 128 + ( ( val >> 12 ) % 64 ) );
87 rep += char( 128 + ( ( val >> 6 ) % 64 ) );
88 rep += char( 128 + ( val % 64 ) );
// Two-letter names ending in 't' (case labels not visible; presumably "lt" and "gt").
95 if( diff == 3 && data[pos + 2] ==
't' )
101 if( diff == 3 && data[pos + 2] ==
't' )
104 return DecodeInvalid;
// Names starting with 'a' (presumably): "apos" or "amp", compared including the ';'.
107 if( diff == 5 && !data.compare( pos + 1, 5,
"apos;" ) )
109 else if( diff == 4 && !data.compare( pos + 1, 4,
"amp;" ) )
112 return DecodeInvalid;
// "quot", compared including the ';'.
115 if( diff == 5 && !data.compare( pos + 1, 5,
"quot;" ) )
118 return DecodeInvalid;
121 return DecodeInvalid;
// Case label of a later switch whose other lines are not visible in this view.
129 case TagAttributeValue:
// Tests whether `needle` occurs in `data` exactly at offset `pos`.
// - On a match, pos is advanced to the last character of the needle.
//   NOTE(review): the return statement for the match case is not visible in this view.
// - If the needle fits but does not match, ForwardNotFound is returned.
// - If fewer than needle.length() characters remain, the tail of data starting at
//   pos is saved in m_backBuffer and ForwardInsufficientSize is returned.
139 Parser::ForwardScanState Parser::forwardScan( std::string::size_type& pos,
const std::string& data,
140 const std::string& needle )
// Only compare when the whole needle fits into what is left of data.
142 if( pos + needle.length() <= data.length() )
144 if( !data.compare( pos, needle.length(), needle ) )
// Leave pos on the needle's last character (length - 1, not length).
146 pos += needle.length() - 1;
151 return ForwardNotFound;
// Not enough input yet: remember the unparsed tail.
156 m_backBuffer = data.substr( pos );
157 return ForwardInsufficientSize;
// Per-byte state machine over `data`, driven by m_state.
// NOTE(review): the function header, the case labels of most states and many
// statements are not visible in this view; the notes below only describe what
// the shown lines do.
// NOTE(review): the many `return static_cast<int>( i )` statements return the
// current byte offset; presumably this reports the position of a parse error --
// confirm against the declaration's documentation.
// Input left over from an earlier call (stored in m_backBuffer) is put in front
// of the new data before scanning.
163 if( !m_backBuffer.empty() )
165 data.insert( 0, m_backBuffer );
// The length is sampled once, after the back buffer has been prepended.
169 std::string::size_type count = data.length();
170 for( std::string::size_type i = 0; i < count; ++i )
// Current byte, as unsigned so that values >= 0x80 compare correctly.
172 const unsigned char c = data[i];
178 return static_cast<int>( i );
185 if( isWhitespace( c ) )
191 m_state = TagOpening;
195 return static_cast<int>( i );
202 if( isWhitespace( c ) )
208 m_state = TagOpening;
222 if( isWhitespace( c ) )
231 return static_cast<int>( i );
234 m_state = TagClosingSlash;
237 m_state = TagNameCollect;
// Look for a CDATA section opener at the current offset; forwardScan may move i.
241 switch( forwardScan( i, data,
"![CDATA[" ) )
244 m_state = TagCDATASection;
246 case ForwardNotFound:
248 return static_cast<int>( i );
249 case ForwardInsufficientSize:
255 m_state = TagNameCollect;
259 case TagCDATASection:
// Look for the CDATA section terminator at the current offset.
263 switch( forwardScan( i, data,
"]]>" ) )
268 case ForwardNotFound:
271 case ForwardInsufficientSize:
282 if( isWhitespace( c ) )
284 m_state = TagNameComplete;
295 return static_cast<int>( i );
298 m_state = TagOpeningSlash;
// Record that the tag name carries a prefix (only the first time).
305 if( !m_haveTagPrefix )
307 m_haveTagPrefix =
true;
314 return static_cast<int>( i );
329 m_state = TagOpening;
// Entity / character reference: decode() may advance i and may stash data in m_backBuffer.
333 switch( decode( i, data ) )
339 return static_cast<int>( i );
340 case DecodeInsufficient:
349 case TagOpeningSlash:
351 if( isWhitespace( c ) )
361 return static_cast<int>( i );
369 return static_cast<int>( i );
372 case TagClosingSlash:
374 if( isWhitespace( c ) )
383 return static_cast<int>( i );
387 m_state = TagClosing;
401 return static_cast<int>( i );
404 if( !m_haveTagPrefix )
406 m_haveTagPrefix =
true;
413 return static_cast<int>( i );
421 return static_cast<int>( i );
430 case TagNameComplete:
432 if( isWhitespace( c ) )
441 return static_cast<int>( i );
444 m_state = TagOpeningSlash;
// m_preamble == 1 is special-cased here; its meaning is not visible in this view
// (presumably "inside the <?xml ... ?> declaration" -- confirm).
447 if( m_preamble == 1 )
450 return static_cast<int>( i );
456 if( m_preamble == 1 )
461 return static_cast<int>( i );
466 m_state = TagAttribute;
472 if( isWhitespace( c ) )
474 m_state = TagAttributeComplete;
487 return static_cast<int>( i );
490 m_state = TagAttributeEqual;
// What was collected in m_attrib so far is either an attribute prefix (anything
// other than XMLNS, first occurrence only) or the xmlns marker itself.
493 if( !m_haveAttribPrefix && m_attrib !=
XMLNS )
495 m_haveAttribPrefix =
true;
496 m_attribPrefix = m_attrib;
499 else if( m_attrib ==
XMLNS )
501 m_attribIsXmlns =
true;
507 return static_cast<int>( i );
514 case TagAttributeComplete:
516 if( isWhitespace( c ) )
522 m_state = TagAttributeEqual;
526 return static_cast<int>( i );
530 case TagAttributeEqual:
532 if( isWhitespace( c ) )
540 m_state = TagAttributeValue;
544 return static_cast<int>( i );
548 case TagAttributeValue:
554 return static_cast<int>( i );
564 m_state = TagNameAlmostComplete;
// Entity / character reference inside an attribute value.
569 switch( decode( i, data ) )
575 return static_cast<int>( i );
576 case DecodeInsufficient:
585 case TagNameAlmostComplete:
587 if( isWhitespace( c ) )
589 m_state = TagNameComplete;
596 m_state = TagOpeningSlash;
599 if( m_preamble == 1 )
602 return static_cast<int>( i );
608 if( m_preamble == 1 )
613 return static_cast<int>( i );
618 return static_cast<int>( i );
// Turns the collected tag name (m_tag) into a Tag object and hooks it into the
// tree being built.
// NOTE(review): the conditions selecting between the two `new Tag` forms and
// most of the attribute handling are not visible in this view.
632 void Parser::addTag()
// Root form: a Tag built from the name only.
637 m_root =
new Tag( m_tag );
// Child form: the previous m_current is passed as first constructor argument
// (presumably the parent -- confirm against Tag's constructor).
643 m_current =
new Tag( m_current, m_tag );
// A collected tag prefix is consumed here and the flag reset for the next tag.
646 if( m_haveTagPrefix )
650 m_haveTagPrefix =
false;
// Only touch the attribute handling when attributes were collected.
653 if( m_attribs.size() )
675 streamEvent( m_root );
// The root element with prefix "stream" gets special treatment (body not visible).
682 if( m_root && m_root == m_current && m_tagPrefix ==
"stream" )
// An "xml" tag seen while m_preamble == 2 gets special treatment (body not
// visible; presumably the <?xml ... ?> declaration -- confirm).
685 if( m_tag ==
"xml" && m_preamble == 2 )
// Builds a Tag::Attribute from the collected name/value pair (m_attrib, m_value),
// assigns its prefix, queues it in m_attribs and resets the per-attribute flags.
// NOTE(review): several lines (else branches, allocation of m_xmlnss) are not
// visible in this view.
689 void Parser::addAttribute()
// NOTE(review): the doubled ';;' below is a harmless empty statement; drop one.
691 Tag::Attribute* attr =
new Tag::Attribute( m_attrib, m_value );;
// Namespace declaration: record name -> value in the m_xmlnss map and mark the
// attribute with the XMLNS prefix.
692 if( m_attribIsXmlns )
697 (*m_xmlnss)[m_attrib] = m_value;
698 attr->setPrefix(
XMLNS );
// Ordinary attribute: apply the collected prefix, if there is one.
703 if( !m_attribPrefix.empty() )
704 attr->setPrefix( m_attribPrefix );
// The attribute is literally named XMLNS (handling not visible in this view).
705 if( m_attrib ==
XMLNS )
708 m_attribs.push_back( attr );
// Reset per-attribute state for the next attribute.
712 m_haveAttribPrefix =
false;
713 m_attribIsXmlns =
false;
// Handles the character data collected in m_cdata.
// NOTE(review): the guarded body is not visible in this view; presumably it
// attaches m_cdata to m_current and clears the buffer -- confirm.
716 void Parser::addCData()
// Nothing to do without a current tag or without collected character data.
718 if( m_current && !m_cdata.empty() )
// Processes a closing tag whose name is in m_tag (and prefix in m_tagPrefix).
// NOTE(review): the return statements and several bodies are not visible in
// this view.
727 bool Parser::closeTag()
// A closing "stream" tag with prefix "stream" is special-cased (body not visible).
731 if( m_tag ==
"stream" && m_tagPrefix ==
"stream" )
// Mismatch check: there is no open tag, or its name differs from m_tag, or it
// has a non-empty prefix that differs from m_tagPrefix. An open tag with an
// empty prefix is not compared against m_tagPrefix.
734 if( !m_current || m_current->
name() != m_tag
735 || ( !m_current->
prefix().empty() && m_current->
prefix() != m_tagPrefix ) )
747 m_haveTagPrefix =
false;
// Step back up to the enclosing tag.
750 m_current = m_current->
parent();
754 streamEvent( m_root );
774 m_haveAttribPrefix =
false;
775 m_haveTagPrefix =
false;
784 bool Parser::isValid(
unsigned char c )
786 return ( c != 0xc0 || c != 0xc1 || c < 0xf5 );
789 bool Parser::isWhitespace(
unsigned char c )
791 return ( c == 0x09 || c == 0x0a || c == 0x0d || c == 0x20 );
794 void Parser::streamEvent( Tag* tag )