23 : m_tagHandler( ph ), m_current( 0 ), m_root( 0 ), m_xmlnss( 0 ), m_state( Initial ),
24 m_preamble( 0 ), m_quote( false ), m_haveTagPrefix( false ), m_haveAttribPrefix( false ),
25 m_attribIsXmlns( false ), m_deleteRoot( deleteRoot )
// Decodes the entity or character reference that starts at data[pos], scanning up to
// the next ';'. Visible outcomes: DecodeInsufficient when data holds no ';' yet (the
// unread tail is kept in m_backBuffer) and DecodeInvalid for malformed references.
// NOTE(review): this listing omits several lines of the function (braces, case labels,
// some assignments and returns); the comments describe only the lines that are present.
35 Parser::DecodeState Parser::decode( std::string::size_type& pos,
const std::string& data )
// Find the terminating ';' and its distance from pos.
37 std::string::size_type p = data.find(
';', pos );
38 std::string::size_type diff = p - pos;
// No ';' in the buffer: stash everything from pos onwards and ask for more input.
40 if( p == std::string::npos )
42 m_backBuffer = data.substr( pos );
43 return DecodeInsufficient;
// Length check on the reference; presumably rejects it as invalid - the guarded
// statement is not shown.
46 if( diff < 3 || diff > 9 )
// Dispatch on the character that follows data[pos].
50 switch( data[pos + 1] )
// Numeric reference: an 'x'/'X' in third position marks hexadecimal notation.
// NOTE(review): the lines that set base and idx are not shown.
57 if( data[pos + 2] ==
'x' || data[pos + 2] ==
'X' )
// Parse the number; it must end exactly at ';' and must not be negative.
64 const long int val = std::strtol( data.data() + pos + idx, &end, base );
65 if( *end !=
';' || val < 0 )
// Tab, LF, CR and 0x20..0x7F form the single-byte range.
68 if( val == 0x9 || val == 0xA || val == 0xD || ( val >= 0x20 && val <= 0x7F ) )
// 0x80..0x7FF: two UTF-8 bytes (110xxxxx 10xxxxxx).
72 else if( val >= 0x80 && val <= 0x7FF )
74 rep += char( 192 + ( val >> 6 ) );
75 rep += char( 128 + ( val % 64 ) );
// 0x800..0xFFFD without the surrogate block 0xD800..0xDFFF: three UTF-8 bytes.
77 else if( ( val >= 0x800 && val <= 0xD7FF ) || ( val >= 0xE000 && val <= 0xFFFD ) )
79 rep += char( 224 + ( val >> 12 ) );
80 rep += char( 128 + ( ( val >> 6 ) % 64 ) );
81 rep += char( 128 + ( val % 64 ) );
// 0x10000..0x10FFFF: four UTF-8 bytes. The supplementary planes begin at 0x10000
// (not 0x100000) and the last code point, 0x10FFFF, is part of the range.
83 else if( val >= 0x10000 && val <= 0x10FFFF )
85 rep += char( 240 + ( val >> 18 ) );
86 rep += char( 128 + ( ( val >> 12 ) % 64 ) );
87 rep += char( 128 + ( ( val >> 6 ) % 64 ) );
88 rep += char( 128 + ( val % 64 ) );
// Named entities. A two-letter name (diff == 3) ending in 't' is tested twice,
// presumably once for "lt" and once for "gt" - the case labels are not shown.
95 if( diff == 3 && data[pos + 2] ==
't' )
101 if( diff == 3 && data[pos + 2] ==
't' )
104 return DecodeInvalid;
// Names starting with 'a': "apos" (diff == 5) or "amp" (diff == 4), each compared
// together with its trailing ';'.
107 if( diff == 5 && !data.compare( pos + 1, 5,
"apos;" ) )
109 else if( diff == 4 && !data.compare( pos + 1, 4,
"amp;" ) )
112 return DecodeInvalid;
// "quot" (diff == 5), again compared including the ';'.
115 if( diff == 5 && !data.compare( pos + 1, 5,
"quot;" ) )
118 return DecodeInvalid;
121 return DecodeInvalid;
// Label of a later switch whose head is not part of this listing.
130 case TagAttributeValue:
// Checks whether the literal needle starts at data[pos].
// pos    - index into data; moved forward by needle.length() - 1 when the needle matches.
// data   - the buffer being parsed.
// needle - the literal to look for.
// Returns ForwardNotFound or ForwardInsufficientSize on the lines shown.
// NOTE(review): the return for the matching case is not part of this listing.
140 Parser::ForwardScanState Parser::forwardScan( std::string::size_type& pos,
const std::string& data,
141 const std::string& needle )
// Only compare when data holds at least needle.length() characters from pos on.
143 if( pos + needle.length() <= data.length() )
// compare() returns 0 on equality, hence the negation.
145 if( !data.compare( pos, needle.length(), needle ) )
147 pos += needle.length() - 1;
152 return ForwardNotFound;
// Too little input to decide: keep the tail starting at pos in m_backBuffer.
157 m_backBuffer = data.substr( pos );
158 return ForwardInsufficientSize;
// Body of the parser's byte-by-byte main loop.
// NOTE(review): the signature line of this function is not part of this listing, and
// many statements, braces and case labels are missing; the comments describe only
// what the visible lines do.
// Every `return static_cast<int>( i )` hands the current byte index back to the caller
// (presumably the position at which parsing stopped - confirm against the declaration).
// Text saved earlier in m_backBuffer is put in front of the new input first.
164 if( !m_backBuffer.empty() )
166 data.insert( 0, m_backBuffer );
// Walk the (possibly extended) buffer one byte at a time.
170 std::string::size_type count = data.length();
171 for( std::string::size_type i = 0; i < count; ++i )
173 const unsigned char c = data[i];
179 return static_cast<int>( i );
186 if( isWhitespace( c ) )
192 m_state = TagOpening;
196 return static_cast<int>( i );
203 if( isWhitespace( c ) )
// decode() handles the entity/character reference at index i; its result selects
// the next step.
215 switch( decode( i, data ) )
222 return static_cast<int>( i );
223 case DecodeInsufficient:
228 m_state = TagOpening;
242 if( isWhitespace( c ) )
251 return static_cast<int>( i );
254 m_state = TagClosingSlash;
257 m_state = TagNameCollect;
// Test whether a CDATA section opener starts at index i.
261 switch( forwardScan( i, data,
"![CDATA[" ) )
264 m_state = TagCDATASection;
266 case ForwardNotFound:
268 return static_cast<int>( i );
269 case ForwardInsufficientSize:
275 m_state = TagNameCollect;
279 case TagCDATASection:
// Test whether the CDATA terminator starts at index i.
283 switch( forwardScan( i, data,
"]]>" ) )
288 case ForwardNotFound:
291 case ForwardInsufficientSize:
302 if( isWhitespace( c ) )
304 m_state = TagNameComplete;
315 return static_cast<int>( i );
318 m_state = TagOpeningSlash;
// Record that the tag name carries a prefix, at most once per tag.
325 if( !m_haveTagPrefix )
327 m_haveTagPrefix =
true;
334 return static_cast<int>( i );
349 m_state = TagOpening;
353 switch( decode( i, data ) )
359 return static_cast<int>( i );
360 case DecodeInsufficient:
369 case TagOpeningSlash:
371 if( isWhitespace( c ) )
381 return static_cast<int>( i );
389 return static_cast<int>( i );
392 case TagClosingSlash:
394 if( isWhitespace( c ) )
403 return static_cast<int>( i );
407 m_state = TagClosing;
421 return static_cast<int>( i );
424 if( !m_haveTagPrefix )
426 m_haveTagPrefix =
true;
433 return static_cast<int>( i );
441 return static_cast<int>( i );
450 case TagNameComplete:
452 if( isWhitespace( c ) )
461 return static_cast<int>( i );
464 m_state = TagOpeningSlash;
467 if( m_preamble == 1 )
470 return static_cast<int>( i );
476 if( m_preamble == 1 )
481 return static_cast<int>( i );
486 m_state = TagAttribute;
492 if( isWhitespace( c ) )
494 m_state = TagAttributeComplete;
507 return static_cast<int>( i );
510 m_state = TagAttributeEqual;
// The attribute name collected so far becomes the attribute prefix unless it equals
// XMLNS; if it equals XMLNS the attribute is flagged as a namespace declaration.
// NOTE(review): the character that triggers this branch is not shown.
513 if( !m_haveAttribPrefix && m_attrib !=
XMLNS )
515 m_haveAttribPrefix =
true;
516 m_attribPrefix = m_attrib;
519 else if( m_attrib ==
XMLNS )
521 m_attribIsXmlns =
true;
527 return static_cast<int>( i );
534 case TagAttributeComplete:
536 if( isWhitespace( c ) )
542 m_state = TagAttributeEqual;
546 return static_cast<int>( i );
550 case TagAttributeEqual:
552 if( isWhitespace( c ) )
560 m_state = TagAttributeValue;
564 return static_cast<int>( i );
568 case TagAttributeValue:
574 return static_cast<int>( i );
584 m_state = TagNameAlmostComplete;
589 switch( decode( i, data ) )
595 return static_cast<int>( i );
596 case DecodeInsufficient:
605 case TagNameAlmostComplete:
607 if( isWhitespace( c ) )
609 m_state = TagNameComplete;
616 m_state = TagOpeningSlash;
619 if( m_preamble == 1 )
622 return static_cast<int>( i );
628 if( m_preamble == 1 )
633 return static_cast<int>( i );
638 return static_cast<int>( i );
// Creates a Tag named m_tag and makes it part of the tree being built.
// NOTE(review): braces and several statements of this function are not part of this
// listing; the comments describe only the visible lines.
652 void Parser::addTag()
// Either the tag becomes the root (constructed without a parent) ...
657 m_root =
new Tag( m_tag );
// ... or it is constructed with the current tag as first argument and becomes the
// new current tag. The condition choosing between the two is not shown.
663 m_current =
new Tag( m_current, m_tag );
// A pending tag prefix is consumed here: the flag is cleared.
666 if( m_haveTagPrefix )
670 m_haveTagPrefix =
false;
// Attributes collected for this tag are handled when there are any (body not shown).
673 if( m_attribs.size() )
695 streamEvent( m_root );
// Special case: the current tag is the root and its prefix is "stream".
702 if( m_root && m_root == m_current && m_tagPrefix ==
"stream" )
// Special case: a tag named "xml" while m_preamble is 2.
705 if( m_tag ==
"xml" && m_preamble == 2 )
// Builds a Tag::Attribute from m_attrib / m_value, appends it to m_attribs and resets
// the per-attribute flags.
// NOTE(review): braces and some statements of this function are not part of this listing.
709 void Parser::addAttribute()
// NOTE(review): the second ';' below is an empty statement and can be dropped.
711 Tag::Attribute* attr =
new Tag::Attribute( m_attrib, m_value );;
// Namespace declaration: remember the mapping m_attrib -> m_value in *m_xmlnss and
// give the attribute the XMLNS prefix.
// NOTE(review): m_xmlnss is dereferenced here; its allocation is not visible.
712 if( m_attribIsXmlns )
717 (*m_xmlnss)[m_attrib] = m_value;
718 attr->setPrefix(
XMLNS );
// Ordinary prefixed attribute: apply the collected prefix.
723 if( !m_attribPrefix.empty() )
724 attr->setPrefix( m_attribPrefix );
// An attribute literally named XMLNS (guarded statement not shown).
725 if( m_attrib ==
XMLNS )
728 m_attribs.push_back( attr );
// Ready for the next attribute.
732 m_haveAttribPrefix =
false;
733 m_attribIsXmlns =
false;
// Processes the character data collected in m_cdata.
// NOTE(review): only the guard is part of this listing; what is done with m_cdata is
// not shown.
736 void Parser::addCData()
// Nothing to do without a current tag or without collected character data.
738 if( m_current && !m_cdata.empty() )
// Handles a closing tag whose name is in m_tag and whose prefix is in m_tagPrefix.
// NOTE(review): braces, return statements and other lines of this function are not
// part of this listing; the comments describe only the visible lines.
747 bool Parser::closeTag()
// A closing tag named "stream" with prefix "stream" is treated separately (guarded
// statement not shown).
751 if( m_tag ==
"stream" && m_tagPrefix ==
"stream" )
// Mismatch check: there must be a current tag with the same name, and if that tag has
// a non-empty prefix it must equal m_tagPrefix.
754 if( !m_current || m_current->
name() != m_tag
755 || ( !m_current->
prefix().empty() && m_current->
prefix() != m_tagPrefix ) )
767 m_haveTagPrefix =
false;
// Step up to the parent of the tag just closed.
770 m_current = m_current->
parent();
// Pass the root tag to streamEvent() (the condition for this call is not shown).
774 streamEvent( m_root );
// NOTE(review): the embedded line numbers jump from 774 to 794 here; these two resets
// may belong to a later function whose signature is not part of this listing - confirm.
794 m_haveAttribPrefix =
false;
795 m_haveTagPrefix =
false;
804 bool Parser::isValid(
unsigned char c )
806 return ( c != 0xc0 || c != 0xc1 || c < 0xf5 );
809 bool Parser::isWhitespace(
unsigned char c )
811 return ( c == 0x09 || c == 0x0a || c == 0x0d || c == 0x20 );
814 void Parser::streamEvent( Tag* tag )