gloox  0.9.9.12
parser.cpp
1 /*
2  Copyright (c) 2004-2008 by Jakob Schroeter <js@camaya.net>
3  This file is part of the gloox library. http://camaya.net/gloox
4 
5  This software is distributed under a license. The full license
6  agreement can be found in the file LICENSE in this distribution.
7  This software may not be copied, modified, sold or distributed
8  other than expressed in the named license agreement.
9 
10  This software is distributed without any warranty.
11 */
12 
13 
14 
15 #include "gloox.h"
16 
17 #include "parser.h"
18 
19 namespace gloox
20 {
21 
// Constructor initialisation: stores the handler passed as 'ph' in m_tagHandler
// and starts the parser with no tag tree (m_current and m_root are 0), in the
// Initial state, with the preamble marker at 0 and the quote flag cleared.
// NOTE(review): the line carrying the constructor's signature is not present in
// this listing (the embedded numbering jumps from 21 to 23) -- confirm against
// the original file before editing.
23  : m_tagHandler( ph ), m_current( 0 ), m_root( 0 ), m_state( Initial ),
24  m_preamble( 0 ), m_quote( false )
25  {
26  }
27 
// Destructor body: deletes the tag tree rooted at m_root, if any.
// NOTE(review): the line carrying the destructor's signature is not present in
// this listing (the embedded numbering jumps from 27 to 29) -- confirm against
// the original file before editing.
29  {
30  delete m_root;
31  }
32 
33  bool Parser::feed( const std::string& data )
34  {
35  std::string::const_iterator it = data.begin();
36  for( ; it != data.end(); ++it )
37  {
38  const unsigned char c = (*it);
39 // printf( "found char: %c, ", c );
40 
41  if( !isValid( c ) )
42  {
43  cleanup();
44  return false;
45  }
46 
47  switch( m_state )
48  {
49  case Initial:
50  m_tag = "";
51  if( isWhitespace( c ) )
52  break;
53 
54  switch( c )
55  {
56  case '<':
57  m_state = TagOpening;
58  break;
59  case '>':
60  default:
61 // cleanup();
62 // return false;
63  break;
64  }
65  break;
66  case TagOpening: // opening '<' has been found before
67  if( isWhitespace( c ) )
68  break;
69 
70  switch( c )
71  {
72  case '<':
73  case '>':
74  case '!':
75  cleanup();
76  return false;
77  break;
78  case '/':
79  m_state = TagClosingSlash;
80  break;
81  case '?':
82  m_state = TagNameCollect;
83  m_preamble = 1;
84  break;
85  default:
86  m_tag += c;
87  m_state = TagNameCollect;
88  break;
89  }
90  break;
91  case TagNameCollect: // we're collecting the tag's name, we have at least one octet already
92  if( isWhitespace( c ) )
93  {
94  m_state = TagNameComplete;
95  break;
96  }
97 
98  switch( c )
99  {
100  case '<':
101  case '?':
102  case '!':
103  cleanup();
104  return false;
105  break;
106  case '/':
107  m_state = TagOpeningSlash;
108  break;
109  case '>':
110  addTag();
111  m_state = TagInside;
112  break;
113  default:
114  m_tag += c;
115  break;
116  }
117  break;
118  case TagInside: // we're inside a tag, expecting a child tag or cdata
119  m_tag = "";
120  switch( c )
121  {
122  case '<':
123  addCData();
124  m_state = TagOpening;
125  break;
126  default:
127  m_cdata += c;
128  break;
129  }
130  break;
131  case TagOpeningSlash: // a slash in an opening tag has been found, initing close of the tag
132  if( isWhitespace( c ) )
133  break;
134 
135  if( c == '>' )
136  {
137  addTag();
138  if( !closeTag() )
139  {
140  cleanup();
141  return false;
142  }
143 
144  m_state = Initial;
145  }
146  else
147  {
148  cleanup();
149  return false;
150  }
151  break;
152  case TagClosingSlash: // we have found the '/' of a closing tag
153  if( isWhitespace( c ) )
154  break;
155 
156  switch( c )
157  {
158  case '>':
159  case '<':
160  case '/':
161  cleanup();
162  return false;
163  break;
164  default:
165  m_tag += c;
166  m_state = TagClosing;
167  break;
168  }
169  break;
170  case TagClosing: // we're collecting the name of a closing tag
171  switch( c )
172  {
173  case '<':
174  case '/':
175  cleanup();
176  return false;
177  break;
178  case '>':
179  if( !closeTag() )
180  {
181  cleanup();
182  return false;
183  }
184 
185  m_state = Initial;
186  break;
187  default:
188  m_tag += c;
189  break;
190  }
191  break;
192  case TagNameComplete: // a tag name is complete, expect tag close or attribs
193  if( isWhitespace( c ) )
194  break;
195 
196  switch( c )
197  {
198  case '<':
199  cleanup();
200  return false;
201  break;
202  case '/':
203  m_state = TagOpeningSlash;
204  break;
205  case '>':
206  if( m_preamble == 1 )
207  {
208  cleanup();
209  return false;
210  }
211  m_state = TagInside;
212  addTag();
213  break;
214  case '?':
215  if( m_preamble == 1 )
216  m_preamble = 2;
217  else
218  {
219  cleanup();
220  return false;
221  }
222  break;
223  default:
224  m_attrib += c;
225  m_state = TagAttribute;
226  break;
227  }
228  break;
229  case TagAttribute: // we're collecting the name of an attribute, we have at least 1 octet
230  if( isWhitespace( c ) )
231  {
232  m_state = TagAttributeComplete;
233  break;
234  }
235 
236  switch( c )
237  {
238  case '<':
239  case '/':
240  case '>':
241  cleanup();
242  return false;
243  break;
244  case '=':
245  m_state = TagAttributeEqual;
246  break;
247  default:
248  m_attrib += c;
249  }
250  break;
251  case TagAttributeComplete: // we're expecting an equals sign or ws or the attrib value
252  if( isWhitespace( c ) )
253  break;
254 
255  switch( c )
256  {
257  case '=':
258  m_state = TagAttributeEqual;
259  break;
260  case '<':
261  case '/':
262  case '>':
263  default:
264  cleanup();
265  return false;
266  break;
267  }
268  break;
269  case TagAttributeEqual: // we have found an equals sign
270  if( isWhitespace( c ) )
271  break;
272 
273  switch( c )
274  {
275  case '"':
276  m_quote = true;
277  case '\'':
278  m_state = TagValue;
279  break;
280  case '=':
281  case '<':
282  case '>':
283  default:
284  cleanup();
285  return false;
286  break;
287  }
288  break;
289  case TagValue: // we're expecting value data
290  switch( c )
291  {
292  case '<':
293  cleanup();
294  return false;
295  break;
296  case '\'':
297  if( m_quote )
298  {
299  m_value += c;
300  break;
301  }
302  case '"':
303  addAttribute();
304  m_state = TagNameComplete;
305  m_quote = false;
306  break;
307  case '>':
308  default:
309  m_value += c;
310  }
311  break;
312  default:
313 // printf( "default action!?\n" );
314  break;
315  }
316 // printf( "parser state: %d\n", m_state );
317  }
318 
319  return true;
320  }
321 
322  void Parser::addTag()
323  {
324  if( !m_root )
325  {
326 // printf( "created Tag named %s, ", m_tag.c_str() );
327  m_root = new Tag( m_tag, "", true );
328  m_current = m_root;
329  }
330  else
331  {
332 // printf( "created Tag named %s, ", m_tag.c_str() );
333  m_current = new Tag( m_current, m_tag, "", true );
334  }
335 
336  if( m_attribs.size() )
337  {
338  m_current->setAttributes( m_attribs );
339 // printf( "added %d attributes, ", m_attribs.size() );
340  m_attribs.clear();
341  }
342 
343  if( m_tag == "stream:stream" )
344  {
345  streamEvent( m_root );
346  cleanup();
347  }
348 // else
349 // printf( "%s, ", m_root->xml().c_str() );
350 
351  if( m_tag == "xml" && m_preamble == 2 )
352  cleanup();
353  }
354 
355  void Parser::addAttribute()
356  {
357 // printf( "adding attribute: %s='%s', ", m_attrib.c_str(), m_value.c_str() );
358  m_attribs.push_back( Tag::Attribute( Tag::relax( m_attrib ), Tag::relax( m_value ) ) );
359  m_attrib = "";
360  m_value = "";
361 // printf( "added, " );
362  }
363 
364  void Parser::addCData()
365  {
366  if( m_current )
367  {
368  m_current->setCData( m_cdata );
369 // printf( "added cdata %s, ", m_cdata.c_str() );
370  m_cdata = "";
371  }
372  }
373 
374  bool Parser::closeTag()
375  {
376 // printf( "about to close, " );
377 
378  if( m_tag == "stream:stream" )
379  return true;
380 
381  if( !m_current || m_current->name() != m_tag )
382  return false;
383 
384 // printf( "m_current: %s, ", m_current->name().c_str() );
385 // printf( "m_tag: %s, ", m_tag.c_str() );
386 
387  if( m_current->parent() )
388  m_current = m_current->parent();
389  else
390  {
391 // printf( "pushing upstream, " );
392  streamEvent( m_root );
393  cleanup();
394  }
395 
396  return true;
397  }
398 
399  void Parser::cleanup()
400  {
401  delete m_root;
402  m_root = 0;
403  m_current = 0;
404  m_cdata = "";
405  m_tag = "";
406  m_attrib = "";
407  m_value = "";
408  m_attribs.clear();
409  m_state = Initial;
410  m_preamble = 0;
411  }
412 
413  bool Parser::isValid( unsigned char c )
414  {
415  return ( c != 0xc0 || c != 0xc1 || c < 0xf5 );
416  }
417 
418  bool Parser::isWhitespace( unsigned char c )
419  {
420  return ( c == 0x09 || c == 0x0a || c == 0x0d || c == 0x20 );
421  }
422 
423  void Parser::streamEvent( Tag *tag )
424  {
425  if( m_tagHandler )
426  m_tagHandler->handleTag( tag );
427  }
428 
429 }