1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16   
  17   
  18   
  19   
  20   
  21   
  22   
  23   
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form  
  35  C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements  
  36  (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to 
  37  L{Literal} expressions):: 
  38   
  39      from pyparsing import Word, alphas 
  40   
  41      # define grammar of a greeting 
  42      greet = Word(alphas) + "," + Word(alphas) + "!" 
  43   
  44      hello = "Hello, World!" 
  45      print (hello, "->", greet.parseString(hello)) 
  46   
  47  The program outputs the following:: 
  48   
  49      Hello, World! -> ['Hello', ',', 'World', '!'] 
  50   
  51  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  52  class names, and the use of '+', '|' and '^' operators. 
  53   
  54  The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an 
  55  object with named attributes. 
  56   
  57  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  58   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  59   - quoted strings 
  60   - embedded comments 
  61  """ 
  62   
  63  __version__ = "2.2.0" 
  64  __versionTime__ = "06 Mar 2017 02:06 UTC" 
  65  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  66   
  67  import string 
  68  from weakref import ref as wkref 
  69  import copy 
  70  import sys 
  71  import warnings 
  72  import re 
  73  import sre_constants 
  74  import collections 
  75  import pprint 
  76  import traceback 
  77  import types 
  78  from datetime import datetime 
  79   
  80  try: 
  81      from _thread import RLock 
  82  except ImportError: 
  83      from threading import RLock 
  84   
  85  try: 
  86      from collections import OrderedDict as _OrderedDict 
  87  except ImportError: 
  88      try: 
  89          from ordereddict import OrderedDict as _OrderedDict 
  90      except ImportError: 
  91          _OrderedDict = None 
  92   
  93   
  94   
# Names exported by ``from pyparsing import *``: the parser element and
# exception classes first, then the helper functions, character-set strings
# and predefined expressions.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]
 115   
 116  system_version = tuple(sys.version_info)[:3] 
 117  PY_3 = system_version[0] == 3 
 118  if PY_3: 
 119      _MAX_INT = sys.maxsize 
 120      basestring = str 
 121      unichr = chr 
 122      _ustr = str 
 123   
 124       
 125      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 126   
 127  else: 
 128      _MAX_INT = sys.maxint 
 129      range = xrange 
 132          """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 
 133             str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 
 134             then < returns the unicode object | encodes it with the default encoding | ... >. 
 135          """ 
 136          if isinstance(obj,unicode): 
 137              return obj 
 138   
 139          try: 
 140               
 141               
 142              return str(obj) 
 143   
 144          except UnicodeEncodeError: 
 145               
 146              ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 
 147              xmlcharref = Regex(r'&#\d+;') 
 148              xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 
 149              return xmlcharref.transformString(ret) 
  150   
 151       
 152      singleArgBuiltins = [] 
 153      import __builtin__ 
 154      for fname in "sum len sorted reversed list tuple set any all min max".split(): 
 155          try: 
 156              singleArgBuiltins.append(getattr(__builtin__,fname)) 
 157          except AttributeError: 
 158              continue 
 159               
 160  _generatorType = type((y for y in range(1))) 
 163      """Escape &, <, >, ", ', etc. in a string of data.""" 
 164   
 165       
 166      from_symbols = '&><"\'' 
 167      to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) 
 168      for from_,to_ in zip(from_symbols, to_symbols): 
 169          data = data.replace(from_, to_) 
 170      return data 
  171   
 174   
 175  alphas     = string.ascii_uppercase + string.ascii_lowercase 
 176  nums       = "0123456789" 
 177  hexnums    = nums + "ABCDEFabcdef" 
 178  alphanums  = alphas + nums 
 179  _bslash    = chr(92) 
 180  printables = "".join(c for c in string.printable if c not in string.whitespace) 
 183      """base exception class for all parsing runtime exceptions""" 
 184       
 185       
 186 -    def __init__( self, pstr, loc=0, msg=None, elem=None ): 
  187          self.loc = loc 
 188          if msg is None: 
 189              self.msg = pstr 
 190              self.pstr = "" 
 191          else: 
 192              self.msg = msg 
 193              self.pstr = pstr 
 194          self.parserElement = elem 
 195          self.args = (pstr, loc, msg) 
  196   
 197      @classmethod 
 199          """ 
 200          internal factory method to simplify creating one type of ParseException  
 201          from another - avoids having __init__ signature conflicts among subclasses 
 202          """ 
 203          return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) 
  204   
 206          """supported attributes by name are: 
 207              - lineno - returns the line number of the exception text 
 208              - col - returns the column number of the exception text 
 209              - line - returns the line containing the exception text 
 210          """ 
 211          if( aname == "lineno" ): 
 212              return lineno( self.loc, self.pstr ) 
 213          elif( aname in ("col", "column") ): 
 214              return col( self.loc, self.pstr ) 
 215          elif( aname == "line" ): 
 216              return line( self.loc, self.pstr ) 
 217          else: 
 218              raise AttributeError(aname) 
  219   
 221          return "%s (at char %d), (line:%d, col:%d)" % \ 
 222                  ( self.msg, self.loc, self.lineno, self.column ) 
  236          return "lineno col line".split() + dir(type(self)) 
   237   
 239      """ 
 240      Exception thrown when parse expressions don't match class; 
 241      supported attributes by name are: 
 242       - lineno - returns the line number of the exception text 
 243       - col - returns the column number of the exception text 
 244       - line - returns the line containing the exception text 
 245           
 246      Example:: 
 247          try: 
 248              Word(nums).setName("integer").parseString("ABC") 
 249          except ParseException as pe: 
 250              print(pe) 
 251              print("column: {}".format(pe.col)) 
 252               
 253      prints:: 
 254         Expected integer (at char 0), (line:1, col:1) 
 255          column: 1 
 256      """ 
 257      pass 
  258   
 260      """user-throwable exception thrown when inconsistent parse content 
 261         is found; stops all parsing immediately""" 
 262      pass 
  263   
 265      """just like L{ParseFatalException}, but thrown internally when an 
 266         L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop  
 267         immediately because an unbacktrackable syntax error has been found""" 
 268      pass 
  269   
 284      """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive""" 
    def __init__( self, parseElementList ):
        """
        Keep the given list of parse elements on the instance as
        C{parseElementTrace}.
        """
        self.parseElementTrace = parseElementList
  287   
 289          return "RecursiveGrammarException: %s" % self.parseElementTrace 
   290   
 297          return repr(self.tup[0]) 
  299          self.tup = (self.tup[0],i) 
  302      """ 
 303      Structured parse results, to provide multiple means of access to the parsed data: 
 304         - as a list (C{len(results)}) 
 305         - by list index (C{results[0], results[1]}, etc.) 
 306         - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 
 307   
 308      Example:: 
 309          integer = Word(nums) 
 310          date_str = (integer.setResultsName("year") + '/'  
 311                          + integer.setResultsName("month") + '/'  
 312                          + integer.setResultsName("day")) 
 313          # equivalent form: 
 314          # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 
 315   
 316          # parseString returns a ParseResults object 
 317          result = date_str.parseString("1999/12/31") 
 318   
 319          def test(s, fn=repr): 
 320              print("%s -> %s" % (s, fn(eval(s)))) 
 321          test("list(result)") 
 322          test("result[0]") 
 323          test("result['month']") 
 324          test("result.day") 
 325          test("'month' in result") 
 326          test("'minutes' in result") 
 327          test("result.dump()", str) 
 328      prints:: 
 329          list(result) -> ['1999', '/', '12', '/', '31'] 
 330          result[0] -> '1999' 
 331          result['month'] -> '12' 
 332          result.day -> '31' 
 333          'month' in result -> True 
 334          'minutes' in result -> False 
 335          result.dump() -> ['1999', '/', '12', '/', '31'] 
 336          - day: 31 
 337          - month: 12 
 338          - year: 1999 
 339      """ 
 340 -    def __new__(cls, toklist=None, name=None, asList=True, modal=True ): 
  341          if isinstance(toklist, cls): 
 342              return toklist 
 343          retobj = object.__new__(cls) 
 344          retobj.__doinit = True 
 345          return retobj 
  346   
 347       
 348       
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        """
        Initialize the token list and, when a results name is given, register
        the tokens under that name.

        Parameters:
         - toklist - C{None}, a list, a generator, or any single value; a list
           is shallow-copied, a generator is expanded into a list, and anything
           else becomes a one-element list
         - name - optional results name; C{None} and falsy values are ignored,
           an int name is converted to a string
         - asList - when true the named value is stored as a C{ParseResults};
           otherwise the first token (or C{toklist} itself) is stored
         - modal - when false the name is also recorded in C{__accumNames}
         - isinstance - the builtin bound as a default argument (presumably a
           lookup-speed optimization; callers are not expected to pass it)
        """
        # __doinit is set by __new__ only on freshly allocated objects; an
        # instance that __new__ handed back unchanged skips this setup.
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                # copy so the caller's list is not shared
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name)
            self.__name = name
            # nothing is registered under the name when toklist is None, '' or []
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        # toklist cannot be indexed at 0; store it whole
                        self[name] = toklist
  387   
 389          if isinstance( i, (int,slice) ): 
 390              return self.__toklist[i] 
 391          else: 
 392              if i not in self.__accumNames: 
 393                  return self.__tokdict[i][-1][0] 
 394              else: 
 395                  return ParseResults([ v[0] for v in self.__tokdict[i] ]) 
  396   
 398          if isinstance(v,_ParseResultsWithOffset): 
 399              self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 
 400              sub = v[0] 
 401          elif isinstance(k,(int,slice)): 
 402              self.__toklist[k] = v 
 403              sub = v 
 404          else: 
 405              self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 
 406              sub = v 
 407          if isinstance(sub,ParseResults): 
 408              sub.__parent = wkref(self) 
  409   
 411          if isinstance(i,(int,slice)): 
 412              mylen = len( self.__toklist ) 
 413              del self.__toklist[i] 
 414   
 415               
 416              if isinstance(i, int): 
 417                  if i < 0: 
 418                      i += mylen 
 419                  i = slice(i, i+1) 
 420               
 421              removed = list(range(*i.indices(mylen))) 
 422              removed.reverse() 
 423               
 424              for name,occurrences in self.__tokdict.items(): 
 425                  for j in removed: 
 426                      for k, (value, position) in enumerate(occurrences): 
 427                          occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 
 428          else: 
 429              del self.__tokdict[i] 
  430   
 432          return k in self.__tokdict 
  433   
 434 -    def __len__( self ): return len( self.__toklist ) 
  435 -    def __bool__(self): return ( not not self.__toklist ) 
  436      __nonzero__ = __bool__ 
 437 -    def __iter__( self ): return iter( self.__toklist ) 
  438 -    def __reversed__( self ): return iter( self.__toklist[::-1] ) 
  440          if hasattr(self.__tokdict, "iterkeys"): 
 441              return self.__tokdict.iterkeys() 
 442          else: 
 443              return iter(self.__tokdict) 
  444   
 446          return (self[k] for k in self._iterkeys()) 
  447               
 449          return ((k, self[k]) for k in self._iterkeys()) 
  450   
 451      if PY_3: 
 452          keys = _iterkeys        
 453          """Returns an iterator of all named result keys (Python 3.x only).""" 
 454   
 455          values = _itervalues 
 456          """Returns an iterator of all named result values (Python 3.x only).""" 
 457   
 458          items = _iteritems 
 459          """Returns an iterator of all named result key-value tuples (Python 3.x only).""" 
 460   
 461      else: 
 462          iterkeys = _iterkeys 
 463          """Returns an iterator of all named result keys (Python 2.x only).""" 
 464   
 465          itervalues = _itervalues 
 466          """Returns an iterator of all named result values (Python 2.x only).""" 
 467   
 468          iteritems = _iteritems 
 469          """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 
 470   
 472              """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 
 473              return list(self.iterkeys()) 
  474   
 476              """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 
 477              return list(self.itervalues()) 
  478                   
 480              """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 
 481              return list(self.iteritems()) 
  482   
 484          """Since keys() returns an iterator, this method is helpful in bypassing 
 485             code that looks for the existence of any defined results names.""" 
 486          return bool(self.__tokdict) 
  487           
 488 -    def pop( self, *args, **kwargs): 
  489          """ 
 490          Removes and returns item at specified index (default=C{last}). 
 491          Supports both C{list} and C{dict} semantics for C{pop()}. If passed no 
 492          argument or an integer argument, it will use C{list} semantics 
 493          and pop tokens from the list of parsed tokens. If passed a  
 494          non-integer argument (most likely a string), it will use C{dict} 
 495          semantics and pop the corresponding value from any defined  
 496          results names. A second default return value argument is  
 497          supported, just as in C{dict.pop()}. 
 498   
 499          Example:: 
 500              def remove_first(tokens): 
 501                  tokens.pop(0) 
 502              print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 
 503              print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 
 504   
 505              label = Word(alphas) 
 506              patt = label("LABEL") + OneOrMore(Word(nums)) 
 507              print(patt.parseString("AAB 123 321").dump()) 
 508   
 509              # Use pop() in a parse action to remove named result (note that corresponding value is not 
 510              # removed from list form of results) 
 511              def remove_LABEL(tokens): 
 512                  tokens.pop("LABEL") 
 513                  return tokens 
 514              patt.addParseAction(remove_LABEL) 
 515              print(patt.parseString("AAB 123 321").dump()) 
 516          prints:: 
 517              ['AAB', '123', '321'] 
 518              - LABEL: AAB 
 519   
 520              ['AAB', '123', '321'] 
 521          """ 
 522          if not args: 
 523              args = [-1] 
 524          for k,v in kwargs.items(): 
 525              if k == 'default': 
 526                  args = (args[0], v) 
 527              else: 
 528                  raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 
 529          if (isinstance(args[0], int) or  
 530                          len(args) == 1 or  
 531                          args[0] in self): 
 532              index = args[0] 
 533              ret = self[index] 
 534              del self[index] 
 535              return ret 
 536          else: 
 537              defaultvalue = args[1] 
 538              return defaultvalue 
  539   
 540 -    def get(self, key, defaultValue=None): 
  541          """ 
 542          Returns named result matching the given key, or if there is no 
 543          such name, then returns the given C{defaultValue} or C{None} if no 
 544          C{defaultValue} is specified. 
 545   
 546          Similar to C{dict.get()}. 
 547           
 548          Example:: 
 549              integer = Word(nums) 
 550              date_str = integer("year") + '/' + integer("month") + '/' + integer("day")            
 551   
 552              result = date_str.parseString("1999/12/31") 
 553              print(result.get("year")) # -> '1999' 
 554              print(result.get("hour", "not specified")) # -> 'not specified' 
 555              print(result.get("hour")) # -> None 
 556          """ 
 557          if key in self: 
 558              return self[key] 
 559          else: 
 560              return defaultValue 
  561   
 562 -    def insert( self, index, insStr ): 
  563          """ 
 564          Inserts new element at location index in the list of parsed tokens. 
 565           
 566          Similar to C{list.insert()}. 
 567   
 568          Example:: 
 569              print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 
 570   
 571              # use a parse action to insert the parse location in the front of the parsed results 
 572              def insert_locn(locn, tokens): 
 573                  tokens.insert(0, locn) 
 574              print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 
 575          """ 
 576          self.__toklist.insert(index, insStr) 
 577           
 578          for name,occurrences in self.__tokdict.items(): 
 579              for k, (value, position) in enumerate(occurrences): 
 580                  occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 
  581   
 583          """ 
 584          Add single element to end of ParseResults list of elements. 
 585   
 586          Example:: 
 587              print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 
 588               
 589              # use a parse action to compute the sum of the parsed integers, and add it to the end 
 590              def append_sum(tokens): 
 591                  tokens.append(sum(map(int, tokens))) 
 592              print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 
 593          """ 
 594          self.__toklist.append(item) 
  595   
 597          """ 
 598          Add sequence of elements to end of ParseResults list of elements. 
 599   
 600          Example:: 
 601              patt = OneOrMore(Word(alphas)) 
 602               
 603              # use a parse action to append the reverse of the matched strings, to make a palindrome 
 604              def make_palindrome(tokens): 
 605                  tokens.extend(reversed([t[::-1] for t in tokens])) 
 606                  return ''.join(tokens) 
 607              print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 
 608          """ 
 609          if isinstance(itemseq, ParseResults): 
 610              self += itemseq 
 611          else: 
 612              self.__toklist.extend(itemseq) 
  613   
 615          """ 
 616          Clear all elements and results names. 
 617          """ 
 618          del self.__toklist[:] 
 619          self.__tokdict.clear() 
  620   
 622          try: 
 623              return self[name] 
 624          except KeyError: 
 625              return "" 
 626               
 627          if name in self.__tokdict: 
 628              if name not in self.__accumNames: 
 629                  return self.__tokdict[name][-1][0] 
 630              else: 
 631                  return ParseResults([ v[0] for v in self.__tokdict[name] ]) 
 632          else: 
 633              return "" 
  634   
 636          ret = self.copy() 
 637          ret += other 
 638          return ret 
  639   
 641          if other.__tokdict: 
 642              offset = len(self.__toklist) 
 643              addoffset = lambda a: offset if a<0 else a+offset 
 644              otheritems = other.__tokdict.items() 
 645              otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 
 646                                  for (k,vlist) in otheritems for v in vlist] 
 647              for k,v in otherdictitems: 
 648                  self[k] = v 
 649                  if isinstance(v[0],ParseResults): 
 650                      v[0].__parent = wkref(self) 
 651               
 652          self.__toklist += other.__toklist 
 653          self.__accumNames.update( other.__accumNames ) 
 654          return self 
  655   
 657          if isinstance(other,int) and other == 0: 
 658               
 659              return self.copy() 
 660          else: 
 661               
 662              return other + self 
  663           
 665          return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 
  666   
 668          return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']' 
  669   
 671          out = [] 
 672          for item in self.__toklist: 
 673              if out and sep: 
 674                  out.append(sep) 
 675              if isinstance( item, ParseResults ): 
 676                  out += item._asStringList() 
 677              else: 
 678                  out.append( _ustr(item) ) 
 679          return out 
  680   
 682          """ 
 683          Returns the parse results as a nested list of matching tokens, all converted to strings. 
 684   
 685          Example:: 
 686              patt = OneOrMore(Word(alphas)) 
 687              result = patt.parseString("sldkj lsdkj sldkj") 
 688              # even though the result prints in string-like form, it is actually a pyparsing ParseResults 
 689              print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj'] 
 690               
 691              # Use asList() to create an actual list 
 692              result_list = result.asList() 
 693              print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] 
 694          """ 
 695          return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist] 
  696   
 698          """ 
 699          Returns the named parse results as a nested dictionary. 
 700   
 701          Example:: 
 702              integer = Word(nums) 
 703              date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 
 704               
 705              result = date_str.parseString('12/31/1999') 
 706              print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) 
 707               
 708              result_dict = result.asDict() 
 709              print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} 
 710   
 711              # even though a ParseResults supports dict-like access, sometime you just need to have a dict 
 712              import json 
 713              print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable 
 714              print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} 
 715          """ 
 716          if PY_3: 
 717              item_fn = self.items 
 718          else: 
 719              item_fn = self.iteritems 
 720               
 721          def toItem(obj): 
 722              if isinstance(obj, ParseResults): 
 723                  if obj.haskeys(): 
 724                      return obj.asDict() 
 725                  else: 
 726                      return [toItem(v) for v in obj] 
 727              else: 
 728                  return obj 
  729                   
 730          return dict((k,toItem(v)) for k,v in item_fn()) 
  731   
 733          """ 
 734          Returns a new copy of a C{ParseResults} object. 
 735          """ 
 736          ret = ParseResults( self.__toklist ) 
 737          ret.__tokdict = self.__tokdict.copy() 
 738          ret.__parent = self.__parent 
 739          ret.__accumNames.update( self.__accumNames ) 
 740          ret.__name = self.__name 
 741          return ret 
  742   
 743 -    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): 
  744          """ 
 745          (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. 
 746          """ 
 747          nl = "\n" 
 748          out = [] 
 749          namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() 
 750                                                              for v in vlist) 
 751          nextLevelIndent = indent + "  " 
 752   
 753           
 754          if not formatted: 
 755              indent = "" 
 756              nextLevelIndent = "" 
 757              nl = "" 
 758   
 759          selfTag = None 
 760          if doctag is not None: 
 761              selfTag = doctag 
 762          else: 
 763              if self.__name: 
 764                  selfTag = self.__name 
 765   
 766          if not selfTag: 
 767              if namedItemsOnly: 
 768                  return "" 
 769              else: 
 770                  selfTag = "ITEM" 
 771   
 772          out += [ nl, indent, "<", selfTag, ">" ] 
 773   
 774          for i,res in enumerate(self.__toklist): 
 775              if isinstance(res,ParseResults): 
 776                  if i in namedItems: 
 777                      out += [ res.asXML(namedItems[i], 
 778                                          namedItemsOnly and doctag is None, 
 779                                          nextLevelIndent, 
 780                                          formatted)] 
 781                  else: 
 782                      out += [ res.asXML(None, 
 783                                          namedItemsOnly and doctag is None, 
 784                                          nextLevelIndent, 
 785                                          formatted)] 
 786              else: 
 787                   
 788                  resTag = None 
 789                  if i in namedItems: 
 790                      resTag = namedItems[i] 
 791                  if not resTag: 
 792                      if namedItemsOnly: 
 793                          continue 
 794                      else: 
 795                          resTag = "ITEM" 
 796                  xmlBodyText = _xml_escape(_ustr(res)) 
 797                  out += [ nl, nextLevelIndent, "<", resTag, ">", 
 798                                                  xmlBodyText, 
 799                                                  "</", resTag, ">" ] 
 800   
 801          out += [ nl, indent, "</", selfTag, ">" ] 
 802          return "".join(out) 
  803   
 805          for k,vlist in self.__tokdict.items(): 
 806              for v,loc in vlist: 
 807                  if sub is v: 
 808                      return k 
 809          return None 
  810   
 812          r""" 
 813          Returns the results name for this token expression. Useful when several  
 814          different expressions might match at a particular location. 
 815   
 816          Example:: 
 817              integer = Word(nums) 
 818              ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") 
 819              house_number_expr = Suppress('#') + Word(nums, alphanums) 
 820              user_data = (Group(house_number_expr)("house_number")  
 821                          | Group(ssn_expr)("ssn") 
 822                          | Group(integer)("age")) 
 823              user_info = OneOrMore(user_data) 
 824               
 825              result = user_info.parseString("22 111-22-3333 #221B") 
 826              for item in result: 
 827                  print(item.getName(), ':', item[0]) 
 828          prints:: 
 829              age : 22 
 830              ssn : 111-22-3333 
 831              house_number : 221B 
 832          """ 
 833          if self.__name: 
 834              return self.__name 
 835          elif self.__parent: 
 836              par = self.__parent() 
 837              if par: 
 838                  return par.__lookup(self) 
 839              else: 
 840                  return None 
 841          elif (len(self) == 1 and 
 842                 len(self.__tokdict) == 1 and 
 843                 next(iter(self.__tokdict.values()))[0][1] in (0,-1)): 
 844              return next(iter(self.__tokdict.keys())) 
 845          else: 
 846              return None 
  847   
 848 -    def dump(self, indent='', depth=0, full=True): 
  849          """ 
 850          Diagnostic method for listing out the contents of a C{ParseResults}. 
 851          Accepts an optional C{indent} argument so that this string can be embedded 
 852          in a nested display of other data. 
 853   
 854          Example:: 
 855              integer = Word(nums) 
 856              date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 
 857               
 858              result = date_str.parseString('12/31/1999') 
 859              print(result.dump()) 
 860          prints:: 
 861              ['12', '/', '31', '/', '1999'] 
 862              - day: 1999 
 863              - month: 31 
 864              - year: 12 
 865          """ 
 866          out = [] 
 867          NL = '\n' 
 868          out.append( indent+_ustr(self.asList()) ) 
 869          if full: 
 870              if self.haskeys(): 
 871                  items = sorted((str(k), v) for k,v in self.items()) 
 872                  for k,v in items: 
 873                      if out: 
 874                          out.append(NL) 
 875                      out.append( "%s%s- %s: " % (indent,('  '*depth), k) ) 
 876                      if isinstance(v,ParseResults): 
 877                          if v: 
 878                              out.append( v.dump(indent,depth+1) ) 
 879                          else: 
 880                              out.append(_ustr(v)) 
 881                      else: 
 882                          out.append(repr(v)) 
 883              elif any(isinstance(vv,ParseResults) for vv in self): 
 884                  v = self 
 885                  for i,vv in enumerate(v): 
 886                      if isinstance(vv,ParseResults): 
 887                          out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) )) 
 888                      else: 
 889                          out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv))) 
 890               
 891          return "".join(out) 
  892   
 893 -    def pprint(self, *args, **kwargs): 
  894          """ 
 895          Pretty-printer for parsed results as a list, using the C{pprint} module. 
 896          Accepts additional positional or keyword args as defined for the  
 897          C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) 
 898   
 899          Example:: 
 900              ident = Word(alphas, alphanums) 
 901              num = Word(nums) 
 902              func = Forward() 
 903              term = ident | num | Group('(' + func + ')') 
 904              func <<= ident + Group(Optional(delimitedList(term))) 
 905              result = func.parseString("fna a,b,(fnb c,d,200),100") 
 906              result.pprint(width=40) 
 907          prints:: 
 908              ['fna', 
 909               ['a', 
 910                'b', 
 911                ['(', 'fnb', ['c', 'd', '200'], ')'], 
 912                '100']] 
 913          """ 
 914          pprint.pprint(self.asList(), *args, **kwargs) 
  915   
 916       
 918          return ( self.__toklist, 
 919                   ( self.__tokdict.copy(), 
 920                     self.__parent is not None and self.__parent() or None, 
 921                     self.__accumNames, 
 922                     self.__name ) ) 
  923   
 925          self.__toklist = state[0] 
 926          (self.__tokdict, 
 927           par, 
 928           inAccumNames, 
 929           self.__name) = state[1] 
 930          self.__accumNames = {} 
 931          self.__accumNames.update(inAccumNames) 
 932          if par is not None: 
 933              self.__parent = wkref(par) 
 934          else: 
 935              self.__parent = None 
  936   
 938          return self.__toklist, self.__name, self.__asList, self.__modal 
  939   
 941          return (dir(type(self)) + list(self.keys())) 
  942   
 943  collections.MutableMapping.register(ParseResults) 
 944   
 945 -def col (loc,strg): 
  946      """Returns current column within a string, counting newlines as line separators. 
 947     The first column is number 1. 
 948   
 949     Note: the default parsing behavior is to expand tabs in the input string 
 950     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 951     on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
 952     consistent view of the parsed string, the parse location, and line and column 
 953     positions within the parsed string. 
 954     """ 
 955      s = strg 
 956      return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc) 
  957   
 959      """Returns current line number within a string, counting newlines as line separators. 
 960     The first line is number 1. 
 961   
 962     Note: the default parsing behavior is to expand tabs in the input string 
 963     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 964     on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
 965     consistent view of the parsed string, the parse location, and line and column 
 966     positions within the parsed string. 
 967     """ 
 968      return strg.count("\n",0,loc) + 1 
  969   
 970 -def line( loc, strg ): 
  971      """Returns the line of text containing loc within a string, counting newlines as line separators. 
 972         """ 
 973      lastCR = strg.rfind("\n", 0, loc) 
 974      nextCR = strg.find("\n", loc) 
 975      if nextCR >= 0: 
 976          return strg[lastCR+1:nextCR] 
 977      else: 
 978          return strg[lastCR+1:] 
  979   
 981      print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) 
  982   
 984      print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 
  985   
 987      print ("Exception raised:" + _ustr(exc)) 
  988   
 990      """'Do-nothing' debug action, to suppress debugging output during parsing.""" 
 991      pass 
  992   
 993   
 994   
 995   
 996       
 997           
 998       
 999       
1000       
1001           
1002           
1003               
1004                   
1005                   
1006                   
1007               
1008                   
1009                       
1010                   
1011                   
1012       
1013   
1014   
1015  'decorator to trim function calls to match the arity of the target' 
1017      if func in singleArgBuiltins: 
1018          return lambda s,l,t: func(t) 
1019      limit = [0] 
1020      foundArity = [False] 
1021       
1022       
1023      if system_version[:2] >= (3,5): 
1024          def extract_stack(limit=0): 
1025               
1026              offset = -3 if system_version == (3,5,0) else -2 
1027              frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset] 
1028              return [(frame_summary.filename, frame_summary.lineno)] 
 1029          def extract_tb(tb, limit=0): 
1030              frames = traceback.extract_tb(tb, limit=limit) 
1031              frame_summary = frames[-1] 
1032              return [(frame_summary.filename, frame_summary.lineno)] 
1033      else: 
1034          extract_stack = traceback.extract_stack 
1035          extract_tb = traceback.extract_tb 
1036       
1037       
1038       
1039       
1040      LINE_DIFF = 6 
1041       
1042       
1043      this_line = extract_stack(limit=2)[-1] 
1044      pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF) 
1045   
1046      def wrapper(*args): 
1047          while 1: 
1048              try: 
1049                  ret = func(*args[limit[0]:]) 
1050                  foundArity[0] = True 
1051                  return ret 
1052              except TypeError: 
1053                   
1054                  if foundArity[0]: 
1055                      raise 
1056                  else: 
1057                      try: 
1058                          tb = sys.exc_info()[-1] 
1059                          if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth: 
1060                              raise 
1061                      finally: 
1062                          del tb 
1063   
1064                  if limit[0] <= maxargs: 
1065                      limit[0] += 1 
1066                      continue 
1067                  raise 
1068   
1069       
1070      func_name = "<parse action>" 
1071      try: 
1072          func_name = getattr(func, '__name__',  
1073                              getattr(func, '__class__').__name__) 
1074      except Exception: 
1075          func_name = str(func) 
1076      wrapper.__name__ = func_name 
1077   
1078      return wrapper 
1079   
1081      """Abstract base level parser element class.""" 
1082      DEFAULT_WHITE_CHARS = " \n\t\r" 
1083      verbose_stacktrace = False 
1084   
1085      @staticmethod 
1087          r""" 
1088          Overrides the default whitespace chars 
1089   
1090          Example:: 
1091              # default whitespace chars are space, <TAB> and newline 
1092              OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl'] 
1093               
1094              # change to just treat newline as significant 
1095              ParserElement.setDefaultWhitespaceChars(" \t") 
1096              OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def'] 
1097          """ 
1098          ParserElement.DEFAULT_WHITE_CHARS = chars 
 1099   
1100      @staticmethod 
1102          """ 
1103          Set class to be used for inclusion of string literals into a parser. 
1104           
1105          Example:: 
1106              # default literal class used is Literal 
1107              integer = Word(nums) 
1108              date_str = integer("year") + '/' + integer("month") + '/' + integer("day")            
1109   
1110              date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31'] 
1111   
1112   
1113              # change to Suppress 
1114              ParserElement.inlineLiteralsUsing(Suppress) 
1115              date_str = integer("year") + '/' + integer("month") + '/' + integer("day")            
1116   
1117              date_str.parseString("1999/12/31")  # -> ['1999', '12', '31'] 
1118          """ 
1119          ParserElement._literalStringClass = cls 
 1120   
1122          self.parseAction = list() 
1123          self.failAction = None 
1124           
1125          self.strRepr = None 
1126          self.resultsName = None 
1127          self.saveAsList = savelist 
1128          self.skipWhitespace = True 
1129          self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
1130          self.copyDefaultWhiteChars = True 
1131          self.mayReturnEmpty = False  
1132          self.keepTabs = False 
1133          self.ignoreExprs = list() 
1134          self.debug = False 
1135          self.streamlined = False 
1136          self.mayIndexError = True  
1137          self.errmsg = "" 
1138          self.modalResults = True  
1139          self.debugActions = ( None, None, None )  
1140          self.re = None 
1141          self.callPreparse = True  
1142          self.callDuringTry = False 
 1143   
1145          """ 
1146          Make a copy of this C{ParserElement}.  Useful for defining different parse actions 
1147          for the same parsing pattern, using copies of the original parse element. 
1148           
1149          Example:: 
1150              integer = Word(nums).setParseAction(lambda toks: int(toks[0])) 
1151              integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") 
1152              integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") 
1153               
1154              print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) 
1155          prints:: 
1156              [5120, 100, 655360, 268435456] 
1157          Equivalent form of C{expr.copy()} is just C{expr()}:: 
1158              integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") 
1159          """ 
1160          cpy = copy.copy( self ) 
1161          cpy.parseAction = self.parseAction[:] 
1162          cpy.ignoreExprs = self.ignoreExprs[:] 
1163          if self.copyDefaultWhiteChars: 
1164              cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
1165          return cpy 
 1166   
1168          """ 
1169          Define name for this expression, makes debugging and exception messages clearer. 
1170           
1171          Example:: 
1172              Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) 
1173              Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1) 
1174          """ 
1175          self.name = name 
1176          self.errmsg = "Expected " + self.name 
1177          if hasattr(self,"exception"): 
1178              self.exception.msg = self.errmsg 
1179          return self 
 1180   
1182          """ 
1183          Define name for referencing matching tokens as a nested attribute 
1184          of the returned parse results. 
1185          NOTE: this returns a *copy* of the original C{ParserElement} object; 
1186          this is so that the client can define a basic element, such as an 
1187          integer, and reference it in multiple places with different names. 
1188   
1189          You can also set results names using the abbreviated syntax, 
1190          C{expr("name")} in place of C{expr.setResultsName("name")} -  
1191          see L{I{__call__}<__call__>}. 
1192   
1193          Example:: 
1194              date_str = (integer.setResultsName("year") + '/'  
1195                          + integer.setResultsName("month") + '/'  
1196                          + integer.setResultsName("day")) 
1197   
1198              # equivalent form: 
1199              date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 
1200          """ 
1201          newself = self.copy() 
1202          if name.endswith("*"): 
1203              name = name[:-1] 
1204              listAllMatches=True 
1205          newself.resultsName = name 
1206          newself.modalResults = not listAllMatches 
1207          return newself 
 1208   
1210          """Method to invoke the Python pdb debugger when this element is 
1211             about to be parsed. Set C{breakFlag} to True to enable, False to 
1212             disable. 
1213          """ 
1214          if breakFlag: 
1215              _parseMethod = self._parse 
1216              def breaker(instring, loc, doActions=True, callPreParse=True): 
1217                  import pdb 
1218                  pdb.set_trace() 
1219                  return _parseMethod( instring, loc, doActions, callPreParse ) 
 1220              breaker._originalParseMethod = _parseMethod 
1221              self._parse = breaker 
1222          else: 
1223              if hasattr(self._parse,"_originalParseMethod"): 
1224                  self._parse = self._parse._originalParseMethod 
1225          return self 
 1226   
1228          """ 
1229          Define one or more actions to perform when successfully matching parse element definition. 
1230          Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 
1231          C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 
1232           - s   = the original string being parsed (see note below) 
1233           - loc = the location of the matching substring 
1234           - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object 
1235          If the functions in fns modify the tokens, they can return them as the return 
1236          value from fn, and the modified list of tokens will replace the original. 
1237          Otherwise, fn does not need to return any value. 
1238   
1239          Optional keyword arguments: 
1240           - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing 
1241   
1242          Note: the default parsing behavior is to expand tabs in the input string 
1243          before starting the parsing process.  See L{I{parseString}<parseString>} for more information 
1244          on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
1245          consistent view of the parsed string, the parse location, and line and column 
1246          positions within the parsed string. 
1247           
1248          Example:: 
1249              integer = Word(nums) 
1250              date_str = integer + '/' + integer + '/' + integer 
1251   
1252              date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31'] 
1253   
1254              # use parse action to convert to ints at parse time 
1255              integer = Word(nums).setParseAction(lambda toks: int(toks[0])) 
1256              date_str = integer + '/' + integer + '/' + integer 
1257   
1258              # note that integer fields are now ints, not strings 
1259              date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31] 
1260          """ 
1261          self.parseAction = list(map(_trim_arity, list(fns))) 
1262          self.callDuringTry = kwargs.get("callDuringTry", False) 
1263          return self 
 1264   
1266          """ 
1267          Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}. 
1268           
1269          See examples in L{I{copy}<copy>}. 
1270          """ 
1271          self.parseAction += list(map(_trim_arity, list(fns))) 
1272          self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) 
1273          return self 
 1274   
1276          """Add a boolean predicate function to expression's list of parse actions. See  
1277          L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},  
1278          functions passed to C{addCondition} need to return boolean success/fail of the condition. 
1279   
1280          Optional keyword arguments: 
1281           - message = define a custom message to be used in the raised exception 
1282           - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException 
1283            
1284          Example:: 
1285              integer = Word(nums).setParseAction(lambda toks: int(toks[0])) 
1286              year_int = integer.copy() 
1287              year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") 
1288              date_str = year_int + '/' + integer + '/' + integer 
1289   
1290              result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1) 
1291          """ 
1292          msg = kwargs.get("message", "failed user-defined condition") 
1293          exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException 
1294          for fn in fns: 
1295              def pa(s,l,t): 
1296                  if not bool(_trim_arity(fn)(s,l,t)): 
1297                      raise exc_type(s,l,msg) 
 1298              self.parseAction.append(pa) 
1299          self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) 
1300          return self 
1301   
1303          """Define action to perform if parsing fails at this expression. 
1304             Fail action fn is a callable function that takes the arguments 
1305             C{fn(s,loc,expr,err)} where: 
1306              - s = string being parsed 
1307              - loc = location where expression match was attempted and failed 
1308              - expr = the parse expression that failed 
1309              - err = the exception thrown 
1310             The function returns no value.  It may throw C{L{ParseFatalException}} 
1311             if it is desired to stop parsing immediately.""" 
1312          self.failAction = fn 
1313          return self 
 1314   
1316          exprsFound = True 
1317          while exprsFound: 
1318              exprsFound = False 
1319              for e in self.ignoreExprs: 
1320                  try: 
1321                      while 1: 
1322                          loc,dummy = e._parse( instring, loc ) 
1323                          exprsFound = True 
1324                  except ParseException: 
1325                      pass 
1326          return loc 
 1327   
1329          if self.ignoreExprs: 
1330              loc = self._skipIgnorables( instring, loc ) 
1331   
1332          if self.skipWhitespace: 
1333              wt = self.whiteChars 
1334              instrlen = len(instring) 
1335              while loc < instrlen and instring[loc] in wt: 
1336                  loc += 1 
1337   
1338          return loc 
 1339   
1340 -    def parseImpl( self, instring, loc, doActions=True ): 
 1342   
1343 -    def postParse( self, instring, loc, tokenlist ): 
 1345   
1346       
1347 -    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): 
 1348          debugging = ( self.debug )  
1349   
1350          if debugging or self.failAction: 
1351               
1352              if (self.debugActions[0] ): 
1353                  self.debugActions[0]( instring, loc, self ) 
1354              if callPreParse and self.callPreparse: 
1355                  preloc = self.preParse( instring, loc ) 
1356              else: 
1357                  preloc = loc 
1358              tokensStart = preloc 
1359              try: 
1360                  try: 
1361                      loc,tokens = self.parseImpl( instring, preloc, doActions ) 
1362                  except IndexError: 
1363                      raise ParseException( instring, len(instring), self.errmsg, self ) 
1364              except ParseBaseException as err: 
1365                   
1366                  if self.debugActions[2]: 
1367                      self.debugActions[2]( instring, tokensStart, self, err ) 
1368                  if self.failAction: 
1369                      self.failAction( instring, tokensStart, self, err ) 
1370                  raise 
1371          else: 
1372              if callPreParse and self.callPreparse: 
1373                  preloc = self.preParse( instring, loc ) 
1374              else: 
1375                  preloc = loc 
1376              tokensStart = preloc 
1377              if self.mayIndexError or loc >= len(instring): 
1378                  try: 
1379                      loc,tokens = self.parseImpl( instring, preloc, doActions ) 
1380                  except IndexError: 
1381                      raise ParseException( instring, len(instring), self.errmsg, self ) 
1382              else: 
1383                  loc,tokens = self.parseImpl( instring, preloc, doActions ) 
1384   
1385          tokens = self.postParse( instring, loc, tokens ) 
1386   
1387          retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) 
1388          if self.parseAction and (doActions or self.callDuringTry): 
1389              if debugging: 
1390                  try: 
1391                      for fn in self.parseAction: 
1392                          tokens = fn( instring, tokensStart, retTokens ) 
1393                          if tokens is not None: 
1394                              retTokens = ParseResults( tokens, 
1395                                                        self.resultsName, 
1396                                                        asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
1397                                                        modal=self.modalResults ) 
1398                  except ParseBaseException as err: 
1399                       
1400                      if (self.debugActions[2] ): 
1401                          self.debugActions[2]( instring, tokensStart, self, err ) 
1402                      raise 
1403              else: 
1404                  for fn in self.parseAction: 
1405                      tokens = fn( instring, tokensStart, retTokens ) 
1406                      if tokens is not None: 
1407                          retTokens = ParseResults( tokens, 
1408                                                    self.resultsName, 
1409                                                    asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
1410                                                    modal=self.modalResults ) 
1411   
1412          if debugging: 
1413               
1414              if (self.debugActions[1] ): 
1415                  self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) 
1416   
1417          return loc, retTokens 
 1418   
1424       
1426          try: 
1427              self.tryParse(instring, loc) 
1428          except (ParseException, IndexError): 
1429              return False 
1430          else: 
1431              return True 
 1432   
1435              cache = {} 
1436              self.not_in_cache = not_in_cache = object() 
1437   
1438              def get(self, key): 
1439                  return cache.get(key, not_in_cache) 
 1440   
1441              def set(self, key, value): 
1442                  cache[key] = value 
1458                  self.not_in_cache = not_in_cache = object() 
1459   
1460                  cache = _OrderedDict() 
1461   
1462                  def get(self, key): 
1463                      return cache.get(key, not_in_cache) 
 1464   
1465                  def set(self, key, value): 
1466                      cache[key] = value 
1467                      while len(cache) > size: 
1468                          try: 
1469                              cache.popitem(False) 
1470                          except KeyError: 
1471                              pass 
1487                  self.not_in_cache = not_in_cache = object() 
1488   
1489                  cache = {} 
1490                  key_fifo = collections.deque([], size) 
1491   
1492                  def get(self, key): 
1493                      return cache.get(key, not_in_cache) 
 1494   
1495                  def set(self, key, value): 
1496                      cache[key] = value 
1497                      while len(key_fifo) > size: 
1498                          cache.pop(key_fifo.popleft(), None) 
1499                      key_fifo.append(key) 
1520 -    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): 
 1521          HIT, MISS = 0, 1 
1522          lookup = (self, instring, loc, callPreParse, doActions) 
1523          with ParserElement.packrat_cache_lock: 
1524              cache = ParserElement.packrat_cache 
1525              value = cache.get(lookup) 
1526              if value is cache.not_in_cache: 
1527                  ParserElement.packrat_cache_stats[MISS] += 1 
1528                  try: 
1529                      value = self._parseNoCache(instring, loc, doActions, callPreParse) 
1530                  except ParseBaseException as pe: 
1531                       
1532                      cache.set(lookup, pe.__class__(*pe.args)) 
1533                      raise 
1534                  else: 
1535                      cache.set(lookup, (value[0], value[1].copy())) 
1536                      return value 
1537              else: 
1538                  ParserElement.packrat_cache_stats[HIT] += 1 
1539                  if isinstance(value, Exception): 
1540                      raise value 
1541                  return (value[0], value[1].copy()) 
 1542   
1543      _parse = _parseNoCache 
1544   
1545      @staticmethod 
1549   
1550      _packratEnabled = False 
1551      @staticmethod 
1553          """Enables "packrat" parsing, which adds memoizing to the parsing logic. 
1554             Repeated parse attempts at the same string location (which happens 
1555             often in many complex grammars) can immediately return a cached value, 
1556             instead of re-executing parsing/validating code.  Memoizing is done of 
1557             both valid results and parsing exceptions. 
1558              
1559             Parameters: 
1560              - cache_size_limit - (default=C{128}) - if an integer value is provided 
1561                will limit the size of the packrat cache; if None is passed, then 
1562                the cache size will be unbounded; if 0 is passed, the cache will 
1563                be effectively disabled. 
1564               
1565             This speedup may break existing programs that use parse actions that 
1566             have side-effects.  For this reason, packrat parsing is disabled when 
1567             you first import pyparsing.  To activate the packrat feature, your 
1568             program must call the class method C{ParserElement.enablePackrat()}.  If 
1569             your program uses C{psyco} to "compile as you go", you must call 
1570             C{enablePackrat} before calling C{psyco.full()}.  If you do not do this, 
1571             Python will crash.  For best results, call C{enablePackrat()} immediately 
1572             after importing pyparsing. 
1573              
1574             Example:: 
1575                 import pyparsing 
1576                 pyparsing.ParserElement.enablePackrat() 
1577          """ 
1578          if not ParserElement._packratEnabled: 
1579              ParserElement._packratEnabled = True 
1580              if cache_size_limit is None: 
1581                  ParserElement.packrat_cache = ParserElement._UnboundedCache() 
1582              else: 
1583                  ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit) 
1584              ParserElement._parse = ParserElement._parseCache 
 1585   
1587          """ 
1588          Execute the parse expression with the given string. 
1589          This is the main interface to the client code, once the complete 
1590          expression has been built. 
1591   
1592          If you want the grammar to require that the entire input string be 
1593          successfully parsed, then set C{parseAll} to True (equivalent to ending 
1594          the grammar with C{L{StringEnd()}}). 
1595   
1596          Note: C{parseString} implicitly calls C{expandtabs()} on the input string, 
1597          in order to report proper column numbers in parse actions. 
1598          If the input string contains tabs and 
1599          the grammar uses parse actions that use the C{loc} argument to index into the 
1600          string being parsed, you can ensure you have a consistent view of the input 
1601          string by: 
1602           - calling C{parseWithTabs} on your grammar before calling C{parseString} 
1603             (see L{I{parseWithTabs}<parseWithTabs>}) 
1604           - define your parse action using the full C{(s,loc,toks)} signature, and 
1605             reference the input string using the parse action's C{s} argument 
1606           - explicitly expand the tabs in your input string before calling 
1607             C{parseString} 
1608           
1609          Example:: 
1610              Word('a').parseString('aaaaabaaa')  # -> ['aaaaa'] 
1611              Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text 
1612          """ 
1613          ParserElement.resetCache() 
1614          if not self.streamlined: 
1615              self.streamline() 
1616               
1617          for e in self.ignoreExprs: 
1618              e.streamline() 
1619          if not self.keepTabs: 
1620              instring = instring.expandtabs() 
1621          try: 
1622              loc, tokens = self._parse( instring, 0 ) 
1623              if parseAll: 
1624                  loc = self.preParse( instring, loc ) 
1625                  se = Empty() + StringEnd() 
1626                  se._parse( instring, loc ) 
1627          except ParseBaseException as exc: 
1628              if ParserElement.verbose_stacktrace: 
1629                  raise 
1630              else: 
1631                   
1632                  raise exc 
1633          else: 
1634              return tokens 
 1635   
1637          """ 
1638          Scan the input string for expression matches.  Each match will return the 
1639          matching tokens, start location, and end location.  May be called with optional 
1640          C{maxMatches} argument, to clip scanning after 'n' matches are found.  If 
1641          C{overlap} is specified, then overlapping matches will be reported. 
1642   
1643          Note that the start and end locations are reported relative to the string 
1644          being parsed.  See L{I{parseString}<parseString>} for more information on parsing 
1645          strings with embedded tabs. 
1646   
1647          Example:: 
1648              source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" 
1649              print(source) 
1650              for tokens,start,end in Word(alphas).scanString(source): 
1651                  print(' '*start + '^'*(end-start)) 
1652                  print(' '*start + tokens[0]) 
1653           
1654          prints:: 
1655           
1656              sldjf123lsdjjkf345sldkjf879lkjsfd987 
1657              ^^^^^ 
1658              sldjf 
1659                      ^^^^^^^ 
1660                      lsdjjkf 
1661                                ^^^^^^ 
1662                                sldkjf 
1663                                         ^^^^^^ 
1664                                         lkjsfd 
1665          """ 
1666          if not self.streamlined: 
1667              self.streamline() 
1668          for e in self.ignoreExprs: 
1669              e.streamline() 
1670   
1671          if not self.keepTabs: 
1672              instring = _ustr(instring).expandtabs() 
1673          instrlen = len(instring) 
1674          loc = 0 
1675          preparseFn = self.preParse 
1676          parseFn = self._parse 
1677          ParserElement.resetCache() 
1678          matches = 0 
1679          try: 
1680              while loc <= instrlen and matches < maxMatches: 
1681                  try: 
1682                      preloc = preparseFn( instring, loc ) 
1683                      nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) 
1684                  except ParseException: 
1685                      loc = preloc+1 
1686                  else: 
1687                      if nextLoc > loc: 
1688                          matches += 1 
1689                          yield tokens, preloc, nextLoc 
1690                          if overlap: 
1691                              nextloc = preparseFn( instring, loc ) 
1692                              if nextloc > loc: 
1693                                  loc = nextLoc 
1694                              else: 
1695                                  loc += 1 
1696                          else: 
1697                              loc = nextLoc 
1698                      else: 
1699                          loc = preloc+1 
1700          except ParseBaseException as exc: 
1701              if ParserElement.verbose_stacktrace: 
1702                  raise 
1703              else: 
1704                   
1705                  raise exc 
 1706   
1749   
1751          """ 
1752          Another extension to C{L{scanString}}, simplifying the access to the tokens found 
1753          to match the given parse expression.  May be called with optional 
1754          C{maxMatches} argument, to clip searching after 'n' matches are found. 
1755           
1756          Example:: 
1757              # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters 
1758              cap_word = Word(alphas.upper(), alphas.lower()) 
1759               
1760              print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) 
1761   
1762              # the sum() builtin can be used to merge results into a single ParseResults object 
1763              print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))) 
1764          prints:: 
1765              [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] 
1766              ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] 
1767          """ 
1768          try: 
1769              return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) 
1770          except ParseBaseException as exc: 
1771              if ParserElement.verbose_stacktrace: 
1772                  raise 
1773              else: 
1774                   
1775                  raise exc 
 1776   
1777 -    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): 
 1778          """ 
1779          Generator method to split a string using the given expression as a separator. 
1780          May be called with optional C{maxsplit} argument, to limit the number of splits; 
1781          and the optional C{includeSeparators} argument (default=C{False}), if the separating 
1782          matching text should be included in the split results. 
1783           
1784          Example::         
1785              punc = oneOf(list(".,;:/-!?")) 
1786              print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) 
1787          prints:: 
1788              ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] 
1789          """ 
1790          splits = 0 
1791          last = 0 
1792          for t,s,e in self.scanString(instring, maxMatches=maxsplit): 
1793              yield instring[last:s] 
1794              if includeSeparators: 
1795                  yield t[0] 
1796              last = e 
1797          yield instring[last:] 
 1798   
1800          """ 
1801          Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement 
1802          converts them to L{Literal}s by default. 
1803           
1804          Example:: 
1805              greet = Word(alphas) + "," + Word(alphas) + "!" 
1806              hello = "Hello, World!" 
1807              print (hello, "->", greet.parseString(hello)) 
1808          Prints:: 
1809              Hello, World! -> ['Hello', ',', 'World', '!'] 
1810          """ 
1811          if isinstance( other, basestring ): 
1812              other = ParserElement._literalStringClass( other ) 
1813          if not isinstance( other, ParserElement ): 
1814              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1815                      SyntaxWarning, stacklevel=2) 
1816              return None 
1817          return And( [ self, other ] ) 
 1818   
1820          """ 
1821          Implementation of + operator when left operand is not a C{L{ParserElement}} 
1822          """ 
1823          if isinstance( other, basestring ): 
1824              other = ParserElement._literalStringClass( other ) 
1825          if not isinstance( other, ParserElement ): 
1826              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1827                      SyntaxWarning, stacklevel=2) 
1828              return None 
1829          return other + self 
 1830   
1832          """ 
1833          Implementation of - operator, returns C{L{And}} with error stop 
1834          """ 
1835          if isinstance( other, basestring ): 
1836              other = ParserElement._literalStringClass( other ) 
1837          if not isinstance( other, ParserElement ): 
1838              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1839                      SyntaxWarning, stacklevel=2) 
1840              return None 
1841          return self + And._ErrorStop() + other 
 1842   
1844          """ 
1845          Implementation of - operator when left operand is not a C{L{ParserElement}} 
1846          """ 
1847          if isinstance( other, basestring ): 
1848              other = ParserElement._literalStringClass( other ) 
1849          if not isinstance( other, ParserElement ): 
1850              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1851                      SyntaxWarning, stacklevel=2) 
1852              return None 
1853          return other - self 
 1854   
1856          """ 
1857          Implementation of * operator, allows use of C{expr * 3} in place of 
1858          C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer 
1859          tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples 
1860          may also include C{None} as in: 
1861           - C{expr*(n,None)} or C{expr*(n,)} is equivalent 
1862                to C{expr*n + L{ZeroOrMore}(expr)} 
1863                (read as "at least n instances of C{expr}") 
1864           - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 
1865                (read as "0 to n instances of C{expr}") 
1866           - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 
1867           - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 
1868   
1869          Note that C{expr*(None,n)} does not raise an exception if 
1870          more than n exprs exist in the input stream; that is, 
1871          C{expr*(None,n)} does not enforce a maximum number of expr 
1872          occurrences.  If this behavior is desired, then write 
1873          C{expr*(None,n) + ~expr} 
1874          """ 
1875          if isinstance(other,int): 
1876              minElements, optElements = other,0 
1877          elif isinstance(other,tuple): 
1878              other = (other + (None, None))[:2] 
1879              if other[0] is None: 
1880                  other = (0, other[1]) 
1881              if isinstance(other[0],int) and other[1] is None: 
1882                  if other[0] == 0: 
1883                      return ZeroOrMore(self) 
1884                  if other[0] == 1: 
1885                      return OneOrMore(self) 
1886                  else: 
1887                      return self*other[0] + ZeroOrMore(self) 
1888              elif isinstance(other[0],int) and isinstance(other[1],int): 
1889                  minElements, optElements = other 
1890                  optElements -= minElements 
1891              else: 
1892                  raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 
1893          else: 
1894              raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 
1895   
1896          if minElements < 0: 
1897              raise ValueError("cannot multiply ParserElement by negative value") 
1898          if optElements < 0: 
1899              raise ValueError("second tuple value must be greater or equal to first tuple value") 
1900          if minElements == optElements == 0: 
1901              raise ValueError("cannot multiply ParserElement by 0 or (0,0)") 
1902   
1903          if (optElements): 
1904              def makeOptionalList(n): 
1905                  if n>1: 
1906                      return Optional(self + makeOptionalList(n-1)) 
1907                  else: 
1908                      return Optional(self) 
 1909              if minElements: 
1910                  if minElements == 1: 
1911                      ret = self + makeOptionalList(optElements) 
1912                  else: 
1913                      ret = And([self]*minElements) + makeOptionalList(optElements) 
1914              else: 
1915                  ret = makeOptionalList(optElements) 
1916          else: 
1917              if minElements == 1: 
1918                  ret = self 
1919              else: 
1920                  ret = And([self]*minElements) 
1921          return ret 
1922   
1925   
1927          """ 
1928          Implementation of | operator - returns C{L{MatchFirst}} 
1929          """ 
1930          if isinstance( other, basestring ): 
1931              other = ParserElement._literalStringClass( other ) 
1932          if not isinstance( other, ParserElement ): 
1933              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1934                      SyntaxWarning, stacklevel=2) 
1935              return None 
1936          return MatchFirst( [ self, other ] ) 
 1937   
1939          """ 
1940          Implementation of | operator when left operand is not a C{L{ParserElement}} 
1941          """ 
1942          if isinstance( other, basestring ): 
1943              other = ParserElement._literalStringClass( other ) 
1944          if not isinstance( other, ParserElement ): 
1945              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1946                      SyntaxWarning, stacklevel=2) 
1947              return None 
1948          return other | self 
 1949   
1951          """ 
1952          Implementation of ^ operator - returns C{L{Or}} 
1953          """ 
1954          if isinstance( other, basestring ): 
1955              other = ParserElement._literalStringClass( other ) 
1956          if not isinstance( other, ParserElement ): 
1957              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1958                      SyntaxWarning, stacklevel=2) 
1959              return None 
1960          return Or( [ self, other ] ) 
 1961   
1963          """ 
1964          Implementation of ^ operator when left operand is not a C{L{ParserElement}} 
1965          """ 
1966          if isinstance( other, basestring ): 
1967              other = ParserElement._literalStringClass( other ) 
1968          if not isinstance( other, ParserElement ): 
1969              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1970                      SyntaxWarning, stacklevel=2) 
1971              return None 
1972          return other ^ self 
 1973   
1975          """ 
1976          Implementation of & operator - returns C{L{Each}} 
1977          """ 
1978          if isinstance( other, basestring ): 
1979              other = ParserElement._literalStringClass( other ) 
1980          if not isinstance( other, ParserElement ): 
1981              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1982                      SyntaxWarning, stacklevel=2) 
1983              return None 
1984          return Each( [ self, other ] ) 
 1985   
1987          """ 
1988          Implementation of & operator when left operand is not a C{L{ParserElement}} 
1989          """ 
1990          if isinstance( other, basestring ): 
1991              other = ParserElement._literalStringClass( other ) 
1992          if not isinstance( other, ParserElement ): 
1993              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1994                      SyntaxWarning, stacklevel=2) 
1995              return None 
1996          return other & self 
 1997   
1999          """ 
2000          Implementation of ~ operator - returns C{L{NotAny}} 
2001          """ 
2002          return NotAny( self ) 
 2003   
2005          """ 
2006          Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}. 
2007           
2008          If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 
2009          passed as C{True}. 
2010              
2011          If C{name} is omitted, same as calling C{L{copy}}. 
2012   
2013          Example:: 
2014              # these are equivalent 
2015              userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 
2016              userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")              
2017          """ 
2018          if name is not None: 
2019              return self.setResultsName(name) 
2020          else: 
2021              return self.copy() 
 2022   
2024          """ 
2025          Suppresses the output of this C{ParserElement}; useful to keep punctuation from 
2026          cluttering up returned output. 
2027          """ 
2028          return Suppress( self ) 
 2029   
2031          """ 
2032          Disables the skipping of whitespace before matching the characters in the 
2033          C{ParserElement}'s defined pattern.  This is normally only used internally by 
2034          the pyparsing module, but may be needed in some whitespace-sensitive grammars. 
2035          """ 
2036          self.skipWhitespace = False 
2037          return self 
 2038   
2040          """ 
2041          Overrides the default whitespace chars 
2042          """ 
2043          self.skipWhitespace = True 
2044          self.whiteChars = chars 
2045          self.copyDefaultWhiteChars = False 
2046          return self 
 2047   
2049          """ 
2050          Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string. 
2051          Must be called before C{parseString} when the input grammar contains elements that 
2052          match C{<TAB>} characters. 
2053          """ 
2054          self.keepTabs = True 
2055          return self 
 2056   
2058          """ 
2059          Define expression to be ignored (e.g., comments) while doing pattern 
2060          matching; may be called repeatedly, to define multiple comment or other 
2061          ignorable patterns. 
2062           
2063          Example:: 
2064              patt = OneOrMore(Word(alphas)) 
2065              patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] 
2066               
2067              patt.ignore(cStyleComment) 
2068              patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] 
2069          """ 
2070          if isinstance(other, basestring): 
2071              other = Suppress(other) 
2072   
2073          if isinstance( other, Suppress ): 
2074              if other not in self.ignoreExprs: 
2075                  self.ignoreExprs.append(other) 
2076          else: 
2077              self.ignoreExprs.append( Suppress( other.copy() ) ) 
2078          return self 
 2079   
2080 -    def setDebugActions( self, startAction, successAction, exceptionAction ): 
 2081          """ 
2082          Enable display of debugging messages while doing pattern matching. 
2083          """ 
2084          self.debugActions = (startAction or _defaultStartDebugAction, 
2085                               successAction or _defaultSuccessDebugAction, 
2086                               exceptionAction or _defaultExceptionDebugAction) 
2087          self.debug = True 
2088          return self 
 2089   
2091          """ 
2092          Enable display of debugging messages while doing pattern matching. 
2093          Set C{flag} to True to enable, False to disable. 
2094   
2095          Example:: 
2096              wd = Word(alphas).setName("alphaword") 
2097              integer = Word(nums).setName("numword") 
2098              term = wd | integer 
2099               
2100              # turn on debugging for wd 
2101              wd.setDebug() 
2102   
2103              OneOrMore(term).parseString("abc 123 xyz 890") 
2104           
2105          prints:: 
2106              Match alphaword at loc 0(1,1) 
2107              Matched alphaword -> ['abc'] 
2108              Match alphaword at loc 3(1,4) 
2109              Exception raised:Expected alphaword (at char 4), (line:1, col:5) 
2110              Match alphaword at loc 7(1,8) 
2111              Matched alphaword -> ['xyz'] 
2112              Match alphaword at loc 11(1,12) 
2113              Exception raised:Expected alphaword (at char 12), (line:1, col:13) 
2114              Match alphaword at loc 15(1,16) 
2115              Exception raised:Expected alphaword (at char 15), (line:1, col:16) 
2116   
2117          The output shown is that produced by the default debug actions - custom debug actions can be 
2118          specified using L{setDebugActions}. Prior to attempting 
2119          to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"} 
2120          is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"} 
2121          message is shown. Also note the use of L{setName} to assign a human-readable name to the expression, 
2122          which makes debugging and exception messages easier to understand - for instance, the default 
2123          name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}. 
2124          """ 
2125          if flag: 
2126              self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) 
2127          else: 
2128              self.debug = False 
2129          return self 
 2130   
2133   
2136   
2138          self.streamlined = True 
2139          self.strRepr = None 
2140          return self 
 2141   
2144   
2145 -    def validate( self, validateTrace=[] ): 
 2146          """ 
2147          Check defined expressions for valid structure, check for infinite recursive definitions. 
2148          """ 
2149          self.checkRecursion( [] ) 
 2150   
2151 -    def parseFile( self, file_or_filename, parseAll=False ): 
 2152          """ 
2153          Execute the parse expression on the given file or filename. 
2154          If a filename is specified (instead of a file object), 
2155          the entire file is opened, read, and closed before parsing. 
2156          """ 
2157          try: 
2158              file_contents = file_or_filename.read() 
2159          except AttributeError: 
2160              with open(file_or_filename, "r") as f: 
2161                  file_contents = f.read() 
2162          try: 
2163              return self.parseString(file_contents, parseAll) 
2164          except ParseBaseException as exc: 
2165              if ParserElement.verbose_stacktrace: 
2166                  raise 
2167              else: 
2168                   
2169                  raise exc 
 2170   
2172          if isinstance(other, ParserElement): 
2173              return self is other or vars(self) == vars(other) 
2174          elif isinstance(other, basestring): 
2175              return self.matches(other) 
2176          else: 
2177              return super(ParserElement,self)==other 
 2178   
2180          return not (self == other) 
 2181   
2183          return hash(id(self)) 
 2184   
2186          return self == other 
 2187   
2189          return not (self == other) 
 2190   
2191 -    def matches(self, testString, parseAll=True): 
 2192          """ 
2193          Method for quick testing of a parser against a test string. Good for simple  
2194          inline microtests of sub expressions while building up larger parser. 
2195              
2196          Parameters: 
2197           - testString - to test against this expression for a match 
2198           - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests 
2199               
2200          Example:: 
2201              expr = Word(nums) 
2202              assert expr.matches("100") 
2203          """ 
2204          try: 
2205              self.parseString(_ustr(testString), parseAll=parseAll) 
2206              return True 
2207          except ParseBaseException: 
2208              return False 
 2209                   
2210 -    def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False): 
 2211          """ 
2212          Execute the parse expression on a series of test strings, showing each 
2213          test, the parsed results or where the parse failed. Quick and easy way to 
2214          run a parse expression against a list of sample strings. 
2215              
2216          Parameters: 
2217           - tests - a list of separate test strings, or a multiline string of test strings 
2218           - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests            
2219           - comment - (default=C{'#'}) - expression for indicating embedded comments in the test  
2220                string; pass None to disable comment filtering 
2221           - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline; 
2222                if False, only dump nested list 
2223           - printResults - (default=C{True}) prints test output to stdout 
2224           - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing 
2225   
2226          Returns: a (success, results) tuple, where success indicates that all tests succeeded 
2227          (or failed if C{failureTests} is True), and the results contain a list of lines of each  
2228          test's output 
2229           
2230          Example:: 
2231              number_expr = pyparsing_common.number.copy() 
2232   
2233              result = number_expr.runTests(''' 
2234                  # unsigned integer 
2235                  100 
2236                  # negative integer 
2237                  -100 
2238                  # float with scientific notation 
2239                  6.02e23 
2240                  # integer with scientific notation 
2241                  1e-12 
2242                  ''') 
2243              print("Success" if result[0] else "Failed!") 
2244   
2245              result = number_expr.runTests(''' 
2246                  # stray character 
2247                  100Z 
2248                  # missing leading digit before '.' 
2249                  -.100 
2250                  # too many '.' 
2251                  3.14.159 
2252                  ''', failureTests=True) 
2253              print("Success" if result[0] else "Failed!") 
2254          prints:: 
2255              # unsigned integer 
2256              100 
2257              [100] 
2258   
2259              # negative integer 
2260              -100 
2261              [-100] 
2262   
2263              # float with scientific notation 
2264              6.02e23 
2265              [6.02e+23] 
2266   
2267              # integer with scientific notation 
2268              1e-12 
2269              [1e-12] 
2270   
2271              Success 
2272               
2273              # stray character 
2274              100Z 
2275                 ^ 
2276              FAIL: Expected end of text (at char 3), (line:1, col:4) 
2277   
2278              # missing leading digit before '.' 
2279              -.100 
2280              ^ 
2281              FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) 
2282   
2283              # too many '.' 
2284              3.14.159 
2285                  ^ 
2286              FAIL: Expected end of text (at char 4), (line:1, col:5) 
2287   
2288              Success 
2289   
2290          Each test string must be on a single line. If you want to test a string that spans multiple 
2291          lines, create a test like this:: 
2292   
2293              expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines") 
2294           
2295          (Note that this is a raw string literal, you must include the leading 'r'.) 
2296          """ 
2297          if isinstance(tests, basestring): 
2298              tests = list(map(str.strip, tests.rstrip().splitlines())) 
2299          if isinstance(comment, basestring): 
2300              comment = Literal(comment) 
2301          allResults = [] 
2302          comments = [] 
2303          success = True 
2304          for t in tests: 
2305              if comment is not None and comment.matches(t, False) or comments and not t: 
2306                  comments.append(t) 
2307                  continue 
2308              if not t: 
2309                  continue 
2310              out = ['\n'.join(comments), t] 
2311              comments = [] 
2312              try: 
2313                  t = t.replace(r'\n','\n') 
2314                  result = self.parseString(t, parseAll=parseAll) 
2315                  out.append(result.dump(full=fullDump)) 
2316                  success = success and not failureTests 
2317              except ParseBaseException as pe: 
2318                  fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" 
2319                  if '\n' in t: 
2320                      out.append(line(pe.loc, t)) 
2321                      out.append(' '*(col(pe.loc,t)-1) + '^' + fatal) 
2322                  else: 
2323                      out.append(' '*pe.loc + '^' + fatal) 
2324                  out.append("FAIL: " + str(pe)) 
2325                  success = success and failureTests 
2326                  result = pe 
2327              except Exception as exc: 
2328                  out.append("FAIL-EXCEPTION: " + str(exc)) 
2329                  success = success and failureTests 
2330                  result = exc 
2331   
2332              if printResults: 
2333                  if fullDump: 
2334                      out.append('') 
2335                  print('\n'.join(out)) 
2336   
2337              allResults.append((t, result)) 
2338           
2339          return success, allResults 
 2340   
2341           
2342 -class Token(ParserElement): 
 2343      """ 
2344      Abstract C{ParserElement} subclass, for defining atomic matching patterns. 
2345      """ 
 2348   
2349   
2350 -class Empty(Token): 
 2351      """ 
2352      An empty token, will always match. 
2353      """ 
2355          super(Empty,self).__init__() 
2356          self.name = "Empty" 
2357          self.mayReturnEmpty = True 
2358          self.mayIndexError = False 
  2359   
2362      """ 
2363      A token that will never match. 
2364      """ 
2366          super(NoMatch,self).__init__() 
2367          self.name = "NoMatch" 
2368          self.mayReturnEmpty = True 
2369          self.mayIndexError = False 
2370          self.errmsg = "Unmatchable token" 
 2371   
2372 -    def parseImpl( self, instring, loc, doActions=True ): 
  2374   
2377      """ 
2378      Token to exactly match a specified string. 
2379       
2380      Example:: 
2381          Literal('blah').parseString('blah')  # -> ['blah'] 
2382          Literal('blah').parseString('blahfooblah')  # -> ['blah'] 
2383          Literal('blah').parseString('bla')  # -> Exception: Expected "blah" 
2384       
2385      For case-insensitive matching, use L{CaselessLiteral}. 
2386       
2387      For keyword matching (force word break before and after the matched string), 
2388      use L{Keyword} or L{CaselessKeyword}. 
2389      """ 
2391          super(Literal,self).__init__() 
2392          self.match = matchString 
2393          self.matchLen = len(matchString) 
2394          try: 
2395              self.firstMatchChar = matchString[0] 
2396          except IndexError: 
2397              warnings.warn("null string passed to Literal; use Empty() instead", 
2398                              SyntaxWarning, stacklevel=2) 
2399              self.__class__ = Empty 
2400          self.name = '"%s"' % _ustr(self.match) 
2401          self.errmsg = "Expected " + self.name 
2402          self.mayReturnEmpty = False 
2403          self.mayIndexError = False 
 2404   
2405       
2406       
2407       
2408       
2409 -    def parseImpl( self, instring, loc, doActions=True ): 
 2410          if (instring[loc] == self.firstMatchChar and 
2411              (self.matchLen==1 or instring.startswith(self.match,loc)) ): 
2412              return loc+self.matchLen, self.match 
2413          raise ParseException(instring, loc, self.errmsg, self) 
  2414  _L = Literal 
2415  ParserElement._literalStringClass = Literal 
2418      """ 
2419      Token to exactly match a specified string as a keyword, that is, it must be 
2420      immediately followed by a non-keyword character.  Compare with C{L{Literal}}: 
2421       - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}. 
2422       - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} 
2423      Accepts two optional constructor arguments in addition to the keyword string: 
2424       - C{identChars} is a string of characters that would be valid identifier characters, 
2425            defaulting to all alphanumerics + "_" and "$" 
2426       - C{caseless} allows case-insensitive matching, default is C{False}. 
2427          
2428      Example:: 
2429          Keyword("start").parseString("start")  # -> ['start'] 
2430          Keyword("start").parseString("starting")  # -> Exception 
2431   
2432      For case-insensitive matching, use L{CaselessKeyword}. 
2433      """ 
2434      DEFAULT_KEYWORD_CHARS = alphanums+"_$" 
2435   
2436 -    def __init__( self, matchString, identChars=None, caseless=False ): 
 2437          super(Keyword,self).__init__() 
2438          if identChars is None: 
2439              identChars = Keyword.DEFAULT_KEYWORD_CHARS 
2440          self.match = matchString 
2441          self.matchLen = len(matchString) 
2442          try: 
2443              self.firstMatchChar = matchString[0] 
2444          except IndexError: 
2445              warnings.warn("null string passed to Keyword; use Empty() instead", 
2446                              SyntaxWarning, stacklevel=2) 
2447          self.name = '"%s"' % self.match 
2448          self.errmsg = "Expected " + self.name 
2449          self.mayReturnEmpty = False 
2450          self.mayIndexError = False 
2451          self.caseless = caseless 
2452          if caseless: 
2453              self.caselessmatch = matchString.upper() 
2454              identChars = identChars.upper() 
2455          self.identChars = set(identChars) 
 2456   
2457 -    def parseImpl( self, instring, loc, doActions=True ): 
 2458          if self.caseless: 
2459              if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
2460                   (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and 
2461                   (loc == 0 or instring[loc-1].upper() not in self.identChars) ): 
2462                  return loc+self.matchLen, self.match 
2463          else: 
2464              if (instring[loc] == self.firstMatchChar and 
2465                  (self.matchLen==1 or instring.startswith(self.match,loc)) and 
2466                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 
2467                  (loc == 0 or instring[loc-1] not in self.identChars) ): 
2468                  return loc+self.matchLen, self.match 
2469          raise ParseException(instring, loc, self.errmsg, self) 
 2470   
2475   
2476      @staticmethod 
 2481   
2483      """ 
2484      Token to match a specified string, ignoring case of letters. 
2485      Note: the matched results will always be in the case of the given 
2486      match string, NOT the case of the input text. 
2487   
2488      Example:: 
2489          OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD'] 
2490           
2491      (Contrast with example for L{CaselessKeyword}.) 
2492      """ 
2494          super(CaselessLiteral,self).__init__( matchString.upper() ) 
2495           
2496          self.returnString = matchString 
2497          self.name = "'%s'" % self.returnString 
2498          self.errmsg = "Expected " + self.name 
 2499   
2500 -    def parseImpl( self, instring, loc, doActions=True ): 
 2501          if instring[ loc:loc+self.matchLen ].upper() == self.match: 
2502              return loc+self.matchLen, self.returnString 
2503          raise ParseException(instring, loc, self.errmsg, self) 
  2504   
2506      """ 
2507      Caseless version of L{Keyword}. 
2508   
2509      Example:: 
2510          OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD'] 
2511           
2512      (Contrast with example for L{CaselessLiteral}.) 
2513      """ 
2514 -    def __init__( self, matchString, identChars=None ): 
 2516   
2517 -    def parseImpl( self, instring, loc, doActions=True ): 
 2518          if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
2519               (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): 
2520              return loc+self.matchLen, self.match 
2521          raise ParseException(instring, loc, self.errmsg, self) 
  2522   
2524      """ 
2525      A variation on L{Literal} which matches "close" matches, that is,  
2526      strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters: 
2527       - C{match_string} - string to be matched 
2528       - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match 
2529       
2530      The results from a successful parse will contain the matched text from the input string and the following named results: 
2531       - C{mismatches} - a list of the positions within the match_string where mismatches were found 
2532       - C{original} - the original match_string used to compare against the input string 
2533       
2534      If C{mismatches} is an empty list, then the match was an exact match. 
2535       
2536      Example:: 
2537          patt = CloseMatch("ATCATCGAATGGA") 
2538          patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) 
2539          patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) 
2540   
2541          # exact match 
2542          patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) 
2543   
2544          # close match allowing up to 2 mismatches 
2545          patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2) 
2546          patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) 
2547      """ 
2548 -    def __init__(self, match_string, maxMismatches=1): 
 2549          super(CloseMatch,self).__init__() 
2550          self.name = match_string 
2551          self.match_string = match_string 
2552          self.maxMismatches = maxMismatches 
2553          self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches) 
2554          self.mayIndexError = False 
2555          self.mayReturnEmpty = False 
 2556   
2557 -    def parseImpl( self, instring, loc, doActions=True ): 
 2558          start = loc 
2559          instrlen = len(instring) 
2560          maxloc = start + len(self.match_string) 
2561   
2562          if maxloc <= instrlen: 
2563              match_string = self.match_string 
2564              match_stringloc = 0 
2565              mismatches = [] 
2566              maxMismatches = self.maxMismatches 
2567   
2568              for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)): 
2569                  src,mat = s_m 
2570                  if src != mat: 
2571                      mismatches.append(match_stringloc) 
2572                      if len(mismatches) > maxMismatches: 
2573                          break 
2574              else: 
2575                  loc = match_stringloc + 1 
2576                  results = ParseResults([instring[start:loc]]) 
2577                  results['original'] = self.match_string 
2578                  results['mismatches'] = mismatches 
2579                  return loc, results 
2580   
2581          raise ParseException(instring, loc, self.errmsg, self) 
  2582   
2583   
2584 -class Word(Token): 
 2585      """ 
2586      Token for matching words composed of allowed character sets. 
2587      Defined with string containing all allowed initial characters, 
2588      an optional string containing allowed body characters (if omitted, 
2589      defaults to the initial character set), and an optional minimum, 
2590      maximum, and/or exact length.  The default value for C{min} is 1 (a 
2591      minimum value < 1 is not valid); the default values for C{max} and C{exact} 
2592      are 0, meaning no maximum or exact length restriction. An optional 
2593      C{excludeChars} parameter can list characters that might be found in  
2594      the input C{bodyChars} string; useful to define a word of all printables 
2595      except for one or two characters, for instance. 
2596       
2597      L{srange} is useful for defining custom character set strings for defining  
2598      C{Word} expressions, using range notation from regular expression character sets. 
2599       
2600      A common mistake is to use C{Word} to match a specific literal string, as in  
2601      C{Word("Address")}. Remember that C{Word} uses the string argument to define 
2602      I{sets} of matchable characters. This expression would match "Add", "AAA", 
2603      "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. 
2604      To match an exact literal string, use L{Literal} or L{Keyword}. 
2605   
2606      pyparsing includes helper strings for building Words: 
2607       - L{alphas} 
2608       - L{nums} 
2609       - L{alphanums} 
2610       - L{hexnums} 
2611       - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) 
2612       - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) 
2613       - L{printables} (any non-whitespace character) 
2614   
2615      Example:: 
2616          # a word composed of digits 
2617          integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 
2618           
2619          # a word with a leading capital, and zero or more lowercase 
2620          capital_word = Word(alphas.upper(), alphas.lower()) 
2621   
2622          # hostnames are alphanumeric, with leading alpha, and '-' 
2623          hostname = Word(alphas, alphanums+'-') 
2624           
2625          # roman numeral (not a strict parser, accepts invalid mix of characters) 
2626          roman = Word("IVXLCDM") 
2627           
2628          # any string of non-whitespace characters, except for ',' 
2629          csv_value = Word(printables, excludeChars=",") 
2630      """ 
2631 -    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): 
 2632          super(Word,self).__init__() 
2633          if excludeChars: 
2634              initChars = ''.join(c for c in initChars if c not in excludeChars) 
2635              if bodyChars: 
2636                  bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 
2637          self.initCharsOrig = initChars 
2638          self.initChars = set(initChars) 
2639          if bodyChars : 
2640              self.bodyCharsOrig = bodyChars 
2641              self.bodyChars = set(bodyChars) 
2642          else: 
2643              self.bodyCharsOrig = initChars 
2644              self.bodyChars = set(initChars) 
2645   
2646          self.maxSpecified = max > 0 
2647   
2648          if min < 1: 
2649              raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 
2650   
2651          self.minLen = min 
2652   
2653          if max > 0: 
2654              self.maxLen = max 
2655          else: 
2656              self.maxLen = _MAX_INT 
2657   
2658          if exact > 0: 
2659              self.maxLen = exact 
2660              self.minLen = exact 
2661   
2662          self.name = _ustr(self) 
2663          self.errmsg = "Expected " + self.name 
2664          self.mayIndexError = False 
2665          self.asKeyword = asKeyword 
2666   
2667          if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 
2668              if self.bodyCharsOrig == self.initCharsOrig: 
2669                  self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 
2670              elif len(self.initCharsOrig) == 1: 
2671                  self.reString = "%s[%s]*" % \ 
2672                                        (re.escape(self.initCharsOrig), 
2673                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
2674              else: 
2675                  self.reString = "[%s][%s]*" % \ 
2676                                        (_escapeRegexRangeChars(self.initCharsOrig), 
2677                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
2678              if self.asKeyword: 
2679                  self.reString = r"\b"+self.reString+r"\b" 
2680              try: 
2681                  self.re = re.compile( self.reString ) 
2682              except Exception: 
2683                  self.re = None 
 2684   
2685 -    def parseImpl( self, instring, loc, doActions=True ): 
 2686          if self.re: 
2687              result = self.re.match(instring,loc) 
2688              if not result: 
2689                  raise ParseException(instring, loc, self.errmsg, self) 
2690   
2691              loc = result.end() 
2692              return loc, result.group() 
2693   
2694          if not(instring[ loc ] in self.initChars): 
2695              raise ParseException(instring, loc, self.errmsg, self) 
2696   
2697          start = loc 
2698          loc += 1 
2699          instrlen = len(instring) 
2700          bodychars = self.bodyChars 
2701          maxloc = start + self.maxLen 
2702          maxloc = min( maxloc, instrlen ) 
2703          while loc < maxloc and instring[loc] in bodychars: 
2704              loc += 1 
2705   
2706          throwException = False 
2707          if loc - start < self.minLen: 
2708              throwException = True 
2709          if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 
2710              throwException = True 
2711          if self.asKeyword: 
2712              if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 
2713                  throwException = True 
2714   
2715          if throwException: 
2716              raise ParseException(instring, loc, self.errmsg, self) 
2717   
2718          return loc, instring[start:loc] 
 2719   
2721          try: 
2722              return super(Word,self).__str__() 
2723          except Exception: 
2724              pass 
2725   
2726   
2727          if self.strRepr is None: 
2728   
2729              def charsAsStr(s): 
2730                  if len(s)>4: 
2731                      return s[:4]+"..." 
2732                  else: 
2733                      return s 
 2734   
2735              if ( self.initCharsOrig != self.bodyCharsOrig ): 
2736                  self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) 
2737              else: 
2738                  self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) 
2739   
2740          return self.strRepr 
 2741   
2742   
2743 -class Regex(Token): 
 2744      r""" 
2745      Token for matching strings that match a given regular expression. 
2746      Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. 
2747      If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as  
2748      named parse results. 
2749   
2750      Example:: 
2751          realnum = Regex(r"[+-]?\d+\.\d*") 
2752          date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)') 
2753          # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression 
2754          roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") 
2755      """ 
2756      compiledREtype = type(re.compile("[A-Z]")) 
2757 -    def __init__( self, pattern, flags=0): 
 2758          """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.""" 
2759          super(Regex,self).__init__() 
2760   
2761          if isinstance(pattern, basestring): 
2762              if not pattern: 
2763                  warnings.warn("null string passed to Regex; use Empty() instead", 
2764                          SyntaxWarning, stacklevel=2) 
2765   
2766              self.pattern = pattern 
2767              self.flags = flags 
2768   
2769              try: 
2770                  self.re = re.compile(self.pattern, self.flags) 
2771                  self.reString = self.pattern 
2772              except sre_constants.error: 
2773                  warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
2774                      SyntaxWarning, stacklevel=2) 
2775                  raise 
2776   
2777          elif isinstance(pattern, Regex.compiledREtype): 
2778              self.re = pattern 
2779              self.pattern = \ 
2780              self.reString = str(pattern) 
2781              self.flags = flags 
2782               
2783          else: 
2784              raise ValueError("Regex may only be constructed with a string or a compiled RE object") 
2785   
2786          self.name = _ustr(self) 
2787          self.errmsg = "Expected " + self.name 
2788          self.mayIndexError = False 
2789          self.mayReturnEmpty = True 
 2790   
2791 -    def parseImpl( self, instring, loc, doActions=True ): 
 2792          result = self.re.match(instring,loc) 
2793          if not result: 
2794              raise ParseException(instring, loc, self.errmsg, self) 
2795   
2796          loc = result.end() 
2797          d = result.groupdict() 
2798          ret = ParseResults(result.group()) 
2799          if d: 
2800              for k in d: 
2801                  ret[k] = d[k] 
2802          return loc,ret 
 2803   
2805          try: 
2806              return super(Regex,self).__str__() 
2807          except Exception: 
2808              pass 
2809   
2810          if self.strRepr is None: 
2811              self.strRepr = "Re:(%s)" % repr(self.pattern) 
2812   
2813          return self.strRepr 
  2814   
2817      r""" 
2818      Token for matching strings that are delimited by quoting characters. 
2819       
2820      Defined with the following parameters: 
2821          - quoteChar - string of one or more characters defining the quote delimiting string 
2822          - escChar - character to escape quotes, typically backslash (default=C{None}) 
2823          - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None}) 
2824          - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) 
2825          - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) 
2826          - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) 
2827          - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True}) 
2828   
2829      Example:: 
2830          qs = QuotedString('"') 
2831          print(qs.searchString('lsjdf "This is the quote" sldjf')) 
2832          complex_qs = QuotedString('{{', endQuoteChar='}}') 
2833          print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) 
2834          sql_qs = QuotedString('"', escQuote='""') 
2835          print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) 
2836      prints:: 
2837          [['This is the quote']] 
2838          [['This is the "quote"']] 
2839          [['This is the quote with "embedded" quotes']] 
2840      """ 
2841 -    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): 
 2842          super(QuotedString,self).__init__() 
2843   
2844           
2845          quoteChar = quoteChar.strip() 
2846          if not quoteChar: 
2847              warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 
2848              raise SyntaxError() 
2849   
2850          if endQuoteChar is None: 
2851              endQuoteChar = quoteChar 
2852          else: 
2853              endQuoteChar = endQuoteChar.strip() 
2854              if not endQuoteChar: 
2855                  warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 
2856                  raise SyntaxError() 
2857   
2858          self.quoteChar = quoteChar 
2859          self.quoteCharLen = len(quoteChar) 
2860          self.firstQuoteChar = quoteChar[0] 
2861          self.endQuoteChar = endQuoteChar 
2862          self.endQuoteCharLen = len(endQuoteChar) 
2863          self.escChar = escChar 
2864          self.escQuote = escQuote 
2865          self.unquoteResults = unquoteResults 
2866          self.convertWhitespaceEscapes = convertWhitespaceEscapes 
2867   
2868          if multiline: 
2869              self.flags = re.MULTILINE | re.DOTALL 
2870              self.pattern = r'%s(?:[^%s%s]' % \ 
2871                  ( re.escape(self.quoteChar), 
2872                    _escapeRegexRangeChars(self.endQuoteChar[0]), 
2873                    (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 
2874          else: 
2875              self.flags = 0 
2876              self.pattern = r'%s(?:[^%s\n\r%s]' % \ 
2877                  ( re.escape(self.quoteChar), 
2878                    _escapeRegexRangeChars(self.endQuoteChar[0]), 
2879                    (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 
2880          if len(self.endQuoteChar) > 1: 
2881              self.pattern += ( 
2882                  '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 
2883                                                 _escapeRegexRangeChars(self.endQuoteChar[i])) 
2884                                      for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' 
2885                  ) 
2886          if escQuote: 
2887              self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 
2888          if escChar: 
2889              self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 
2890              self.escCharReplacePattern = re.escape(self.escChar)+"(.)" 
2891          self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 
2892   
2893          try: 
2894              self.re = re.compile(self.pattern, self.flags) 
2895              self.reString = self.pattern 
2896          except sre_constants.error: 
2897              warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 
2898                  SyntaxWarning, stacklevel=2) 
2899              raise 
2900   
2901          self.name = _ustr(self) 
2902          self.errmsg = "Expected " + self.name 
2903          self.mayIndexError = False 
2904          self.mayReturnEmpty = True 
 2905   
2906 -    def parseImpl( self, instring, loc, doActions=True ): 
 2907          result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 
2908          if not result: 
2909              raise ParseException(instring, loc, self.errmsg, self) 
2910   
2911          loc = result.end() 
2912          ret = result.group() 
2913   
2914          if self.unquoteResults: 
2915   
2916               
2917              ret = ret[self.quoteCharLen:-self.endQuoteCharLen] 
2918   
2919              if isinstance(ret,basestring): 
2920                   
2921                  if '\\' in ret and self.convertWhitespaceEscapes: 
2922                      ws_map = { 
2923                          r'\t' : '\t', 
2924                          r'\n' : '\n', 
2925                          r'\f' : '\f', 
2926                          r'\r' : '\r', 
2927                      } 
2928                      for wslit,wschar in ws_map.items(): 
2929                          ret = ret.replace(wslit, wschar) 
2930   
2931                   
2932                  if self.escChar: 
2933                      ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) 
2934   
2935                   
2936                  if self.escQuote: 
2937                      ret = ret.replace(self.escQuote, self.endQuoteChar) 
2938   
2939          return loc, ret 
 2940   
2942          try: 
2943              return super(QuotedString,self).__str__() 
2944          except Exception: 
2945              pass 
2946   
2947          if self.strRepr is None: 
2948              self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) 
2949   
2950          return self.strRepr 
  2951   
2954      """ 
2955      Token for matching words composed of characters I{not} in a given set (will 
2956      include whitespace in matched characters if not listed in the provided exclusion set - see example). 
2957      Defined with string containing all disallowed characters, and an optional 
2958      minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a 
2959      minimum value < 1 is not valid); the default values for C{max} and C{exact} 
2960      are 0, meaning no maximum or exact length restriction. 
2961   
2962      Example:: 
2963          # define a comma-separated-value as anything that is not a ',' 
2964          csv_value = CharsNotIn(',') 
2965          print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213")) 
2966      prints:: 
2967          ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] 
2968      """ 
2969 -    def __init__( self, notChars, min=1, max=0, exact=0 ): 
 2970          super(CharsNotIn,self).__init__() 
2971          self.skipWhitespace = False 
2972          self.notChars = notChars 
2973   
2974          if min < 1: 
2975              raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") 
2976   
2977          self.minLen = min 
2978   
2979          if max > 0: 
2980              self.maxLen = max 
2981          else: 
2982              self.maxLen = _MAX_INT 
2983   
2984          if exact > 0: 
2985              self.maxLen = exact 
2986              self.minLen = exact 
2987   
2988          self.name = _ustr(self) 
2989          self.errmsg = "Expected " + self.name 
2990          self.mayReturnEmpty = ( self.minLen == 0 ) 
2991          self.mayIndexError = False 
 2992   
2993 -    def parseImpl( self, instring, loc, doActions=True ): 
 2994          if instring[loc] in self.notChars: 
2995              raise ParseException(instring, loc, self.errmsg, self) 
2996   
2997          start = loc 
2998          loc += 1 
2999          notchars = self.notChars 
3000          maxlen = min( start+self.maxLen, len(instring) ) 
3001          while loc < maxlen and \ 
3002                (instring[loc] not in notchars): 
3003              loc += 1 
3004   
3005          if loc - start < self.minLen: 
3006              raise ParseException(instring, loc, self.errmsg, self) 
3007   
3008          return loc, instring[start:loc] 
 3009   
3011          try: 
3012              return super(CharsNotIn, self).__str__() 
3013          except Exception: 
3014              pass 
3015   
3016          if self.strRepr is None: 
3017              if len(self.notChars) > 4: 
3018                  self.strRepr = "!W:(%s...)" % self.notChars[:4] 
3019              else: 
3020                  self.strRepr = "!W:(%s)" % self.notChars 
3021   
3022          return self.strRepr 
  3023   
3025      """ 
3026      Special matching class for matching whitespace.  Normally, whitespace is ignored 
3027      by pyparsing grammars.  This class is included when some whitespace structures 
3028      are significant.  Define with a string containing the whitespace characters to be 
3029      matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments, 
3030      as defined for the C{L{Word}} class. 
3031      """ 
3032      whiteStrs = { 
3033          " " : "<SPC>", 
3034          "\t": "<TAB>", 
3035          "\n": "<LF>", 
3036          "\r": "<CR>", 
3037          "\f": "<FF>", 
3038          } 
3039 -    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 
 3040          super(White,self).__init__() 
3041          self.matchWhite = ws 
3042          self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) ) 
3043           
3044          self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) 
3045          self.mayReturnEmpty = True 
3046          self.errmsg = "Expected " + self.name 
3047   
3048          self.minLen = min 
3049   
3050          if max > 0: 
3051              self.maxLen = max 
3052          else: 
3053              self.maxLen = _MAX_INT 
3054   
3055          if exact > 0: 
3056              self.maxLen = exact 
3057              self.minLen = exact 
 3058   
3059 -    def parseImpl( self, instring, loc, doActions=True ): 
 3060          if not(instring[ loc ] in self.matchWhite): 
3061              raise ParseException(instring, loc, self.errmsg, self) 
3062          start = loc 
3063          loc += 1 
3064          maxloc = start + self.maxLen 
3065          maxloc = min( maxloc, len(instring) ) 
3066          while loc < maxloc and instring[loc] in self.matchWhite: 
3067              loc += 1 
3068   
3069          if loc - start < self.minLen: 
3070              raise ParseException(instring, loc, self.errmsg, self) 
3071   
3072          return loc, instring[start:loc] 
  3073   
3077          super(_PositionToken,self).__init__() 
3078          self.name=self.__class__.__name__ 
3079          self.mayReturnEmpty = True 
3080          self.mayIndexError = False 
 3083      """ 
3084      Token to advance to a specific column of input text; useful for tabular report scraping. 
3085      """ 
3089   
3091          if col(loc,instring) != self.col: 
3092              instrlen = len(instring) 
3093              if self.ignoreExprs: 
3094                  loc = self._skipIgnorables( instring, loc ) 
3095              while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : 
3096                  loc += 1 
3097          return loc 
 3098   
3099 -    def parseImpl( self, instring, loc, doActions=True ): 
 3100          thiscol = col( loc, instring ) 
3101          if thiscol > self.col: 
3102              raise ParseException( instring, loc, "Text not in expected column", self ) 
3103          newloc = loc + self.col - thiscol 
3104          ret = instring[ loc: newloc ] 
3105          return newloc, ret 
  3106   
3109      """ 
3110      Matches if current position is at the beginning of a line within the parse string 
3111       
3112      Example:: 
3113       
3114          test = '''\ 
3115          AAA this line 
3116          AAA and this line 
3117            AAA but not this one 
3118          B AAA and definitely not this one 
3119          ''' 
3120   
3121          for t in (LineStart() + 'AAA' + restOfLine).searchString(test): 
3122              print(t) 
3123       
3124      Prints:: 
3125          ['AAA', ' this line'] 
3126          ['AAA', ' and this line']     
3127   
3128      """ 
3132   
3133 -    def parseImpl( self, instring, loc, doActions=True ): 
 3134          if col(loc, instring) == 1: 
3135              return loc, [] 
3136          raise ParseException(instring, loc, self.errmsg, self) 
  3137   
3139      """ 
3140      Matches if current position is at the end of a line within the parse string 
3141      """ 
3146   
3147 -    def parseImpl( self, instring, loc, doActions=True ): 
 3148          if loc<len(instring): 
3149              if instring[loc] == "\n": 
3150                  return loc+1, "\n" 
3151              else: 
3152                  raise ParseException(instring, loc, self.errmsg, self) 
3153          elif loc == len(instring): 
3154              return loc+1, [] 
3155          else: 
3156              raise ParseException(instring, loc, self.errmsg, self) 
  3157   
3159      """ 
3160      Matches if current position is at the beginning of the parse string 
3161      """ 
3165   
3166 -    def parseImpl( self, instring, loc, doActions=True ): 
 3167          if loc != 0: 
3168               
3169              if loc != self.preParse( instring, 0 ): 
3170                  raise ParseException(instring, loc, self.errmsg, self) 
3171          return loc, [] 
  3172   
3174      """ 
3175      Matches if current position is at the end of the parse string 
3176      """ 
3180   
3181 -    def parseImpl( self, instring, loc, doActions=True ): 
 3182          if loc < len(instring): 
3183              raise ParseException(instring, loc, self.errmsg, self) 
3184          elif loc == len(instring): 
3185              return loc+1, [] 
3186          elif loc > len(instring): 
3187              return loc, [] 
3188          else: 
3189              raise ParseException(instring, loc, self.errmsg, self) 
  3190   
3192      """ 
3193      Matches if the current position is at the beginning of a Word, and 
3194      is not preceded by any character in a given set of C{wordChars} 
3195      (default=C{printables}). To emulate the C{\b} behavior of regular expressions, 
3196      use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of 
3197      the string being parsed, or at the beginning of a line. 
3198      """ 
3200          super(WordStart,self).__init__() 
3201          self.wordChars = set(wordChars) 
3202          self.errmsg = "Not at the start of a word" 
 3203   
3204 -    def parseImpl(self, instring, loc, doActions=True ): 
 3205          if loc != 0: 
3206              if (instring[loc-1] in self.wordChars or 
3207                  instring[loc] not in self.wordChars): 
3208                  raise ParseException(instring, loc, self.errmsg, self) 
3209          return loc, [] 
  3210   
3212      """ 
3213      Matches if the current position is at the end of a Word, and 
3214      is not followed by any character in a given set of C{wordChars} 
3215      (default=C{printables}). To emulate the C{\b} behavior of regular expressions, 
3216      use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of 
3217      the string being parsed, or at the end of a line. 
3218      """ 
3220          super(WordEnd,self).__init__() 
3221          self.wordChars = set(wordChars) 
3222          self.skipWhitespace = False 
3223          self.errmsg = "Not at the end of a word" 
 3224   
3225 -    def parseImpl(self, instring, loc, doActions=True ): 
 3226          instrlen = len(instring) 
3227          if instrlen>0 and loc<instrlen: 
3228              if (instring[loc] in self.wordChars or 
3229                  instring[loc-1] not in self.wordChars): 
3230                  raise ParseException(instring, loc, self.errmsg, self) 
3231          return loc, [] 
  3232   
3235      """ 
3236      Abstract subclass of ParserElement, for combining and post-processing parsed tokens. 
3237      """ 
3238 -    def __init__( self, exprs, savelist = False ): 
 3239          super(ParseExpression,self).__init__(savelist) 
3240          if isinstance( exprs, _generatorType ): 
3241              exprs = list(exprs) 
3242   
3243          if isinstance( exprs, basestring ): 
3244              self.exprs = [ ParserElement._literalStringClass( exprs ) ] 
3245          elif isinstance( exprs, collections.Iterable ): 
3246              exprs = list(exprs) 
3247               
3248              if all(isinstance(expr, basestring) for expr in exprs): 
3249                  exprs = map(ParserElement._literalStringClass, exprs) 
3250              self.exprs = list(exprs) 
3251          else: 
3252              try: 
3253                  self.exprs = list( exprs ) 
3254              except TypeError: 
3255                  self.exprs = [ exprs ] 
3256          self.callPreparse = False 
 3257   
3259          return self.exprs[i] 
 3260   
3262          self.exprs.append( other ) 
3263          self.strRepr = None 
3264          return self 
 3265   
3267          """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on 
3268             all contained expressions.""" 
3269          self.skipWhitespace = False 
3270          self.exprs = [ e.copy() for e in self.exprs ] 
3271          for e in self.exprs: 
3272              e.leaveWhitespace() 
3273          return self 
 3274   
3276          if isinstance( other, Suppress ): 
3277              if other not in self.ignoreExprs: 
3278                  super( ParseExpression, self).ignore( other ) 
3279                  for e in self.exprs: 
3280                      e.ignore( self.ignoreExprs[-1] ) 
3281          else: 
3282              super( ParseExpression, self).ignore( other ) 
3283              for e in self.exprs: 
3284                  e.ignore( self.ignoreExprs[-1] ) 
3285          return self 
 3286   
3288          try: 
3289              return super(ParseExpression,self).__str__() 
3290          except Exception: 
3291              pass 
3292   
3293          if self.strRepr is None: 
3294              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) 
3295          return self.strRepr 
 3296   
3298          super(ParseExpression,self).streamline() 
3299   
3300          for e in self.exprs: 
3301              e.streamline() 
3302   
3303           
3304           
3305           
3306          if ( len(self.exprs) == 2 ): 
3307              other = self.exprs[0] 
3308              if ( isinstance( other, self.__class__ ) and 
3309                    not(other.parseAction) and 
3310                    other.resultsName is None and 
3311                    not other.debug ): 
3312                  self.exprs = other.exprs[:] + [ self.exprs[1] ] 
3313                  self.strRepr = None 
3314                  self.mayReturnEmpty |= other.mayReturnEmpty 
3315                  self.mayIndexError  |= other.mayIndexError 
3316   
3317              other = self.exprs[-1] 
3318              if ( isinstance( other, self.__class__ ) and 
3319                    not(other.parseAction) and 
3320                    other.resultsName is None and 
3321                    not other.debug ): 
3322                  self.exprs = self.exprs[:-1] + other.exprs[:] 
3323                  self.strRepr = None 
3324                  self.mayReturnEmpty |= other.mayReturnEmpty 
3325                  self.mayIndexError  |= other.mayIndexError 
3326   
3327          self.errmsg = "Expected " + _ustr(self) 
3328           
3329          return self 
 3330   
3334   
3335 -    def validate( self, validateTrace=[] ): 
 3336          tmp = validateTrace[:]+[self] 
3337          for e in self.exprs: 
3338              e.validate(tmp) 
3339          self.checkRecursion( [] ) 
 3340           
 3345   
3346 -class And(ParseExpression): 
 3347      """ 
3348      Requires all given C{ParseExpression}s to be found in the given order. 
3349      Expressions may be separated by whitespace. 
3350      May be constructed using the C{'+'} operator. 
3351      May also be constructed using the C{'-'} operator, which will suppress backtracking. 
3352   
3353      Example:: 
3354          integer = Word(nums) 
3355          name_expr = OneOrMore(Word(alphas)) 
3356   
3357          expr = And([integer("id"),name_expr("name"),integer("age")]) 
3358          # more easily written as: 
3359          expr = integer("id") + name_expr("name") + integer("age") 
3360      """ 
3361   
3367   
3368 -    def __init__( self, exprs, savelist = True ): 
 3369          super(And,self).__init__(exprs, savelist) 
3370          self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 
3371          self.setWhitespaceChars( self.exprs[0].whiteChars ) 
3372          self.skipWhitespace = self.exprs[0].skipWhitespace 
3373          self.callPreparse = True 
 3374   
3375 -    def parseImpl( self, instring, loc, doActions=True ): 
 3376           
3377           
3378          loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) 
3379          errorStop = False 
3380          for e in self.exprs[1:]: 
3381              if isinstance(e, And._ErrorStop): 
3382                  errorStop = True 
3383                  continue 
3384              if errorStop: 
3385                  try: 
3386                      loc, exprtokens = e._parse( instring, loc, doActions ) 
3387                  except ParseSyntaxException: 
3388                      raise 
3389                  except ParseBaseException as pe: 
3390                      pe.__traceback__ = None 
3391                      raise ParseSyntaxException._from_exception(pe) 
3392                  except IndexError: 
3393                      raise ParseSyntaxException(instring, len(instring), self.errmsg, self) 
3394              else: 
3395                  loc, exprtokens = e._parse( instring, loc, doActions ) 
3396              if exprtokens or exprtokens.haskeys(): 
3397                  resultlist += exprtokens 
3398          return loc, resultlist 
 3399   
3401          if isinstance( other, basestring ): 
3402              other = ParserElement._literalStringClass( other ) 
3403          return self.append( other )  
 3404   
3406          subRecCheckList = parseElementList[:] + [ self ] 
3407          for e in self.exprs: 
3408              e.checkRecursion( subRecCheckList ) 
3409              if not e.mayReturnEmpty: 
3410                  break 
 3411   
3413          if hasattr(self,"name"): 
3414              return self.name 
3415   
3416          if self.strRepr is None: 
3417              self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}" 
3418   
3419          return self.strRepr 
  3420   
3421   
3422 -class Or(ParseExpression): 
 3423      """ 
3424      Requires that at least one C{ParseExpression} is found. 
3425      If two expressions match, the expression that matches the longest string will be used. 
3426      May be constructed using the C{'^'} operator. 
3427   
3428      Example:: 
3429          # construct Or using '^' operator 
3430           
3431          number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums)) 
3432          print(number.searchString("123 3.1416 789")) 
3433      prints:: 
3434          [['123'], ['3.1416'], ['789']] 
3435      """ 
3436 -    def __init__( self, exprs, savelist = False ): 
 3437          super(Or,self).__init__(exprs, savelist) 
3438          if self.exprs: 
3439              self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 
3440          else: 
3441              self.mayReturnEmpty = True 
 3442   
3443 -    def parseImpl( self, instring, loc, doActions=True ): 
 3444          maxExcLoc = -1 
3445          maxException = None 
3446          matches = [] 
3447          for e in self.exprs: 
3448              try: 
3449                  loc2 = e.tryParse( instring, loc ) 
3450              except ParseException as err: 
3451                  err.__traceback__ = None 
3452                  if err.loc > maxExcLoc: 
3453                      maxException = err 
3454                      maxExcLoc = err.loc 
3455              except IndexError: 
3456                  if len(instring) > maxExcLoc: 
3457                      maxException = ParseException(instring,len(instring),e.errmsg,self) 
3458                      maxExcLoc = len(instring) 
3459              else: 
3460                   
3461                  matches.append((loc2, e)) 
3462   
3463          if matches: 
3464              matches.sort(key=lambda x: -x[0]) 
3465              for _,e in matches: 
3466                  try: 
3467                      return e._parse( instring, loc, doActions ) 
3468                  except ParseException as err: 
3469                      err.__traceback__ = None 
3470                      if err.loc > maxExcLoc: 
3471                          maxException = err 
3472                          maxExcLoc = err.loc 
3473   
3474          if maxException is not None: 
3475              maxException.msg = self.errmsg 
3476              raise maxException 
3477          else: 
3478              raise ParseException(instring, loc, "no defined alternatives to match", self) 
 3479   
3480   
3482          if isinstance( other, basestring ): 
3483              other = ParserElement._literalStringClass( other ) 
3484          return self.append( other )  
 3485   
3487          if hasattr(self,"name"): 
3488              return self.name 
3489   
3490          if self.strRepr is None: 
3491              self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}" 
3492   
3493          return self.strRepr 
 3494   
3496          subRecCheckList = parseElementList[:] + [ self ] 
3497          for e in self.exprs: 
3498              e.checkRecursion( subRecCheckList ) 
  3499   
3502      """ 
3503      Requires that at least one C{ParseExpression} is found. 
3504      If two expressions match, the first one listed is the one that will match. 
3505      May be constructed using the C{'|'} operator. 
3506   
3507      Example:: 
3508          # construct MatchFirst using '|' operator 
3509           
3510          # watch the order of expressions to match 
3511          number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) 
3512          print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']] 
3513   
3514          # put more selective expression first 
3515          number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) 
3516          print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']] 
3517      """ 
3518 -    def __init__( self, exprs, savelist = False ): 
 3519          super(MatchFirst,self).__init__(exprs, savelist) 
3520          if self.exprs: 
3521              self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 
3522          else: 
3523              self.mayReturnEmpty = True 
 3524   
3525 -    def parseImpl( self, instring, loc, doActions=True ): 
 3526          maxExcLoc = -1 
3527          maxException = None 
3528          for e in self.exprs: 
3529              try: 
3530                  ret = e._parse( instring, loc, doActions ) 
3531                  return ret 
3532              except ParseException as err: 
3533                  if err.loc > maxExcLoc: 
3534                      maxException = err 
3535                      maxExcLoc = err.loc 
3536              except IndexError: 
3537                  if len(instring) > maxExcLoc: 
3538                      maxException = ParseException(instring,len(instring),e.errmsg,self) 
3539                      maxExcLoc = len(instring) 
3540   
3541           
3542          else: 
3543              if maxException is not None: 
3544                  maxException.msg = self.errmsg 
3545                  raise maxException 
3546              else: 
3547                  raise ParseException(instring, loc, "no defined alternatives to match", self) 
 3548   
3550          if isinstance( other, basestring ): 
3551              other = ParserElement._literalStringClass( other ) 
3552          return self.append( other )  
 3553   
3555          if hasattr(self,"name"): 
3556              return self.name 
3557   
3558          if self.strRepr is None: 
3559              self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" 
3560   
3561          return self.strRepr 
 3562   
3564          subRecCheckList = parseElementList[:] + [ self ] 
3565          for e in self.exprs: 
3566              e.checkRecursion( subRecCheckList ) 
  3567   
3568   
3569 -class Each(ParseExpression): 
 3570      """ 
3571      Requires all given C{ParseExpression}s to be found, but in any order. 
3572      Expressions may be separated by whitespace. 
3573      May be constructed using the C{'&'} operator. 
3574   
3575      Example:: 
3576          color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") 
3577          shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") 
3578          integer = Word(nums) 
3579          shape_attr = "shape:" + shape_type("shape") 
3580          posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") 
3581          color_attr = "color:" + color("color") 
3582          size_attr = "size:" + integer("size") 
3583   
3584          # use Each (using operator '&') to accept attributes in any order  
3585          # (shape and posn are required, color and size are optional) 
3586          shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) 
3587   
3588          shape_spec.runTests(''' 
3589              shape: SQUARE color: BLACK posn: 100, 120 
3590              shape: CIRCLE size: 50 color: BLUE posn: 50,80 
3591              color:GREEN size:20 shape:TRIANGLE posn:20,40 
3592              ''' 
3593              ) 
3594      prints:: 
3595          shape: SQUARE color: BLACK posn: 100, 120 
3596          ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] 
3597          - color: BLACK 
3598          - posn: ['100', ',', '120'] 
3599            - x: 100 
3600            - y: 120 
3601          - shape: SQUARE 
3602   
3603   
3604          shape: CIRCLE size: 50 color: BLUE posn: 50,80 
3605          ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] 
3606          - color: BLUE 
3607          - posn: ['50', ',', '80'] 
3608            - x: 50 
3609            - y: 80 
3610          - shape: CIRCLE 
3611          - size: 50 
3612   
3613   
3614          color: GREEN size: 20 shape: TRIANGLE posn: 20,40 
3615          ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] 
3616          - color: GREEN 
3617          - posn: ['20', ',', '40'] 
3618            - x: 20 
3619            - y: 40 
3620          - shape: TRIANGLE 
3621          - size: 20 
3622      """ 
3623 -    def __init__( self, exprs, savelist = True ): 
 3624          super(Each,self).__init__(exprs, savelist) 
3625          self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 
3626          self.skipWhitespace = True 
3627          self.initExprGroups = True 
 3628   
3629 -    def parseImpl( self, instring, loc, doActions=True ): 
 3630          if self.initExprGroups: 
3631              self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) 
3632              opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 
3633              opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] 
3634              self.optionals = opt1 + opt2 
3635              self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 
3636              self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 
3637              self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 
3638              self.required += self.multirequired 
3639              self.initExprGroups = False 
3640          tmpLoc = loc 
3641          tmpReqd = self.required[:] 
3642          tmpOpt  = self.optionals[:] 
3643          matchOrder = [] 
3644   
3645          keepMatching = True 
3646          while keepMatching: 
3647              tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 
3648              failed = [] 
3649              for e in tmpExprs: 
3650                  try: 
3651                      tmpLoc = e.tryParse( instring, tmpLoc ) 
3652                  except ParseException: 
3653                      failed.append(e) 
3654                  else: 
3655                      matchOrder.append(self.opt1map.get(id(e),e)) 
3656                      if e in tmpReqd: 
3657                          tmpReqd.remove(e) 
3658                      elif e in tmpOpt: 
3659                          tmpOpt.remove(e) 
3660              if len(failed) == len(tmpExprs): 
3661                  keepMatching = False 
3662   
3663          if tmpReqd: 
3664              missing = ", ".join(_ustr(e) for e in tmpReqd) 
3665              raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 
3666   
3667           
3668          matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 
3669   
3670          resultlist = [] 
3671          for e in matchOrder: 
3672              loc,results = e._parse(instring,loc,doActions) 
3673              resultlist.append(results) 
3674   
3675          finalResults = sum(resultlist, ParseResults([])) 
3676          return loc, finalResults 
 3677   
3679          if hasattr(self,"name"): 
3680              return self.name 
3681   
3682          if self.strRepr is None: 
3683              self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" 
3684   
3685          return self.strRepr 
 3686   
3688          subRecCheckList = parseElementList[:] + [ self ] 
3689          for e in self.exprs: 
3690              e.checkRecursion( subRecCheckList ) 
  3691   
3694      """ 
3695      Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens. 
3696      """ 
3697 -    def __init__( self, expr, savelist=False ): 
 3698          super(ParseElementEnhance,self).__init__(savelist) 
3699          if isinstance( expr, basestring ): 
3700              if issubclass(ParserElement._literalStringClass, Token): 
3701                  expr = ParserElement._literalStringClass(expr) 
3702              else: 
3703                  expr = ParserElement._literalStringClass(Literal(expr)) 
3704          self.expr = expr 
3705          self.strRepr = None 
3706          if expr is not None: 
3707              self.mayIndexError = expr.mayIndexError 
3708              self.mayReturnEmpty = expr.mayReturnEmpty 
3709              self.setWhitespaceChars( expr.whiteChars ) 
3710              self.skipWhitespace = expr.skipWhitespace 
3711              self.saveAsList = expr.saveAsList 
3712              self.callPreparse = expr.callPreparse 
3713              self.ignoreExprs.extend(expr.ignoreExprs) 
 3714   
3715 -    def parseImpl( self, instring, loc, doActions=True ): 
 3716          if self.expr is not None: 
3717              return self.expr._parse( instring, loc, doActions, callPreParse=False ) 
3718          else: 
3719              raise ParseException("",loc,self.errmsg,self) 
 3720   
3722          self.skipWhitespace = False 
3723          self.expr = self.expr.copy() 
3724          if self.expr is not None: 
3725              self.expr.leaveWhitespace() 
3726          return self 
 3727   
3729          if isinstance( other, Suppress ): 
3730              if other not in self.ignoreExprs: 
3731                  super( ParseElementEnhance, self).ignore( other ) 
3732                  if self.expr is not None: 
3733                      self.expr.ignore( self.ignoreExprs[-1] ) 
3734          else: 
3735              super( ParseElementEnhance, self).ignore( other ) 
3736              if self.expr is not None: 
3737                  self.expr.ignore( self.ignoreExprs[-1] ) 
3738          return self 
 3739   
3745   
3747          if self in parseElementList: 
3748              raise RecursiveGrammarException( parseElementList+[self] ) 
3749          subRecCheckList = parseElementList[:] + [ self ] 
3750          if self.expr is not None: 
3751              self.expr.checkRecursion( subRecCheckList ) 
 3752   
3753 -    def validate( self, validateTrace=[] ): 
 3754          tmp = validateTrace[:]+[self] 
3755          if self.expr is not None: 
3756              self.expr.validate(tmp) 
3757          self.checkRecursion( [] ) 
 3758   
3760          try: 
3761              return super(ParseElementEnhance,self).__str__() 
3762          except Exception: 
3763              pass 
3764   
3765          if self.strRepr is None and self.expr is not None: 
3766              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) 
3767          return self.strRepr 
  3768   
3771      """ 
3772      Lookahead matching of the given parse expression.  C{FollowedBy} 
3773      does I{not} advance the parsing position within the input string, it only 
3774      verifies that the specified parse expression matches at the current 
3775      position.  C{FollowedBy} always returns a null token list. 
3776   
3777      Example:: 
3778          # use FollowedBy to match a label only if it is followed by a ':' 
3779          data_word = Word(alphas) 
3780          label = data_word + FollowedBy(':') 
3781          attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 
3782           
3783          OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint() 
3784      prints:: 
3785          [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] 
3786      """ 
3790   
3791 -    def parseImpl( self, instring, loc, doActions=True ): 
 3792          self.expr.tryParse( instring, loc ) 
3793          return loc, [] 
  3794   
3795   
3796 -class NotAny(ParseElementEnhance): 
 3797      """ 
3798      Lookahead to disallow matching with the given parse expression.  C{NotAny} 
3799      does I{not} advance the parsing position within the input string, it only 
3800      verifies that the specified parse expression does I{not} match at the current 
3801      position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny} 
3802      always returns a null token list.  May be constructed using the '~' operator. 
3803   
3804      Example:: 
3805           
3806      """ 
3808          super(NotAny,self).__init__(expr) 
3809           
3810          self.skipWhitespace = False   
3811          self.mayReturnEmpty = True 
3812          self.errmsg = "Found unwanted token, "+_ustr(self.expr) 
 3813   
3814 -    def parseImpl( self, instring, loc, doActions=True ): 
 3818   
3820          if hasattr(self,"name"): 
3821              return self.name 
3822   
3823          if self.strRepr is None: 
3824              self.strRepr = "~{" + _ustr(self.expr) + "}" 
3825   
3826          return self.strRepr 
  3827   
3829 -    def __init__( self, expr, stopOn=None): 
 3830          super(_MultipleMatch, self).__init__(expr) 
3831          self.saveAsList = True 
3832          ender = stopOn 
3833          if isinstance(ender, basestring): 
3834              ender = ParserElement._literalStringClass(ender) 
3835          self.not_ender = ~ender if ender is not None else None 
 3836   
3837 -    def parseImpl( self, instring, loc, doActions=True ): 
 3838          self_expr_parse = self.expr._parse 
3839          self_skip_ignorables = self._skipIgnorables 
3840          check_ender = self.not_ender is not None 
3841          if check_ender: 
3842              try_not_ender = self.not_ender.tryParse 
3843           
3844           
3845           
3846          if check_ender: 
3847              try_not_ender(instring, loc) 
3848          loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False ) 
3849          try: 
3850              hasIgnoreExprs = (not not self.ignoreExprs) 
3851              while 1: 
3852                  if check_ender: 
3853                      try_not_ender(instring, loc) 
3854                  if hasIgnoreExprs: 
3855                      preloc = self_skip_ignorables( instring, loc ) 
3856                  else: 
3857                      preloc = loc 
3858                  loc, tmptokens = self_expr_parse( instring, preloc, doActions ) 
3859                  if tmptokens or tmptokens.haskeys(): 
3860                      tokens += tmptokens 
3861          except (ParseException,IndexError): 
3862              pass 
3863   
3864          return loc, tokens 
 3867      """ 
3868      Repetition of one or more of the given expression. 
3869       
3870      Parameters: 
3871       - expr - expression that must match one or more times 
3872       - stopOn - (default=C{None}) - expression for a terminating sentinel 
3873            (only required if the sentinel would ordinarily match the repetition  
3874            expression)           
3875   
3876      Example:: 
3877          data_word = Word(alphas) 
3878          label = data_word + FollowedBy(':') 
3879          attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) 
3880   
3881          text = "shape: SQUARE posn: upper left color: BLACK" 
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'posn' as data instead of next label -> [['shape', 'SQUARE posn']]
3883   
3884          # use stopOn attribute for OneOrMore to avoid reading label string as part of the data 
3885          attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 
3886          OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] 
3887           
3888          # could also be written as 
3889          (attr_expr * (1,)).parseString(text).pprint() 
3890      """ 
3891   
3893          if hasattr(self,"name"): 
3894              return self.name 
3895   
3896          if self.strRepr is None: 
3897              self.strRepr = "{" + _ustr(self.expr) + "}..." 
3898   
3899          return self.strRepr 
  3900   
3902      """ 
3903      Optional repetition of zero or more of the given expression. 
3904       
3905      Parameters: 
3906       - expr - expression that must match zero or more times 
3907       - stopOn - (default=C{None}) - expression for a terminating sentinel 
3908            (only required if the sentinel would ordinarily match the repetition  
3909            expression)           
3910   
3911      Example: similar to L{OneOrMore} 
3912      """ 
3913 -    def __init__( self, expr, stopOn=None): 
 3916           
3917 -    def parseImpl( self, instring, loc, doActions=True ): 
 3922   
3924          if hasattr(self,"name"): 
3925              return self.name 
3926   
3927          if self.strRepr is None: 
3928              self.strRepr = "[" + _ustr(self.expr) + "]..." 
3929   
3930          return self.strRepr 
  3931   
3938   
# Sentinel object: Optional.parseImpl compares defaultValue against it by identity to detect "no default value".
3939  _optionalNotMatched = _NullToken() 
3941      """ 
3942      Optional matching of the given expression. 
3943   
3944      Parameters: 
3945       - expr - expression that must match zero or more times 
3946       - default (optional) - value to be returned if the optional expression is not found. 
3947   
3948      Example:: 
3949          # US postal code can be a 5-digit zip, plus optional 4-digit qualifier 
3950          zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4))) 
3951          zip.runTests(''' 
3952              # traditional ZIP code 
3953              12345 
3954               
3955              # ZIP+4 form 
3956              12101-0001 
3957               
3958              # invalid ZIP 
3959              98765- 
3960              ''') 
3961      prints:: 
3962          # traditional ZIP code 
3963          12345 
3964          ['12345'] 
3965   
3966          # ZIP+4 form 
3967          12101-0001 
3968          ['12101-0001'] 
3969   
3970          # invalid ZIP 
3971          98765- 
3972               ^ 
3973          FAIL: Expected end of text (at char 5), (line:1, col:6) 
3974      """ 
3976          super(Optional,self).__init__( expr, savelist=False ) 
3977          self.saveAsList = self.expr.saveAsList 
3978          self.defaultValue = default 
3979          self.mayReturnEmpty = True 
 3980   
3981 -    def parseImpl( self, instring, loc, doActions=True ): 
 3982          try: 
3983              loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 
3984          except (ParseException,IndexError): 
3985              if self.defaultValue is not _optionalNotMatched: 
3986                  if self.expr.resultsName: 
3987                      tokens = ParseResults([ self.defaultValue ]) 
3988                      tokens[self.expr.resultsName] = self.defaultValue 
3989                  else: 
3990                      tokens = [ self.defaultValue ] 
3991              else: 
3992                  tokens = [] 
3993          return loc, tokens 
 3994   
3996          if hasattr(self,"name"): 
3997              return self.name 
3998   
3999          if self.strRepr is None: 
4000              self.strRepr = "[" + _ustr(self.expr) + "]" 
4001   
4002          return self.strRepr 
  4003   
class SkipTo(ParseElementEnhance):
    """
    Token that consumes all text up to the first position where a target
    expression matches.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; scanning stops at the first position where
          one of them can be parsed (a plain string is converted to a literal)

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """
        Store the target expression C{other} (via the base class) together with
        the C{include}, C{ignore} and C{failOn} options described on the class.
        """
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string is promoted to the currently configured literal class
        self.failOn = ParserElement._literalStringClass(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} until the target expression matches.

        Returns C{(loc, results)} where C{results} holds the skipped text and,
        when C{include} was requested, the tokens of the target expression too.
        Raises C{ParseException} if the scan runs past the end of C{instring}
        without the target matching.
        """
        startloc = loc
        endOfInput = len(instring)
        # bind the bound methods once; they are called at every scanned position
        parseTarget = self.expr._parse
        failOnMatches = None if self.failOn is None else self.failOn.canParseNext
        skipIgnorable = None if self.ignoreExpr is None else self.ignoreExpr.tryParse

        scanloc = loc
        while scanloc <= endOfInput:
            # failOn is tested first; the scan simply stops here (nothing is raised
            # at this point), so the text seen so far becomes the skipped text
            if failOnMatches is not None and failOnMatches(instring, scanloc):
                break

            if skipIgnorable is not None:
                # step over as many consecutive ignorable expressions as will parse
                while True:
                    try:
                        scanloc = skipIgnorable(instring, scanloc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are not run while searching
                parseTarget(instring, scanloc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # target does not start here, try the next character
                scanloc += 1
                continue
            # target matches at scanloc
            break
        else:
            # loop condition failed: ran off the end of the input without a match
            raise ParseException(instring, loc, self.errmsg, self)

        skipresult = ParseResults(instring[startloc:scanloc])
        if not self.includeMatch:
            return scanloc, skipresult

        # parse the target for real, this time honoring doActions
        endloc, matched = parseTarget(instring,scanloc,doActions,callPreParse=False)
        skipresult += matched
        return endloc, skipresult
  4118   
4119 -class Forward(ParseElementEnhance): 
 4120      """ 
4121      Forward declaration of an expression to be defined later - 
4122      used for recursive grammars, such as algebraic infix notation. 
4123      When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. 
4124   
4125      Note: take care when assigning to C{Forward} not to overlook precedence of operators. 
4126      Specifically, '|' has a lower precedence than '<<', so that:: 
4127          fwdExpr << a | b | c 
4128      will actually be evaluated as:: 
4129          (fwdExpr << a) | b | c 
4130      thereby leaving b and c out as parseable alternatives.  It is recommended that you 
4131      explicitly group the values inserted into the C{Forward}:: 
4132          fwdExpr << (a | b | c) 
4133      Converting to use the '<<=' operator instead will avoid this problem. 
4134   
4135      See L{ParseResults.pprint} for an example of a recursive parser created using 
4136      C{Forward}. 
4137      """ 
4140   
4142          if isinstance( other, basestring ): 
4143              other = ParserElement._literalStringClass(other) 
4144          self.expr = other 
4145          self.strRepr = None 
4146          self.mayIndexError = self.expr.mayIndexError 
4147          self.mayReturnEmpty = self.expr.mayReturnEmpty 
4148          self.setWhitespaceChars( self.expr.whiteChars ) 
4149          self.skipWhitespace = self.expr.skipWhitespace 
4150          self.saveAsList = self.expr.saveAsList 
4151          self.ignoreExprs.extend(self.expr.ignoreExprs) 
4152          return self 
 4153           
4155          return self << other 
 4156       
4158          self.skipWhitespace = False 
4159          return self 
 4160   
4162          if not self.streamlined: 
4163              self.streamlined = True 
4164              if self.expr is not None: 
4165                  self.expr.streamline() 
4166          return self 
 4167   
4168 -    def validate( self, validateTrace=[] ): 
 4169          if self not in validateTrace: 
4170              tmp = validateTrace[:]+[self] 
4171              if self.expr is not None: 
4172                  self.expr.validate(tmp) 
4173          self.checkRecursion([]) 
 4174   
4176          if hasattr(self,"name"): 
4177              return self.name 
4178          return self.__class__.__name__ + ": ..." 
4179   
4180           
4181          self._revertClass = self.__class__ 
4182          self.__class__ = _ForwardNoRecurse 
4183          try: 
4184              if self.expr is not None: 
4185                  retString = _ustr(self.expr) 
4186              else: 
4187                  retString = "None" 
4188          finally: 
4189              self.__class__ = self._revertClass 
4190          return self.__class__.__name__ + ": " + retString 
 4191   
4193          if self.expr is not None: 
4194              return super(Forward,self).copy() 
4195          else: 
4196              ret = Forward() 
4197              ret <<= self 
4198              return ret 
  4199   
4203   
4205      """ 
4206      Abstract subclass of C{ParseExpression}, for converting parsed results. 
4207      """ 
4208 -    def __init__( self, expr, savelist=False ): 
  4211   
4213      """ 
4214      Converter to concatenate all matching tokens to a single string. 
4215      By default, the matching patterns must also be contiguous in the input string; 
4216      this can be disabled by specifying C{'adjacent=False'} in the constructor. 
4217   
4218      Example:: 
4219          real = Word(nums) + '.' + Word(nums) 
4220          print(real.parseString('3.1416')) # -> ['3', '.', '1416'] 
4221          # will also erroneously match the following 
4222          print(real.parseString('3. 1416')) # -> ['3', '.', '1416'] 
4223   
4224          real = Combine(Word(nums) + '.' + Word(nums)) 
4225          print(real.parseString('3.1416')) # -> ['3.1416'] 
4226          # no match when there are internal spaces 
4227          print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) 
4228      """ 
4229 -    def __init__( self, expr, joinString="", adjacent=True ): 
 4230          super(Combine,self).__init__( expr ) 
4231           
4232          if adjacent: 
4233              self.leaveWhitespace() 
4234          self.adjacent = adjacent 
4235          self.skipWhitespace = True 
4236          self.joinString = joinString 
4237          self.callPreparse = True 
 4238   
4245   
4246 -    def postParse( self, instring, loc, tokenlist ): 
 4247          retToks = tokenlist.copy() 
4248          del retToks[:] 
4249          retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) 
4250   
4251          if self.resultsName and retToks.haskeys(): 
4252              return [ retToks ] 
4253          else: 
4254              return retToks 
  4255   
4256 -class Group(TokenConverter): 
 4257      """ 
4258      Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions. 
4259   
4260      Example:: 
4261          ident = Word(alphas) 
4262          num = Word(nums) 
4263          term = ident | num 
4264          func = ident + Optional(delimitedList(term)) 
4265          print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100'] 
4266   
4267          func = ident + Group(Optional(delimitedList(term))) 
4268          print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']] 
4269      """ 
4271          super(Group,self).__init__( expr ) 
4272          self.saveAsList = True 
 4273   
4274 -    def postParse( self, instring, loc, tokenlist ): 
 4275          return [ tokenlist ] 
  4276   
4277 -class Dict(TokenConverter): 
 4278      """ 
4279      Converter to return a repetitive expression as a list, but also as a dictionary. 
4280      Each element can also be referenced using the first token in the expression as its key. 
4281      Useful for tabular report scraping when the first column can be used as a item key. 
4282   
4283      Example:: 
4284          data_word = Word(alphas) 
4285          label = data_word + FollowedBy(':') 
4286          attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) 
4287   
4288          text = "shape: SQUARE posn: upper left color: light blue texture: burlap" 
4289          attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 
4290           
4291          # print attributes as plain groups 
4292          print(OneOrMore(attr_expr).parseString(text).dump()) 
4293           
4294          # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names 
4295          result = Dict(OneOrMore(Group(attr_expr))).parseString(text) 
4296          print(result.dump()) 
4297           
4298          # access named fields as dict entries, or output as dict 
4299          print(result['shape'])         
4300          print(result.asDict()) 
4301      prints:: 
4302          ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] 
4303   
4304          [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] 
4305          - color: light blue 
4306          - posn: upper left 
4307          - shape: SQUARE 
4308          - texture: burlap 
4309          SQUARE 
4310          {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} 
4311      See more examples at L{ParseResults} of accessing fields by results name. 
4312      """ 
4314          super(Dict,self).__init__( expr ) 
4315          self.saveAsList = True 
 4316   
4317 -    def postParse( self, instring, loc, tokenlist ): 
 4318          for i,tok in enumerate(tokenlist): 
4319              if len(tok) == 0: 
4320                  continue 
4321              ikey = tok[0] 
4322              if isinstance(ikey,int): 
4323                  ikey = _ustr(tok[0]).strip() 
4324              if len(tok)==1: 
4325                  tokenlist[ikey] = _ParseResultsWithOffset("",i) 
4326              elif len(tok)==2 and not isinstance(tok[1],ParseResults): 
4327                  tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) 
4328              else: 
4329                  dictvalue = tok.copy()  
4330                  del dictvalue[0] 
4331                  if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()): 
4332                      tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) 
4333                  else: 
4334                      tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) 
4335   
4336          if self.resultsName: 
4337              return [ tokenlist ] 
4338          else: 
4339              return tokenlist 
  4340   
4343      """ 
4344      Converter for ignoring the results of a parsed expression. 
4345   
4346      Example:: 
4347          source = "a, b, c,d" 
4348          wd = Word(alphas) 
4349          wd_list1 = wd + ZeroOrMore(',' + wd) 
4350          print(wd_list1.parseString(source)) 
4351   
4352          # often, delimiters that are useful during parsing are just in the 
4353          # way afterward - use Suppress to keep them out of the parsed output 
4354          wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) 
4355          print(wd_list2.parseString(source)) 
4356      prints:: 
4357          ['a', ',', 'b', ',', 'c', ',', 'd'] 
4358          ['a', 'b', 'c', 'd'] 
4359      (See also L{delimitedList}.) 
4360      """ 
4361 -    def postParse( self, instring, loc, tokenlist ): 
 4363   
 4366   
4369      """ 
4370      Wrapper for parse actions, to ensure they are only called once. 
4371      """ 
4373          self.callable = _trim_arity(methodCall) 
4374          self.called = False 
 4376          if not self.called: 
4377              results = self.callable(s,l,t) 
4378              self.called = True 
4379              return results 
4380          raise ParseException(s,l,"") 
  4383   
4385      """ 
4386      Decorator for debugging parse actions.  
4387       
4388      When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".} 
4389      When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised. 
4390   
4391      Example:: 
4392          wd = Word(alphas) 
4393   
4394          @traceParseAction 
4395          def remove_duplicate_chars(tokens): 
4396              return ''.join(sorted(set(''.join(tokens)))) 
4397   
4398          wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) 
4399          print(wds.parseString("slkdjs sld sldd sdlf sdljf")) 
4400      prints:: 
4401          >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) 
4402          <<leaving remove_duplicate_chars (ret: 'dfjkls') 
4403          ['dfjkls'] 
4404      """ 
4405      f = _trim_arity(f) 
4406      def z(*paArgs): 
4407          thisFunc = f.__name__ 
4408          s,l,t = paArgs[-3:] 
4409          if len(paArgs)>3: 
4410              thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc 
4411          sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) ) 
4412          try: 
4413              ret = f(*paArgs) 
4414          except Exception as exc: 
4415              sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) 
4416              raise 
4417          sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) ) 
4418          return ret 
 4419      try: 
4420          z.__name__ = f.__name__ 
4421      except AttributeError: 
4422          pass 
4423      return z 
4424   
4425   
4426   
4427   
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to build an expression matching one or more C{expr} separated by C{delim}.

    Parameters:
     - expr - expression for a single list element
     - delim - (default=C{","}) delimiter between elements
     - combine - (default=C{False}) if true, the list is wrapped in C{L{Combine}}
          and the delimiters are kept, so the match comes back as one token
          string; otherwise the delimiters are wrapped in C{L{Suppress}} and the
          elements come back as a list of tokens

    The returned expression is named C{"<expr> [<delim> <expr>]..."}.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if not combine:
        # delimiters are matched but kept out of the results
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    # delimiters stay in the match and everything is joined into one token
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
 4446   
4448      """ 
4449      Helper to define a counted list of expressions. 
4450      This helper defines a pattern of the form:: 
4451          integer expr expr expr... 
4452      where the leading integer tells how many expr expressions follow. 
4453      The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. 
4454       
4455      If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value. 
4456   
4457      Example:: 
4458          countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd'] 
4459   
4460          # in this parser, the leading integer value is given in binary, 
4461          # '10' indicating that 2 values are in the array 
4462          binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) 
4463          countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd'] 
4464      """ 
4465      arrayExpr = Forward() 
4466      def countFieldParseAction(s,l,t): 
4467          n = t[0] 
4468          arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) 
4469          return [] 
 4470      if intExpr is None: 
4471          intExpr = Word(nums).setParseAction(lambda t:int(t[0])) 
4472      else: 
4473          intExpr = intExpr.copy() 
4474      intExpr.setName("arrayLen") 
4475      intExpr.addParseAction(countFieldParseAction, callDuringTry=True) 
4476      return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...') 
4477   
4479      ret = [] 
4480      for i in L: 
4481          if isinstance(i,list): 
4482              ret.extend(_flatten(i)) 
4483          else: 
4484              ret.append(i) 
4485      return ret 
 4486   
4488      """ 
4489      Helper to define an expression that is indirectly defined from 
4490      the tokens matched in a previous expression, that is, it looks 
4491      for a 'repeat' of a previous expression.  For example:: 
4492          first = Word(nums) 
4493          second = matchPreviousLiteral(first) 
4494          matchExpr = first + ":" + second 
4495      will match C{"1:1"}, but not C{"1:2"}.  Because this matches a 
4496      previous literal, will also match the leading C{"1:1"} in C{"1:10"}. 
4497      If this is not desired, use C{matchPreviousExpr}. 
4498      Do I{not} use with packrat parsing enabled. 
4499      """ 
4500      rep = Forward() 
4501      def copyTokenToRepeater(s,l,t): 
4502          if t: 
4503              if len(t) == 1: 
4504                  rep << t[0] 
4505              else: 
4506                   
4507                  tflat = _flatten(t.asList()) 
4508                  rep << And(Literal(tt) for tt in tflat) 
4509          else: 
4510              rep << Empty() 
 4511      expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 
4512      rep.setName('(prev) ' + _ustr(expr)) 
4513      return rep 
4514   
4516      """ 
4517      Helper to define an expression that is indirectly defined from 
4518      the tokens matched in a previous expression, that is, it looks 
4519      for a 'repeat' of a previous expression.  For example:: 
4520          first = Word(nums) 
4521          second = matchPreviousExpr(first) 
4522          matchExpr = first + ":" + second 
4523      will match C{"1:1"}, but not C{"1:2"}.  Because this matches by 
4524      expressions, will I{not} match the leading C{"1:1"} in C{"1:10"}; 
4525      the expressions are evaluated first, and then compared, so 
4526      C{"1"} is compared with C{"10"}. 
4527      Do I{not} use with packrat parsing enabled. 
4528      """ 
4529      rep = Forward() 
4530      e2 = expr.copy() 
4531      rep <<= e2 
4532      def copyTokenToRepeater(s,l,t): 
4533          matchTokens = _flatten(t.asList()) 
4534          def mustMatchTheseTokens(s,l,t): 
4535              theseTokens = _flatten(t.asList()) 
4536              if  theseTokens != matchTokens: 
4537                  raise ParseException("",0,"") 
 4538          rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) 
4539      expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 
4540      rep.setName('(prev) ' + _ustr(expr)) 
4541      return rep 
4542   
4544       
4545      for c in r"\^-]": 
4546          s = s.replace(c,_bslash+c) 
4547      s = s.replace("\n",r"\n") 
4548      s = s.replace("\t",r"\t") 
4549      return _ustr(s) 
 4550   
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns a C{Regex} or C{MatchFirst} named with the alternatives joined by
    C{' | '}, or C{NoMatch()} when no symbols are given.  An argument that is
    neither a string nor an iterable triggers a C{SyntaxWarning} and also yields
    C{NoMatch()}.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABC aliases in the top-level collections module were removed in
    # Python 3.10; use collections.abc and fall back for Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates and make sure no symbol is listed ahead of a longer symbol
    # that it is a prefix of (a first-match alternation would never reach the
    # longer one otherwise).
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                # duplicate of cur: remove it and rescan from the same position
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                # cur is a prefix of other: move other in front of cur and rescan
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing after position i conflicts with it
            i += 1

    if not caseless and useRegex:
        try:
            # A character class is only valid when every symbol is exactly one
            # character.  Comparing total lengths instead can be fooled, e.g. by
            # an empty string alongside a two-character symbol.
            if all(len(sym) == 1 for sym in symbols):
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # deliberate best-effort: report the problem and use MatchFirst instead
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
 4623   
4625      """ 
4626      Helper to easily and clearly define a dictionary by specifying the respective patterns 
4627      for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens 
4628      in the proper order.  The key pattern can include delimiting markers or punctuation, 
4629      as long as they are suppressed, thereby leaving the significant key text.  The value 
4630      pattern can include named results, so that the C{Dict} results can include named token 
4631      fields. 
4632   
4633      Example:: 
4634          text = "shape: SQUARE posn: upper left color: light blue texture: burlap" 
4635          attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 
4636          print(OneOrMore(attr_expr).parseString(text).dump()) 
4637           
4638          attr_label = label 
4639          attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) 
4640   
4641          # similar to Dict, but simpler call format 
4642          result = dictOf(attr_label, attr_value).parseString(text) 
4643          print(result.dump()) 
4644          print(result['shape']) 
4645          print(result.shape)  # object attribute access works too 
4646          print(result.asDict()) 
4647      prints:: 
4648          [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] 
4649          - color: light blue 
4650          - posn: upper left 
4651          - shape: SQUARE 
4652          - texture: burlap 
4653          SQUARE 
4654          SQUARE 
4655          {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} 
4656      """ 
4657      return Dict( ZeroOrMore( Group ( key + value ) ) ) 
 4658   
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # Empty() markers whose parse action returns the current parse location
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if not asString:
        def extractText(s,l,t):
            # replace the token list in place with the raw slice, removing the
            # two marker names from the results
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    else:
        # return just the raw slice of the input between the two markers
        extractText = lambda s,l,t: s[t._original_start:t._original_end]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
4695   
4697      """ 
4698      Helper to undo pyparsing's default grouping of And expressions, even 
4699      if all but one are non-empty. 
4700      """ 
4701      return TokenConverter(expr).setParseAction(lambda t:t[0]) 
4702   
4704      """ 
4705      Helper to decorate a returned token with its starting and ending locations in the input string. 
4706      This helper adds the following results names: 
4707       - locn_start = location where matched expression begins 
4708       - locn_end = location where matched expression ends 
4709       - value = the actual parsed results 
4710   
4711      Be careful if the input text contains C{<TAB>} characters, you may want to call 
4712      C{L{ParserElement.parseWithTabs}} 
4713   
4714      Example:: 
4715          wd = Word(alphas) 
4716          for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): 
4717              print(match) 
4718      prints:: 
4719          [[0, 'ljsdf', 5]] 
4720          [[8, 'lksdjjf', 15]] 
4721          [[18, 'lkkjj', 23]] 
4722      """ 
4723      locator = Empty().setParseAction(lambda s,l,t: l) 
4724      return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) 
 4725   
4726   
4727   
# Ready-made, descriptively named instances of the zero-width positional expressions.
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Private grammar for regex-style "[...]" character-set strings (see _reBracketExpr below).

# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t: t[0][1])

# \x## / \X## / \0x## -> character with that hex code point.
# Take the digits after the x marker explicitly: str.lstrip() strips a *set* of
# characters, so lstrip(r'\0x') turned "\x00" into "" and left the "X" of "\X41"
# in place, and both then failed in int().
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(
    lambda s,l,t: unichr(int(t[0].lower().partition('x')[2], 16)))

# \0## -> character with that octal code point (only the backslash is dropped before int())
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t: unichr(int(t[0][1:], 8)))

# one member of the set: an escape, any printable other than backslash or ']', or a Unicode word character
_singleChar = (_escapedPunc
               | _escapedHexChar
               | _escapedOctChar
               | Word(printables, excludeChars=r'\]', exact=1)
               | Regex(r"\w", re.UNICODE))

# "a-z" style range, grouped as [first, last] with the dash suppressed
_charRange = Group(_singleChar + Suppress("-") + _singleChar)

# whole bracket expression: '[' optional '^' (named "negate") members (named "body") ']'
_reBracketExpr = (Literal("[")
                  + Optional("^").setResultsName("negate")
                  + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
                  + "]")
4742      r""" 
4743      Helper to easily define string ranges for use in Word construction.  Borrows 
4744      syntax from regexp '[]' string range definitions:: 
4745          srange("[0-9]")   -> "0123456789" 
4746          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz" 
4747          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" 
4748      The input string must be enclosed in []'s, and the returned string is the expanded 
4749      character set joined into a single string. 
4750      The values enclosed in the []'s may be: 
4751       - a single character 
4752       - an escaped character with a leading backslash (such as C{\-} or C{\]}) 
4753       - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)  
4754           (C{\0x##} is also supported for backwards compatibility)  
4755       - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) 
4756       - a range of any of the above, separated by a dash (C{'a-z'}, etc.) 
4757       - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) 
4758      """ 
4759      _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) 
4760      try: 
4761          return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) 
4762      except Exception: 
4763          return "" 
 4764   
4766      """ 
4767      Helper method for defining parse actions that require matching at a specific 
4768      column in the input text. 
4769      """ 
4770      def verifyCol(strg,locn,toks): 
4771          if col(locn,strg) != n: 
4772              raise ParseException(strg,locn,"matched token not at column %d" % n) 
 4773      return verifyCol 
4774   
4776      """ 
4777      Helper method for common parse actions that simply return a literal value.  Especially 
4778      useful when used with C{L{transformString<ParserElement.transformString>}()}. 
4779   
4780      Example:: 
4781          num = Word(nums).setParseAction(lambda toks: int(toks[0])) 
4782          na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) 
4783          term = na | num 
4784           
4785          OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] 
4786      """ 
4787      return lambda s,l,t: [replStr] 
 4788   
4790      """ 
4791      Helper parse action for removing quotation marks from parsed quoted strings. 
4792   
4793      Example:: 
4794          # by default, quotation marks are included in parsed results 
4795          quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] 
4796   
4797          # use removeQuotes to strip quotation marks from parsed results 
4798          quotedString.setParseAction(removeQuotes) 
4799          quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] 
4800      """ 
4801      return t[0][1:-1] 
 4802   
4804      """ 
4805      Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
4806      args are passed, they are forwarded to the given function as additional arguments after 
4807      the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the 
4808      parsed data to an integer using base 16. 
4809   
4810      Example (compare the last example to the one in L{ParserElement.transformString})::
4811          hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16)) 
4812          hex_ints.runTests(''' 
4813              00 11 22 aa FF 0a 0d 1a 
4814              ''') 
4815           
4816          upperword = Word(alphas).setParseAction(tokenMap(str.upper)) 
4817          OneOrMore(upperword).runTests(''' 
4818              my kingdom for a horse 
4819              ''') 
4820   
4821          wd = Word(alphas).setParseAction(tokenMap(str.title)) 
4822          OneOrMore(wd).setParseAction(' '.join).runTests(''' 
4823              now is the winter of our discontent made glorious summer by this sun of york 
4824              ''') 
4825      prints:: 
4826          00 11 22 aa FF 0a 0d 1a 
4827          [0, 17, 34, 170, 255, 10, 13, 26] 
4828   
4829          my kingdom for a horse 
4830          ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] 
4831   
4832          now is the winter of our discontent made glorious summer by this sun of york 
4833          ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] 
4834      """ 
4835      def pa(s,l,t): 
4836          return [func(tokn, *args) for tokn in t] 
 4837   
4838      try: 
4839          func_name = getattr(func, '__name__',  
4840                              getattr(func, '__class__').__name__) 
4841      except Exception: 
4842          func_name = str(func) 
4843      pa.__name__ = func_name 
4844   
4845      return pa 
4846   
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Parse action that converts each matched token to upper case. Use L{pyparsing_common.upcaseTokens} instead."""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Parse action that converts each matched token to lower case. Use L{pyparsing_common.downcaseTokens} instead."""
4881   
4900   
4909   
4911      """ 
4912      Helper to create a validating parse action to be used with start tags created 
4913      with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag 
4914      with a required attribute value, to avoid false matches on common tags such as 
4915      C{<TD>} or C{<DIV>}. 
4916   
4917      Call C{withAttribute} with a series of attribute names and values. Specify the list 
4918      of filter attributes names and values as: 
4919       - keyword arguments, as in C{(align="right")}, or 
4920       - as an explicit dict with C{**} operator, when an attribute name is also a Python 
4921            reserved word, as in C{**{"class":"Customer", "align":"right"}} 
4922       - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) 
4923      For attribute names with a namespace prefix, you must use the second or third form.  Attribute
4924      names are matched insensitive to upper/lower case. 
4925          
4926      If just testing for C{class} (with or without a namespace), use C{L{withClass}}. 
4927   
4928      To verify that the attribute exists, but without specifying a value, pass 
4929      C{withAttribute.ANY_VALUE} as the value. 
4930   
4931      Example:: 
4932          html = ''' 
4933              <div> 
4934              Some text 
4935              <div type="grid">1 4 0 1 0</div> 
4936              <div type="graph">1,3 2,3 1,1</div> 
4937              <div>this has no type</div> 
4938              </div> 
4939                   
4940          ''' 
4941          div,div_end = makeHTMLTags("div") 
4942   
4943          # only match div tag having a type attribute with value "grid" 
4944          div_grid = div().setParseAction(withAttribute(type="grid")) 
4945          grid_expr = div_grid + SkipTo(div | div_end)("body") 
4946          for grid_header in grid_expr.searchString(html): 
4947              print(grid_header.body) 
4948           
4949          # construct a match with any div tag having a type attribute, regardless of the value 
4950          div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) 
4951          div_expr = div_any_type + SkipTo(div | div_end)("body") 
4952          for div_header in div_expr.searchString(html): 
4953              print(div_header.body) 
4954      prints:: 
4955          1 4 0 1 0 
4956   
4957          1 4 0 1 0 
4958          1,3 2,3 1,1 
4959      """ 
4960      if args: 
4961          attrs = args[:] 
4962      else: 
4963          attrs = attrDict.items() 
4964      attrs = [(k,v) for k,v in attrs] 
4965      def pa(s,l,tokens): 
4966          for attrName,attrValue in attrs: 
4967              if attrName not in tokens: 
4968                  raise ParseException(s,l,"no matching attribute " + attrName) 
4969              if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: 
4970                  raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % 
4971                                              (attrName, tokens[attrName], attrValue)) 
 4972      return pa 
4973  withAttribute.ANY_VALUE = object() 
4974   
def withClass(classname, namespace=''):
    """
    Shorthand for C{L{withAttribute}} when the attribute to test is C{class},
    which is awkward to pass as a keyword argument because C{class} is a
    reserved word in Python.

    Parameters:
     - classname - required value of the class attribute; pass
          C{withAttribute.ANY_VALUE} to require only that the attribute exists
     - namespace - optional namespace prefix; when given, the attribute tested
          is C{"<namespace>:class"} instead of C{"class"} (default=C{''})

    Returns the parse action produced by C{withAttribute} for that attribute.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this <div> has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attrName = "%s:class" % namespace
    else:
        attrName = "class"
    # the name may contain ':' and is a reserved word anyway, so hand it over via ** expansion
    return withAttribute(**{attrName: classname})
 5009   
# Holder for the two operator-associativity markers; each marker is a distinct
# object() so it equals nothing but itself.
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
5015      """ 
5016      Helper method for constructing grammars of expressions made up of 
5017      operators working in a precedence hierarchy.  Operators may be unary or 
5018      binary, left- or right-associative.  Parse actions can also be attached 
5019      to operator expressions. The generated parser will also recognize the use  
5020      of parentheses to override operator precedences (see example below). 
5021       
5022      Note: if you define a deep operator list, you may see performance issues 
5023      when using infixNotation. See L{ParserElement.enablePackrat} for a 
5024      mechanism to potentially improve your parser performance. 
5025   
5026      Parameters: 
5027       - baseExpr - expression representing the most basic element (operand) of the nested expression grammar
5028       - opList - list of tuples, one for each operator precedence level in the 
5029        expression grammar; each tuple is of the form 
5030        (opExpr, numTerms, rightLeftAssoc, parseAction), where: 
5031         - opExpr is the pyparsing expression for the operator; 
5032            may also be a string, which will be converted to a Literal; 
5033            if numTerms is 3, opExpr is a tuple of two expressions, for the 
5034            two operators separating the 3 terms 
5035         - numTerms is the number of terms for this operator (must 
5036            be 1, 2, or 3) 
5037         - rightLeftAssoc is the indicator whether the operator is 
5038            right or left associative, using the pyparsing-defined 
5039            constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. 
5040         - parseAction is the parse action to be associated with 
5041            expressions matching this operator expression (the 
5042            parse action tuple member may be omitted); if the parse action 
5043            is passed a tuple or list of functions, this is equivalent to 
5044            calling C{setParseAction(*fn)} (L{ParserElement.setParseAction}) 
5045       - lpar - expression for matching left-parentheses (default=C{Suppress('(')}) 
5046       - rpar - expression for matching right-parentheses (default=C{Suppress(')')}) 
5047   
5048      Example:: 
5049          # simple example of four-function arithmetic with ints and variable names 
5050          integer = pyparsing_common.signed_integer 
5051          varname = pyparsing_common.identifier  
5052           
5053          arith_expr = infixNotation(integer | varname, 
5054              [ 
5055              ('-', 1, opAssoc.RIGHT), 
5056              (oneOf('* /'), 2, opAssoc.LEFT), 
5057              (oneOf('+ -'), 2, opAssoc.LEFT), 
5058              ]) 
5059           
5060          arith_expr.runTests(''' 
5061              5+3*6 
5062              (5+3)*6 
5063              -2--11 
5064              ''', fullDump=False) 
5065      prints:: 
5066          5+3*6 
5067          [[5, '+', [3, '*', 6]]] 
5068   
5069          (5+3)*6 
5070          [[[5, '+', 3], '*', 6]] 
5071   
5072          -2--11 
5073          [[['-', 2], '-', ['-', 11]]] 
5074      """ 
5075      ret = Forward() 
5076      lastExpr = baseExpr | ( lpar + ret + rpar ) 
5077      for i,operDef in enumerate(opList): 
5078          opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] 
5079          termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr 
5080          if arity == 3: 
5081              if opExpr is None or len(opExpr) != 2: 
5082                  raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 
5083              opExpr1, opExpr2 = opExpr 
5084          thisExpr = Forward().setName(termName) 
5085          if rightLeftAssoc == opAssoc.LEFT: 
5086              if arity == 1: 
5087                  matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) 
5088              elif arity == 2: 
5089                  if opExpr is not None: 
5090                      matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) 
5091                  else: 
5092                      matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) 
5093              elif arity == 3: 
5094                  matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ 
5095                              Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) 
5096              else: 
5097                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 
5098          elif rightLeftAssoc == opAssoc.RIGHT: 
5099              if arity == 1: 
5100                   
5101                  if not isinstance(opExpr, Optional): 
5102                      opExpr = Optional(opExpr) 
5103                  matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 
5104              elif arity == 2: 
5105                  if opExpr is not None: 
5106                      matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) 
5107                  else: 
5108                      matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) 
5109              elif arity == 3: 
5110                  matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ 
5111                              Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) 
5112              else: 
5113                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 
5114          else: 
5115              raise ValueError("operator must indicate right or left associativity") 
5116          if pa: 
5117              if isinstance(pa, (tuple, list)): 
5118                  matchExpr.setParseAction(*pa) 
5119              else: 
5120                  matchExpr.setParseAction(pa) 
5121          thisExpr <<= ( matchExpr.setName(termName) | lastExpr ) 
5122          lastExpr = thisExpr 
5123      ret <<= lastExpr 
5124      return ret 
 5125   
operatorPrecedence = infixNotation
"""(Deprecated) Old name for C{L{infixNotation}}; this alias will be removed in a future release."""

# Each quoted-string body accepts: any character other than the quote, CR, LF
# or backslash; a doubled quote; or a backslash escape (\x plus hex digits, or
# backslash plus any single character other than x).  The closing quote is
# matched separately and Combine joins the pieces into one token.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")

sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")

# double-quoted form is tried first, then single-quoted
quotedString = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"')
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
).setName("quotedString using single or double quotes")

# a quoted string with a leading 'u' prefix; works on a copy so quotedString itself is not shared
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
5136      """ 
5137      Helper method for defining nested lists enclosed in opening and closing 
5138      delimiters ("(" and ")" are the default). 
5139   
5140      Parameters: 
5141       - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression 
5142       - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression 
5143       - content - expression for items within the nested lists (default=C{None}) 
5144       - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString}) 
5145   
5146      If an expression is not provided for the content argument, the nested 
5147      expression will capture all whitespace-delimited content between delimiters 
5148      as a list of separate values. 
5149   
5150      Use the C{ignoreExpr} argument to define expressions that may contain 
5151      opening or closing characters that should not be treated as opening 
5152      or closing characters for nesting, such as quotedString or a comment 
5153      expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. 
5154      The default is L{quotedString}, but if no expressions are to be ignored, 
5155      then pass C{None} for this argument. 
5156   
5157      Example:: 
5158          data_type = oneOf("void int short long char float double") 
5159          decl_data_type = Combine(data_type + Optional(Word('*'))) 
5160          ident = Word(alphas+'_', alphanums+'_') 
5161          number = pyparsing_common.number 
5162          arg = Group(decl_data_type + ident) 
5163          LPAR,RPAR = map(Suppress, "()") 
5164   
5165          code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) 
5166   
5167          c_function = (decl_data_type("type")  
5168                        + ident("name") 
5169                        + LPAR + Optional(delimitedList(arg), [])("args") + RPAR  
5170                        + code_body("body")) 
5171          c_function.ignore(cStyleComment) 
5172           
5173          source_code = ''' 
5174              int is_odd(int x) {  
5175                  return (x%2);  
5176              } 
5177                   
5178              int dec_to_hex(char hchar) {  
5179                  if (hchar >= '0' && hchar <= '9') {  
5180                      return (ord(hchar)-ord('0'));  
5181                  } else {  
5182                      return (10+ord(hchar)-ord('A')); 
5183                  }  
5184              } 
5185          ''' 
5186          for func in c_function.searchString(source_code): 
5187              print("%(name)s (%(type)s) args: %(args)s" % func) 
5188   
5189      prints:: 
5190          is_odd (int) args: [['int', 'x']] 
5191          dec_to_hex (int) args: [['char', 'hchar']] 
5192      """ 
5193      if opener == closer: 
5194          raise ValueError("opening and closing strings cannot be the same") 
5195      if content is None: 
5196          if isinstance(opener,basestring) and isinstance(closer,basestring): 
5197              if len(opener) == 1 and len(closer)==1: 
5198                  if ignoreExpr is not None: 
5199                      content = (Combine(OneOrMore(~ignoreExpr + 
5200                                      CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
5201                                  ).setParseAction(lambda t:t[0].strip())) 
5202                  else: 
5203                      content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS 
5204                                  ).setParseAction(lambda t:t[0].strip())) 
5205              else: 
5206                  if ignoreExpr is not None: 
5207                      content = (Combine(OneOrMore(~ignoreExpr +  
5208                                      ~Literal(opener) + ~Literal(closer) + 
5209                                      CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
5210                                  ).setParseAction(lambda t:t[0].strip())) 
5211                  else: 
5212                      content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + 
5213                                      CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
5214                                  ).setParseAction(lambda t:t[0].strip())) 
5215          else: 
5216              raise ValueError("opening and closing arguments must be strings if no content expression is given") 
5217      ret = Forward() 
5218      if ignoreExpr is not None: 
5219          ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) 
5220      else: 
5221          ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) ) 
5222      ret.setName('nested %s%s expression' % (opener,closer)) 
5223      return ret 
 5224   
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Side effects: C{indentStack} is modified while parsing (a column is pushed
    when a deeper block is entered and popped when it is left), and
    C{blockStatementExpr} itself is modified by a call to its C{ignore} method
    with a backslash followed by a line end.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # nothing left to compare once the end of the input has been reached
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            # deeper than the current block is a fatal error; shallower is an ordinary mismatch
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            # entering a deeper block: remember its column
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        # Both the current level and its enclosing level must be on the stack.
        # Checking the length first means a one-element stack is reported as an
        # ordinary parse failure instead of an IndexError on indentStack[-2].
        if len(indentStack) < 2 or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more line ends; only tabs and spaces are skipped before each one
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5338   
# characters with code points 0xC0-0xFF, leaving out 0xD7 and 0xF7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# characters with code points 0xA1-0xBF, plus 0xD7 and 0xF7
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for a tag name made of letters, digits, '_' and ':'
anyOpenTag, anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums + "_:").setName('any tag')
)

# entity name -> replacement character (nbsp maps to an ordinary space)
_htmlEntityMap = {
    "gt": '>',
    "lt": '<',
    "amp": '&',
    "nbsp": ' ',
    "quot": '"',
    "apos": "'",
}
# matches "&name;" for the names above; the name is captured in the "entity" group
commonHTMLEntity = Regex(
    "&(?P<entity>%s);" % '|'.join(_htmlEntityMap)
).setName("common HTML entity")
5346      """Helper parser action to replace common HTML entities with their special characters""" 
5347      return _htmlEntityMap.get(t.entity) 
 5348   
5349   
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to the end of the current line, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")

# a backslash immediately followed by a newline keeps the comment going onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/')
    | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# One unquoted list item: runs of printable non-comma characters, optionally
# followed by spaces/tabs as long as a comma or line end does not come next.
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")

commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="" )
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   Use L{pyparsing_common.comma_separated_list} instead."""
5378      """ 
5379      Here are some common low-level expressions that may be useful in jump-starting parser development: 
5380       - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>}) 
5381       - common L{programming identifiers<identifier>} 
5382       - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>}) 
5383       - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>} 
5384       - L{UUID<uuid>} 
5385       - L{comma-separated list<comma_separated_list>} 
5386      Parse actions: 
5387       - C{L{convertToInteger}} 
5388       - C{L{convertToFloat}} 
5389       - C{L{convertToDate}} 
5390       - C{L{convertToDatetime}} 
5391       - C{L{stripHTMLTags}} 
5392       - C{L{upcaseTokens}} 
5393       - C{L{downcaseTokens}} 
5394   
5395      Example:: 
5396          pyparsing_common.number.runTests(''' 
5397              # any int or real number, returned as the appropriate type 
5398              100 
5399              -100 
5400              +100 
5401              3.14159 
5402              6.02e23 
5403              1e-12 
5404              ''') 
5405   
5406          pyparsing_common.fnumber.runTests(''' 
5407              # any int or real number, returned as float 
5408              100 
5409              -100 
5410              +100 
5411              3.14159 
5412              6.02e23 
5413              1e-12 
5414              ''') 
5415   
5416          pyparsing_common.hex_integer.runTests(''' 
5417              # hex numbers 
5418              100 
5419              FF 
5420              ''') 
5421   
5422          pyparsing_common.fraction.runTests(''' 
5423              # fractions 
5424              1/2 
5425              -3/4 
5426              ''') 
5427   
5428          pyparsing_common.mixed_integer.runTests(''' 
5429              # mixed fractions 
5430              1 
5431              1/2 
5432              -3/4 
5433              1-3/4 
5434              ''') 
5435   
5436          import uuid 
5437          pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 
5438          pyparsing_common.uuid.runTests(''' 
5439              # uuid 
5440              12345678-1234-5678-1234-567812345678 
5441              ''') 
5442      prints:: 
5443          # any int or real number, returned as the appropriate type 
5444          100 
5445          [100] 
5446   
5447          -100 
5448          [-100] 
5449   
5450          +100 
5451          [100] 
5452   
5453          3.14159 
5454          [3.14159] 
5455   
5456          6.02e23 
5457          [6.02e+23] 
5458   
5459          1e-12 
5460          [1e-12] 
5461   
5462          # any int or real number, returned as float 
5463          100 
5464          [100.0] 
5465   
5466          -100 
5467          [-100.0] 
5468   
5469          +100 
5470          [100.0] 
5471   
5472          3.14159 
5473          [3.14159] 
5474   
5475          6.02e23 
5476          [6.02e+23] 
5477   
5478          1e-12 
5479          [1e-12] 
5480   
5481          # hex numbers 
5482          100 
5483          [256] 
5484   
5485          FF 
5486          [255] 
5487   
5488          # fractions 
5489          1/2 
5490          [0.5] 
5491   
5492          -3/4 
5493          [-0.75] 
5494   
5495          # mixed fractions 
5496          1 
5497          [1] 
5498   
5499          1/2 
5500          [0.5] 
5501   
5502          -3/4 
5503          [-0.75] 
5504   
5505          1-3/4 
5506          [1.75] 
5507   
5508          # uuid 
5509          12345678-1234-5678-1234-567812345678 
5510          [UUID('12345678-1234-5678-1234-567812345678')] 
5511      """ 
5512   
5513      convertToInteger = tokenMap(int) 
5514      """ 
5515      Parse action for converting parsed integers to Python int 
5516      """ 
5517   
5518      convertToFloat = tokenMap(float) 
5519      """ 
5520      Parse action for converting parsed numbers to Python float 
5521      """ 
5522   
5523      integer = Word(nums).setName("integer").setParseAction(convertToInteger) 
5524      """expression that parses an unsigned integer, returns an int""" 
5525   
5526      hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) 
5527      """expression that parses a hexadecimal integer, returns an int""" 
5528   
5529      signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) 
5530      """expression that parses an integer with optional leading sign, returns an int""" 
5531   
5532      fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") 
5533      """fractional expression of an integer divided by an integer, returns a float""" 
5534      fraction.addParseAction(lambda t: t[0]/t[-1]) 
5535   
5536      mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") 
5537      """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" 
5538      mixed_integer.addParseAction(sum) 
5539   
5540      real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) 
5541      """expression that parses a floating point number and returns a float""" 
5542   
5543      sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) 
5544      """expression that parses a floating point number with optional scientific notation and returns a float""" 
5545   
5546       
5547      number = (sci_real | real | signed_integer).streamline() 
5548      """any numeric expression, returns the corresponding Python type""" 
5549   
5550      fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) 
5551      """any int or real number, returned as float""" 
5552       
5553      identifier = Word(alphas+'_', alphanums+'_').setName("identifier") 
5554      """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" 
5555       
5556      ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") 
5557      "IPv4 address (C{0.0.0.0 - 255.255.255.255})" 
5558   
5559      _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") 
5560      _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") 
5561      _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") 
5562      _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) 
5563      _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") 
5564      ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") 
5565      "IPv6 address (long, short, or mixed form)" 
5566       
5567      mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") 
5568      "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" 
5569   
5570      @staticmethod 
5572          """ 
5573          Helper to create a parse action for converting parsed date string to Python datetime.date 
5574   
5575          Params - 
5576           - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) 
5577   
5578          Example:: 
5579              date_expr = pyparsing_common.iso8601_date.copy() 
5580              date_expr.setParseAction(pyparsing_common.convertToDate()) 
5581              print(date_expr.parseString("1999-12-31")) 
5582          prints:: 
5583              [datetime.date(1999, 12, 31)] 
5584          """ 
5585          def cvt_fn(s,l,t): 
5586              try: 
5587                  return datetime.strptime(t[0], fmt).date() 
5588              except ValueError as ve: 
5589                  raise ParseException(s, l, str(ve)) 
 5590          return cvt_fn 
 5591   
5592      @staticmethod 
5594          """ 
5595          Helper to create a parse action for converting parsed datetime string to Python datetime.datetime 
5596   
5597          Params - 
5598           - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) 
5599   
5600          Example:: 
5601              dt_expr = pyparsing_common.iso8601_datetime.copy() 
5602              dt_expr.setParseAction(pyparsing_common.convertToDatetime()) 
5603              print(dt_expr.parseString("1999-12-31T23:59:59.999")) 
5604          prints:: 
5605              [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] 
5606          """ 
5607          def cvt_fn(s,l,t): 
5608              try: 
5609                  return datetime.strptime(t[0], fmt) 
5610              except ValueError as ve: 
5611                  raise ParseException(s, l, str(ve)) 
 5612          return cvt_fn 
5613   
5614      iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date") 
5615      "ISO8601 date (C{yyyy-mm-dd})" 
5616   
5617      iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") 
5618      "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" 
5619   
5620      uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") 
5621      "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" 
5622   
5623      _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() 
5624      @staticmethod 
5638   
5639      _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')  
5640                                          + Optional( White(" \t") ) ) ).streamline().setName("commaItem") 
5641      comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") 
5642      """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" 
5643   
5644      upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) 
5645      """Parse action to convert tokens to upper case.""" 
5646   
5647      downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) 
5648      """Parse action to convert tokens to lower case.""" 
5649   
5650   
if __name__ == "__main__":
    # Demo run when the module is executed as a script: a small
    # "SELECT <columns> FROM <tables>" grammar is exercised through runTests,
    # followed by a few of the pyparsing_common expressions.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # column names: '.'-delimited identifiers combined into one token, with
    # upcaseTokens as the parse action
    columnName = delimitedList(ident, ".", combine=True)
    columnName.setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName))
    columnNameList.setName("columns")
    columnSpec = ('*' | columnNameList)

    # table names are built the same way as column names
    tableName = delimitedList(ident, ".", combine=True)
    tableName.setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName))
    tableNameList.setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # the test string carries its own '#' comment lines describing each case
    sql_samples = """
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """
    simpleSQL.runTests(sql_samples)

    # number and fnumber are run, in that order, over the same inputs
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    for numeric_expr in (pyparsing_common.number, pyparsing_common.fnumber):
        numeric_expr.runTests(numeric_samples)

    hex_samples = """
        100
        FF
        """
    pyparsing_common.hex_integer.runTests(hex_samples)

    import uuid
    uuid_expr = pyparsing_common.uuid
    uuid_expr.setParseAction(tokenMap(uuid.UUID))
    uuid_expr.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5721