"""
#
# pso.parser.py - Python Service Objects Parser
#
# Author: Thanos Vassilakis thanos@0x01.com
#
# Copyright (c) thanos vassilakis 2000,2001, 2002
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation; either version 2.1 of the
# License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.
#
# See terms of license at gnu.org.
#
# $Id: parser.py,v 1.18 2004/07/12 04:04:43 thanos Exp $
#
SYNOPSIS
Scenario One
First you make a template - this is just a normal HTML file with a few extra tags:
For example, test.tmpl:
My Email: thanos@0x01.com
My Name:
Now create the mypanel package, mypanel.py
from pso.parser import Tag
class Email(Tag):
"renders email as a uri"
def render(self, cdata):
return '%s' % ( cdata, cdata)
class Name(Tag):
"renders name as a uri"
def render(self, cdata=''):
return '%(name)s' % self.getAttrs()
Save it in the python path.
Now write the script:
from pso.parser import Parser
psoParser = Parser()
psoTree = psoParser.parseFile("test.tmpl")
print psoTree.render()
Running this script should generate:
My Email: thanos@0x01.com
My Name: Thanos Vassilakis
Scenario Two
Now you might want to just extract the tags and document them...
class TagDocumentor:
def document(self, object, cdata=''):
if object:
self.documentation += '\t
%s | %s
\n' % (object.__class__, object.__class__.__doc__)
def do(self, infile):
self.documentation =''
psoParser = Parser()
psoTree = psoParser.parseFile(infile, noCache=1)
psoTree.render(self.document)
print "%s
"
TagDocumentor().do("test.tmpl")
And you will get:
mypanel.Email | - renders email as a uri
mypanel.Name | - renders name as a uri
Scenario Three
You want to write a robot (this is simplistic - beware):
class a(Tag):
def getUrl(self, cdata):
return self.getAttrs()['href'], cdata
class A(a): pass
class Robot:
def process(self, object, cdata=''):
if object:
url, linkText = object.getUrl(cdata)
if url not in self.links:
self.links[url] = linkText
self.do(url)
def do(self, url):
psoParser = Parser()
psoTree = psoParser.parseFile(self.getPage(url), allTags=1)
psoTree.render(self.process)
def run(self, startUrl):
self.links = {}
self.do(startUrl)
for url, linkText in self.links.items():
print '%s' % (url, linkText)
"""
# CVS revision keyword for this module.
__version__="$Revision: 1.18 $"
# Public names exported by ``from <this module> import *``.
__all__ =["Tag", "Parser", "CachedParser"]
from types import *
from time import time
from string import letters, digits, whitespace
from os.path import split, join, exists
from stat import ST_MTIME
from statcache import stat, forget
from cPickle import dump, load
from copy import copy
import traceback
import sys
from util import log
from table import CIMap
# When true, PSOimport and Parser.parseFile swallow their exceptions without
# printing a traceback; set to a false value to see those tracebacks.
silent=1
class PSOParts:
    """
    Parser bookkeeping attached to one node of the template tree.

    Holds the node's parent, tag name, class name, source span
    (``start``/``end``), original tag text, optional attribute name and the
    list of child nodes.
    """

    def __init__(self, parent=None, name='', className='', start=0, end=0, text='', attribute=None):
        self.children = []
        self.setup(parent, name, className, start, end, text, attribute)

    def setup(self, parent, name, className, start, end, text, attribute):
        """
        (Re)assign every descriptive field; the children list is untouched.
        """
        self.parent, self.name, self.className = parent, name, className
        self.start, self.end, self.text = start, end, text
        self.attribute = attribute
def psoSetup(object, parent, name, className, start, end, text, attribute):
    """
    Attach parser bookkeeping to a freshly created tag object.

    A PSO instance already owns a PSOParts, so it is updated in place (its
    children list is kept).  Any other object simply gets a new PSOParts
    stored on it as ``psoParts``.
    """
    if not isinstance(object, PSO):
        object.psoParts = PSOParts(parent, name, className, start, end, text, attribute)
        return
    object.psoParts.setup(parent, name, className, start, end, text, attribute)
class PSO:
    """
    Base class for the nodes of a parsed template tree.

    A node keeps the attributes of its tag in ``self.attrs`` (a CIMap) and
    its position/children bookkeeping in ``self.psoParts`` (a PSOParts).
    """

    def __init__(self, **kw):
        """
        @param kw: the keyword arguments are the attributes set on the tag.
        """
        self.attrs = CIMap(kw)
        self.init()
        self.psoParts = PSOParts()

    def init(self):
        """
        Override to set up the tag's values.

        Called from __init__ after ``self.attrs`` exists and before
        ``self.psoParts`` is created.
        """

    def getAttrs(self):
        """
        @return: the attribute map built from the constructor keywords.
        """
        return self.attrs

    def setup(self, parent, name, start, end, text):
        """
        Replaces the node's bookkeeping with a fresh PSOParts.

        @param parent: the enclosing node.
        @param name: the tag's name.
        @param start: character start position of the tag in the template.
        @param end: character end position of the tag in the template.
        @param text: the tag's text in the template before rendering.
        """
        # Keywords are required here: PSOParts takes className as its third
        # positional parameter, so passing start/end/text positionally would
        # shift every value into the wrong field.
        self.psoParts = PSOParts(parent, name, start=start, end=end, text=text)

    def getChildren(self):
        """
        @return: the list of the children nested within this node.
        """
        return self.psoParts.children

    def append(self, child):
        """
        Adds a child; a string following another string is merged into it.
        """
        children = self.psoParts.children
        if children and isinstance(children[-1], str) and isinstance(child, str):
            children[-1] += child
        else:
            children.append(child)

    def preProcess(self, renderer=None):
        """
        Called before the renderer has visited the nested tags.  Override
        this method when you need to do validation before the nested tags
        are rendered.  The returned string starts the node's output.
        """
        return ''

    def travers(self, renderer=None):
        """
        Renders this node and everything below it.

        @param renderer: callable invoked as renderer(None, text) for string
            children and renderer(node, cdata) for a node that has a parent.
            Non-string children are rendered via their own travers().
        @return: the rendered text.  If anything raises during rendering the
            return value is instead a message holding the node name, the
            error and the traceback.
        """
        # preProcess receives the object a bound-method renderer belongs to
        # (im_self on Python 2, __self__ elsewhere); a plain callable has no
        # owner, so None is passed instead of failing.
        owner = getattr(renderer, 'im_self', getattr(renderer, '__self__', None))
        result = self.preProcess(owner)
        try:
            for child in self.psoParts.children:
                if isinstance(child, str):
                    retval = renderer(None, child)
                else:
                    retval = str(child.travers(renderer))
                if retval:
                    result += retval
            # The root of the tree has no parent and is never rendered itself.
            if self.psoParts.parent:
                result = renderer(self, result)
        except Exception:
            e = sys.exc_info()[1]
            trace = "\n".join(traceback.format_exception(*sys.exc_info()))
            try:
                result = "%s: %s\n\n%s" % (self.psoParts.name, e, trace)
            except Exception:
                # Even the node name could not be formatted; report without it.
                result = "%s\n\n%s" % (e, trace)
        return result
class PSOTree(PSO):
    """
    Root node of a parsed template; rendering starts here.
    """

    def renderer(self, object, cdata=''):
        """
        Default renderer: plain text is returned unchanged, a tag object is
        asked to render itself.
        """
        if object:
            return object.render(self, cdata)
        return cdata

    def render(self, renderer = None):
        """
        Walks the tree with the given renderer, or with self.renderer when
        none (or a false value) is supplied.
        """
        return self.travers(renderer or self.renderer)
class Token:
    """Common base of the tag markers returned by the Tokenizer."""


class CData:
    """Marker returned by the Tokenizer for text found between tags."""


class Comment:
    """Marker class; not referenced by the code visible in this module."""


class StartTag(Token):
    """Marker for an opening tag."""


class SingleTag(Token):
    """Marker for a self-closing tag (text ending in '/>')."""


class EndTag(Token):
    """Marker for a closing tag (text starting with '</')."""
    # Presumably the offset of the tag name inside '</name>'; the Tokenizer
    # uses the same value (2) when it reads an end tag's name.
    posstart = 2
def getName(text, start=1, oldname=''):
    """
    Extracts the tag name from the raw text of a tag.

    @param text: the complete tag text, e.g. '<a href="x">' or '</a>'.
    @param start: offset of the first character of the name (1 for start
        and single tags, 2 for end tags).
    @param oldname: when given, returned unchanged without looking at text.
    @return: the characters from start up to the first whitespace.  When
        the tag contains no whitespace the trailing '>' is dropped, or the
        trailing '/>' if a '/' occurs after start.
    """
    if oldname:
        return oldname
    body = text[start:]
    # Any whitespace ends the name, not just a blank: attributes may be
    # separated from the name by a tab or a line break.
    for pos, ch in enumerate(body):
        if ch in whitespace:
            return body[:pos]
    if '/' in body:
        return body[:-2]
    return body[:-1]
# Characters allowed as the first / as any later character of an attribute name.
firstKeyChar = letters + digits+'_'
keyChars = letters + digits+'_.-'


def getAttrs(attrs):
    """
    Parses the attributes out of the raw text of a tag.

    @param attrs: the complete tag text, e.g. '<a href="x" checked>'.
    @return: a dictionary mapping attribute names to their values.  Values
        may be quoted with ' or ", or unquoted (ended by whitespace); an
        attribute without a value maps to ''.  An empty dictionary is
        returned when nothing follows the tag name.
    """
    # The attribute section starts at the first whitespace after the tag
    # name.  Any whitespace counts, so attributes introduced by a tab or a
    # line break are not lost.
    ind = -1
    for pos, ch in enumerate(attrs):
        if ch in whitespace:
            ind = pos
            break
    if ind < 0 or len(attrs) - ind <= 2:
        return {}
    attrs = attrs[ind:-1]          # drop the tag name and the closing '>'

    result = {}
    key = value = delim = ''
    state = 'start'
    pos = 0
    length = len(attrs)
    while pos < length:
        ch = attrs[pos]
        pos += 1
        if state == 'start':
            # Between attributes: forget the previous pair, skip anything
            # that cannot start a name.
            key = value = ''
            if ch in whitespace:
                continue
            if ch in firstKeyChar:
                state = 'key'
                key = ch
        elif state == 'key':
            if ch in keyChars:
                key += ch
            elif ch in whitespace:
                state = '='
            elif ch == '=':
                state = 'value'
                delim = ''
        elif state == '=':
            # A name was read; waiting for '=' or for the next attribute.
            if ch in whitespace:
                continue
            if ch == '=':
                state = 'value'
                delim = ''
            else:
                # The name had no value; read this character again as the
                # start of the next attribute.
                result[key] = ''
                state = 'start'
                pos -= 1
        elif state == 'value':
            if not delim:
                if ch in '"\'':
                    delim = ch
                elif ch not in whitespace:
                    # Unquoted value: it runs until the next whitespace.
                    delim = whitespace
                    value += ch
            elif ch in delim:
                result[key] = value
                state = 'start'
            else:
                value += ch
    # The text ended while an attribute was still pending.
    if key:
        result[key] = value
    return result
class Tag(PSO):
    """
    Base class for template tags: subclass it and override __call__ (or
    render) to produce the tag's output.
    """

    def processAttrs(self, **kws):
        """
        @param kws: values that override or extend the tag's attributes.
        @return: a new dictionary holding the tag's attributes updated with
            the given keywords.
        """
        attrs = {}
        for k, v in self.getAttrs().items():
            attrs[k] = v
        attrs.update(kws)
        return attrs

    def buildAttrList(self, attrs):
        """
        Formats a dictionary as the attribute section of a tag.

        @param attrs: mapping of attribute names to values.  The 'tagname'
            entry is skipped and a value of None yields a bare name.
        @return: a string such as ' href="x" checked'; every attribute is
            preceded by a blank.
        """
        parts = []
        for k, v in attrs.items():
            if k == 'tagname':
                continue
            if v is None:
                parts.append(" %s" % k)
                continue
            text = str(v)
            if '"' not in text:
                parts.append(' %s="%s"' % (k, v))
            elif "'" not in text:
                parts.append(" %s='%s'" % (k, v))
            else:
                # Both quote characters occur: neither delimiter is safe, so
                # the double quotes are written as character references.
                parts.append(' %s="%s"' % (k, text.replace('"', '&quot;')))
        return "".join(parts)

    def buildAttrs(self, **kws):
        """
        @return: the tag's attributes, updated with kws, formatted by
            buildAttrList.
        """
        return self.buildAttrList(self.processAttrs(**kws))

    def render(self, renderer, cdata=''):
        """
        Renders the tag.  When the tag was referenced with an attribute name
        (stored in psoParts.attribute) the method of that name is called,
        otherwise the tag itself is called.
        """
        if self.psoParts.attribute:
            return getattr(self, self.psoParts.attribute)(renderer, cdata)
        return self(renderer, cdata)

    def __call__(self, renderer, cdata=''):
        """
        Override to render tag. Here is where everything happens!
        The default returns the nested content unchanged.
        """
        return cdata
class PSOTag(Tag):
    """Plain subclass of Tag; adds no behaviour of its own."""
class Tokenizer:
    """
    Splits template text into character data and tags.

    getToken()/getToken2() return a 5-tuple
    (kind, attrs, start, end, text) where kind is CData, StartTag,
    SingleTag or EndTag, or None once the text is exhausted.
    """
    # Scanner states.  TagStart is not used by the code in this class.
    TagToFind=0
    PsoTagFound=1
    TagFound =2
    TagStart=3

    def __init__(self, text, allTags=0):
        """
        @param text: the template text to scan.
        @param allTags: read by getToken2 only; when true it reports every
            tag, not just the ones starting with 'pso'.
        """
        self.text = text
        self.allTags = allTags
        self.textLength = len(self.text)
        self.reset()

    def reset(self):
        """Restarts scanning at the beginning of the text."""
        self.state = self.TagToFind
        self.index = 0

    def _tagKind(self, text):
        """Returns (marker class, offset of the tag name) for a raw tag."""
        if text[-2] == '/':
            return SingleTag, 1
        if text[1] == '/':
            return EndTag, 2
        return StartTag, 1

    def getToken2(self, defaultModule, reject=None, accept=None):
        """
        Alternative scanner that by default only reports tags whose name
        starts with 'pso' (any case); everything else is character data.

        @param defaultModule: prefixed as 'module:' to tag names that carry
            no module of their own; such tags are skipped when it is empty.
        @param reject: mapping of names whose tags are left in the text.
        @param accept: not used.
        @return: see the class documentation.
        """
        if reject is None:
            reject = {}
        blockstart = self.index
        while self.index < self.textLength:
            try:
                if self.state == self.TagToFind:
                    self.index = self.text.index('<', self.index)
                    ch1 = self.text[self.index+1]
                    if ch1 == '/':
                        dif = 1
                        ch1 = self.text[self.index+dif+1]
                    else:
                        dif = 0
                    nameStart = self.index + dif + 1
                    if self.text[nameStart:nameStart+3].lower() == 'pso':
                        self.state = self.PsoTagFound
                        return CData, '', blockstart, self.index, self.text[blockstart:self.index]
                    if self.allTags and ch1 != '!':
                        self.state = self.TagFound
                        return CData, '', blockstart, self.index, self.text[blockstart:self.index]
                    # Not a tag of interest: step over it, it stays part of
                    # the current block of character data.
                    self.index = self.text.index('>', self.index+1)+1
                elif self.state in (self.PsoTagFound, self.TagFound):
                    tagstart = self.index
                    self.index = self.text.index('>', tagstart)+1
                    text = self.text[tagstart:self.index]
                    tag, startpos = self._tagKind(text)
                    rejectTag = 0
                    if self.state == self.PsoTagFound:
                        if tag is not EndTag:
                            attrs = getAttrs(text)
                            # Same default an end tag gets, so the lookup
                            # below cannot fail on a tag without 'pso='.
                            attrs.setdefault('pso', 'pso')
                        else:
                            attrs = {'pso':'pso'}
                    else:
                        name = getName(text, startpos)
                        if name.find(':') < 0:
                            if defaultModule:
                                name = defaultModule+':'+name
                            else:
                                rejectTag = 1
                        attrs = {'pso':name}
                    # The tag has been consumed whether or not it is
                    # reported; the next call must look for a new '<'.
                    self.state = self.TagToFind
                    if not rejectTag and attrs['pso'] not in reject:
                        return tag, attrs, tagstart, self.index, text
            except (ValueError, IndexError):
                # No further '<' or '>' (or the text ends right after a
                # '<'): the rest of the text is character data.
                self.state = self.TagToFind
                self.index = len(self.text)
                return CData, '', blockstart, self.index, self.text[blockstart:self.index]
        return None, '', 0, 0, ''

    def getToken(self, defaultModule, reject=None, accept=None):
        """
        Returns the next token; every tag except '<!...>' is reported.

        @param defaultModule: not used by this scanner.
        @param reject: mapping of tag names whose tags are left in the text.
        @param accept: not used.
        @return: see the class documentation.  The attrs of a tag hold
            'tagname' and 'pso' (both the tag name) updated with the
            attributes written in the tag.
        """
        if reject is None:
            reject = {}
        blockstart = self.index
        while self.index < self.textLength:
            try:
                if self.state == self.TagToFind:
                    self.index = self.text.index('<', self.index)
                    if self.text[self.index+1] != '!':
                        self.state = self.TagFound
                        return CData, '', blockstart, self.index, self.text[blockstart:self.index]
                    self.index = self.text.index('>', self.index+1)+1
                elif self.state == self.TagFound:
                    tagstart = self.index
                    self.index = self.text.index('>', tagstart)+1
                    self.state = self.TagToFind
                    text = self.text[tagstart:self.index]
                    tag, startpos = self._tagKind(text)
                    name = getName(text, startpos)
                    if name == 'pso' or name == '/pso':
                        attrs = {'tagname':'pso', 'pso':name}
                    else:
                        attrs = {'tagname':name, 'pso':name}
                    attrs.update(getAttrs(text))
                    # A rejected tag is not reported; blockstart is left
                    # alone so its text ends up in the next CData token.
                    if name not in reject:
                        return tag, attrs, tagstart, self.index, text
            except (ValueError, IndexError):
                # No further '<' or '>' (or the text ends right after a
                # '<'): the rest of the text is character data.
                self.state = self.TagToFind
                self.index = len(self.text)
                return CData, '', blockstart, self.index, self.text[blockstart:self.index]
        return None, '', 0, 0, ''
def PSOimport(module, object=None, doReload=0):
    """
    Imports a module, or one object out of a module.

    @param module: the (possibly dotted) module name.
    @param object: name of the attribute to fetch from the module; when
        empty the module itself is returned.
    @param doReload: when true the imported module is reloaded first.
    @return: the module or object, or None when anything goes wrong (the
        traceback is printed unless the module-level ``silent`` is true).
    """
    try:
        if not object:
            mod = __import__(module)
            if doReload:
                reload(mod)
            # __import__('a.b.c') returns package 'a'; walk down to 'c'.
            for comp in module.split('.')[1:]:
                mod = getattr(mod, comp)
            return mod
        # A non-empty fromlist makes __import__ return the leaf module.
        mod = __import__(module, globals(), locals(), [object])
        if doReload:
            reload(mod)
        return getattr(mod, object)
    except Exception:
        # Best effort by design: an unknown tag must not abort parsing.
        if not silent:
            import traceback
            traceback.print_exc()
        return None
class Parser:
    """
    pso.Parser(defaultModule) - Creates a new parser.

    The object behind a tag name is looked up, in this order: as an
    attribute 'tag_<name>' of the parser, in the globals of this module,
    and finally by importing it.  A name written 'module:object' is
    imported from that module; a name without a module is imported from
    defaultModule when one was given.
    """
    sShared={}

    def __init__(self, defaultModule=""):
        self.tokenTree = PSOTree()
        self.sTagsAccepted = {}
        self.sTagsRejected = {}
        self.defaultModule = defaultModule
        # parse() overwrites this; set here so that findObject can be used
        # on a parser that has not parsed anything yet.
        self.reload = 0

    def clear(self):
        """Forgets every accepted and rejected tag and the shared map."""
        self.sTagsAccepted = {}
        self.sTagsRejected = {}
        self.__class__.sShared = {}

    def _readFile(self, filePath):
        """Returns the content of a file, closing it afterwards."""
        f = open(filePath)
        try:
            return f.read()
        finally:
            f.close()

    def parseFile(self, filePath, oPath='', noCache=1, reload=0, allTags=0):
        """
        Parses a template file.

        @param filePath: path of the template.
        @param oPath: directory for the pickled tree (used when noCache is
            false).  When empty the pickle is written next to the template
            with the extension '.pso'.
        @param noCache: when true the template is always parsed and nothing
            is written; when false a pickled tree at least as new as the
            template is loaded instead of parsing.
        @param reload: passed on to parse().
        @param allTags: passed on to parse().
        @return: the token tree.
        """
        if noCache:
            self.parse(self._readFile(filePath), reload, allTags)
            return self.tokenTree

        ttime = stat(filePath)[ST_MTIME]
        if oPath:
            ofilePath = join(oPath, split(filePath)[1])
        else:
            # splitext copes with a name that has no extension, which
            # str.rindex does not (it raises instead of returning -1).
            from os.path import splitext
            ofilePath = splitext(filePath)[0] + '.pso'

        loaded = 0
        try:
            if stat(ofilePath)[ST_MTIME] >= ttime:
                # NOTE(review): unpickling executes code chosen by the file;
                # the cache directory must not be writable by untrusted users.
                cache = open(ofilePath, 'rb')
                try:
                    tagsAccepted, tagsRejected, tree = load(cache)
                finally:
                    cache.close()
                self.tokenTree = tree
                self.sTagsAccepted.update(tagsAccepted)
                self.sTagsRejected.update(tagsRejected)
                loaded = 1
        except Exception:
            # A missing or unreadable cache is not an error: parse instead.
            if not silent:
                import traceback
                traceback.print_exc()
        if not loaded:
            self.parse(self._readFile(filePath), reload, allTags)
            out = open(ofilePath, 'wb')
            try:
                dump((self.sTagsAccepted, self.sTagsRejected, self.tokenTree), out)
            finally:
                out.close()
        forget(ofilePath)
        forget(filePath)
        return self.tokenTree

    def parse(self, text, reload=0, allTags=0):
        """
        Parses template text into self.tokenTree and returns the tree.

        @param text: the template text.
        @param reload: when true, modules providing tags are reloaded.
        @param allTags: handed to the Tokenizer.
        """
        self.reload = reload
        self.tokenizer = Tokenizer(text, allTags)
        return self.processNode(self.tokenTree)

    def getPSO(self, parent, args):
        """
        Turns a tag token into the node that represents it.

        @param parent: the node the tag is nested in.
        @param args: the token tuple (tag, attrs, start, end, text).
        @return: an instance of the class found for the tag, the string
            form of a non-class object found for it, or the tag's own text
            when nothing was found (the name is then remembered as
            rejected).
        """
        import inspect
        tag, attrs, start, end, text = args
        className = attrs['pso']
        if className in self.sTagsRejected:
            return text
        if className in self.sTagsAccepted:
            renderer, attribute = self.sTagsAccepted[className]
        else:
            renderer, attribute = self.findObject(className)
        if not renderer:
            self.sTagsRejected[className] = None
            return text
        # isclass recognises old- and new-style classes alike; testing for
        # ClassType alone would stringify every new-style tag class.
        if inspect.isclass(renderer):
            if len(attrs) == 1:
                attrs.update(getAttrs(text))
            whatToSave = renderer, attribute
            renderer = renderer(**attrs)
            psoSetup(renderer, parent, attrs['tagname'], className, start, end, text, attribute)
            self.sTagsAccepted[className] = whatToSave
        elif not isinstance(renderer, str):
            renderer = str(renderer)
            self.sTagsAccepted[className] = (renderer, None)
        return renderer

    def findObject(self, tagName):
        """
        Looks up the object behind a tag name.

        @param tagName: 'name', 'module:name' or 'module:name.attribute'.
        @return: the pair (object, attribute); object is None when nothing
            was found, attribute is the part after the '.' or None.
        """
        # Only 'tag_<name>' attributes count as tag handlers.  Testing for a
        # bare attribute '<name>' and then fetching 'tag_<name>' raised
        # AttributeError for any tag named like a Parser attribute.
        handler = 'tag_%s' % tagName
        if hasattr(self, handler):
            return getattr(self, handler), None
        if tagName in globals():
            return globals()[tagName], None
        indx = tagName.find(':')
        if indx > -1:
            moduleName, objName = tagName[:indx], tagName[indx+1:]
        elif self.defaultModule:
            moduleName, objName = self.defaultModule, tagName
        else:
            return None, None
        if not objName:
            return None, None
        attribute = None
        indx = objName.find('.')
        if indx > -1:
            attribute = objName[indx+1:]
            objName = objName[:indx]
        return PSOimport(moduleName, objName, doReload=self.reload), attribute

    def processNode(self, currentNode):
        """
        Reads tokens into currentNode until its end tag or the end of the
        text is reached; nested start tags are processed recursively.

        @return: currentNode.
        """
        doAppend = currentNode.append
        cAppend = currentNode.getChildren().append
        getToken = self.tokenizer.getToken
        while 1:
            token, attrs, start, end, text = getToken(self.defaultModule, self.sTagsRejected, self.sTagsAccepted)
            if not token:
                return currentNode
            if token is CData:
                doAppend(text)
            elif token is StartTag:
                node = self.getPSO(currentNode, (StartTag, attrs, start, end, text))
                if isinstance(node, str):
                    doAppend(node)
                else:
                    cAppend(self.processNode(node))
            elif token is SingleTag:
                node = self.getPSO(currentNode, (SingleTag, attrs, start, end, text))
                if isinstance(node, str):
                    doAppend(node)
                else:
                    cAppend(node)
            elif token is EndTag:
                if hasattr(currentNode, 'psoParts'):
                    if currentNode.psoParts.name == attrs['tagname']:
                        break
                # An end tag that does not close this node is plain text.
                doAppend(text)
            else:
                doAppend(text)
        return currentNode
class CachedParser(Parser):
    """
    Parser that keeps every parsed tree in memory.

    The trees are stored in the class attribute ``trees``, keyed by file
    path, so they are shared by all CachedParser instances.
    """
    trees={}

    def parseFile(self, filePath, oPath=''):
        """
        @param filePath: path of the template.
        @param oPath: passed on to Parser.parseFile.
        @return: the tree remembered for filePath; the file is parsed only
            the first time it is asked for.
        """
        tree = self.trees.get(filePath)
        if tree is None:
            # Parser.parse adds to self.tokenTree.  Start every file with an
            # empty tree, otherwise a parser used for two files would store
            # one tree holding both templates under both paths.
            self.tokenTree = PSOTree()
            tree = Parser.parseFile(self, filePath, oPath)
            self.trees[filePath] = tree
        return tree
class PSOParser(Parser):
    """Plain subclass of Parser; adds no behaviour of its own."""
if __name__ =='__main__':
    # Demonstration: runs three renderers over a sample template.
    file1 = 'templates/contractor_detail1.html'
    print('-' * 25)

    def _writeFile(path, data):
        """Writes data to path and closes the file."""
        out = open(path, 'w')
        try:
            out.write(data)
        finally:
            out.close()

    class TagDocumentor:
        """Collects the class and docstring of every tag in a template."""

        def render(self, object, cdata=''):
            # Returns None on purpose: nothing is added to the rendered
            # text, only self.documentation grows.
            if object:
                self.documentation += "%s | %s\n\n" % (object.__class__, object.__class__.__doc__)

        def do(self, infile, outfile):
            self.documentation = ''
            psoParser = Parser()
            psoTree = psoParser.parseFile(infile, noCache=1)
            psoTree.render(self.render)
            _writeFile(outfile, "%s\n" % self.documentation)

    class TagIndexer:
        """Replaces every tag by a running number."""
        index = 0

        def render(self, object, cdata=''):
            if not object:
                return cdata
            self.index += 1
            index = self.index
            if cdata:
                return ":<%s>%s%s>:" % (index, cdata, index)
            return ":<%s />:" % index

        def do(self, infile, outfile):
            self.index = 0
            psoParser = Parser()
            psoTree = psoParser.parseFile(infile, )
            _writeFile(outfile, psoTree.render(self.render))

    import time

    class TagTimer:
        """
        Records how long the rendering of each tag class takes.  Relies on
        a render() method supplied by another base class.
        """

        def timer(self, object, cdata=''):
            if not object:
                return self.render(object, cdata)
            t = time.time()
            r = self.render(object, cdata)
            self.tagTimes[object.__class__.__name__] = time.time() - t
            return r

        def do(self, infile):
            self.tagTimes = {}
            psoParser = Parser()
            psoTree = psoParser.parseFile(infile)
            print(psoTree.render(self.timer))
            for k, v in self.tagTimes.items():
                print("%s %s" % (k, v))

    class IndexerTimer(TagTimer, TagIndexer):
        """TagTimer that times the rendering done by TagIndexer."""

    try:
        import sys
        infile = file1 #sys.argv[1]
        outfile = 'r3' #sys.argv[2]
        TagIndexer().do(infile, 'r2')
        TagDocumentor().do(infile, outfile)
        IndexerTimer().do(infile)
    except Exception:
        e = sys.exc_info()[1]
        print(e)
        print("""
usage parser.py template_file output_file
""")