Skip to content

Commit

Permalink
[IMP] convert qweb to use lxml instead of minidom
Browse files Browse the repository at this point in the history
should be >20x faster at parsing document strings and gain close to 40s on crawling
  • Loading branch information
xmo-odoo committed Jul 2, 2014
1 parent 160ddc0 commit b18d83c
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 103 deletions.
14 changes: 7 additions & 7 deletions addons/website/models/ir_qweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,17 @@ class QWeb(orm.AbstractModel):
def add_template(self, qcontext, name, node):
# preprocessing for multilang static urls
if request.website:
for tag, attr in self.URL_ATTRS.items():
for e in node.getElementsByTagName(tag):
url = e.getAttribute(attr)
for tag, attr in self.URL_ATTRS.iteritems():
for e in node.iterdescendants(tag=tag):
url = e.get(attr)
if url:
e.setAttribute(attr, qcontext.get('url_for')(url))
e.set(attr, qcontext.get('url_for')(url))
super(QWeb, self).add_template(qcontext, name, node)

def render_att_att(self, element, attribute_name, attribute_value, qwebcontext):
att, val = super(QWeb, self).render_att_att(element, attribute_name, attribute_value, qwebcontext)

if request.website and att == self.URL_ATTRS.get(element.nodeName) and isinstance(val, basestring):
if request.website and att == self.URL_ATTRS.get(element.tag) and isinstance(val, basestring):
val = qwebcontext.get('url_for')(val)
return att, val

Expand All @@ -78,7 +78,7 @@ def attributes(self, cr, uid, field_name, record, options,
attrs = [('data-oe-translate', 1 if column.translate else 0)]

placeholder = options.get('placeholder') \
or source_element.getAttribute('placeholder') \
or source_element.get('placeholder') \
or getattr(column, 'placeholder', None)
if placeholder:
attrs.append(('placeholder', placeholder))
Expand Down Expand Up @@ -275,7 +275,7 @@ class Image(orm.AbstractModel):

def to_html(self, cr, uid, field_name, record, options,
source_element, t_att, g_att, qweb_context, context=None):
assert source_element.nodeName != 'img',\
assert source_element.tag != 'img',\
"Oddly enough, the root tag of an image field can not be img. " \
"That is because the image goes into the tag, or it gets the " \
"hose again."
Expand Down
14 changes: 5 additions & 9 deletions addons/website/tests/test_converter.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
# -*- coding: utf-8 -*-
import textwrap
import unittest2
from xml.dom.minidom import getDOMImplementation

from lxml import html
from lxml import etree, html
from lxml.builder import E

from openerp.tests import common
from openerp.addons.base.ir import ir_qweb
from openerp.addons.website.models.ir_qweb import html_to_text
from openerp.addons.website.models.website import slugify, unslug

impl = getDOMImplementation()
document = impl.createDocument(None, None, None)

class TestUnslug(unittest2.TestCase):
def test_unslug(self):
tests = {
Expand Down Expand Up @@ -150,9 +146,9 @@ def field_rountrip_result(self, field, value, expected):
})
[record] = Model.browse(self.cr, self.uid, [id])

e = document.createElement('span')
e = etree.Element('span')
field_value = 'record.%s' % field
e.setAttribute('t-field', field_value)
e.set('t-field', field_value)

rendered = self.registry('website.qweb').render_tag_field(
e, {'field': field_value}, '', ir_qweb.QWebContext(self.cr, self.uid, {
Expand Down Expand Up @@ -231,9 +227,9 @@ def test_m2o(self):
id = Model.create(self.cr, self.uid, {field: sub_id})
[record] = Model.browse(self.cr, self.uid, [id])

e = document.createElement('span')
e = etree.Element('span')
field_value = 'record.%s' % field
e.setAttribute('t-field', field_value)
e.set('t-field', field_value)

rendered = self.registry('website.qweb').render_tag_field(
e, {'field': field_value}, '', ir_qweb.QWebContext(self.cr, self.uid, {
Expand Down
136 changes: 66 additions & 70 deletions openerp/addons/base/ir/ir_qweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,21 @@
import datetime
import hashlib
import json
import itertools
import logging
import math
import os
import re
import sys
import textwrap
import uuid
import xml # FIXME use lxml and etree
import itertools
import lxml.html
import werkzeug
from subprocess import Popen, PIPE
from urlparse import urlparse

import babel
import babel.dates
import werkzeug
from lxml import etree, html
from PIL import Image

import openerp.http
Expand Down Expand Up @@ -116,7 +115,6 @@ class QWeb(orm.AbstractModel):

_name = 'ir.qweb'

node = xml.dom.Node
_void_elements = frozenset([
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen',
'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr'])
Expand Down Expand Up @@ -164,18 +162,17 @@ def load_document(self, document, res_id, qwebcontext):
if hasattr(document, 'documentElement'):
dom = document
elif document.startswith("<?xml"):
dom = xml.dom.minidom.parseString(document)
dom = etree.fromstring(document)
else:
dom = xml.dom.minidom.parse(document)
dom = etree.parse(document)

for node in dom.documentElement.childNodes:
if node.nodeType == self.node.ELEMENT_NODE:
if node.getAttribute('t-name'):
name = str(node.getAttribute("t-name"))
self.add_template(qwebcontext, name, node)
if res_id and node.tagName == "t":
self.add_template(qwebcontext, res_id, node)
res_id = None
for node in dom:
if node.get('t-name'):
name = str(node.get("t-name"))
self.add_template(qwebcontext, name, node)
if res_id and node.tag == "t":
self.add_template(qwebcontext, res_id, node)
res_id = None

def get_template(self, name, qwebcontext):
origin_template = qwebcontext.get('__caller__') or qwebcontext['__stack__'][0]
Expand Down Expand Up @@ -247,52 +244,51 @@ def render(self, cr, uid, id_or_xml_id, qwebcontext=None, loader=None, context=N

def render_node(self, element, qwebcontext):
result = ""
if element.nodeType == self.node.TEXT_NODE or element.nodeType == self.node.CDATA_SECTION_NODE:
result = element.data.encode("utf8")
elif element.nodeType == self.node.ELEMENT_NODE:
generated_attributes = ""
t_render = None
template_attributes = {}
for (attribute_name, attribute_value) in element.attributes.items():
attribute_name = str(attribute_name)
if attribute_name == "groups":
cr = qwebcontext.get('request') and qwebcontext['request'].cr or None
uid = qwebcontext.get('request') and qwebcontext['request'].uid or None
can_see = self.user_has_groups(cr, uid, groups=attribute_value) if cr and uid else False
if not can_see:
if qwebcontext.get('editable') and not qwebcontext.get('editable_no_editor'):
errmsg = _("Editor disabled because some content can not be seen by a user who does not belong to the groups %s")
raise openerp.http.Retry(
_("User does not belong to groups %s") % attribute_value, {
'editable_no_editor': errmsg % attribute_value
})
return ''

if isinstance(attribute_value, unicode):
attribute_value = attribute_value.encode("utf8")
else:
attribute_value = attribute_value.nodeValue.encode("utf8")

if attribute_name.startswith("t-"):
for attribute in self._render_att:
if attribute_name[2:].startswith(attribute):
att, val = self._render_att[attribute](self, element, attribute_name, attribute_value, qwebcontext)
generated_attributes += val and ' %s="%s"' % (att, escape(val)) or " "
break
else:
if attribute_name[2:] in self._render_tag:
t_render = attribute_name[2:]
template_attributes[attribute_name[2:]] = attribute_value
else:
generated_attributes += ' %s="%s"' % (attribute_name, escape(attribute_value))

if 'debug' in template_attributes:
debugger = template_attributes.get('debug', 'pdb')
__import__(debugger).set_trace() # pdb, ipdb, pudb, ...
if t_render:
result = self._render_tag[t_render](self, element, template_attributes, generated_attributes, qwebcontext)
generated_attributes = ""
t_render = None
template_attributes = {}
for (attribute_name, attribute_value) in element.attrib.iteritems():
attribute_name = str(attribute_name)
if attribute_name == "groups":
cr = qwebcontext.get('request') and qwebcontext['request'].cr or None
uid = qwebcontext.get('request') and qwebcontext['request'].uid or None
can_see = self.user_has_groups(cr, uid, groups=attribute_value) if cr and uid else False
if not can_see:
if qwebcontext.get('editable') and not qwebcontext.get('editable_no_editor'):
errmsg = _("Editor disabled because some content can not be seen by a user who does not belong to the groups %s")
raise openerp.http.Retry(
_("User does not belong to groups %s") % attribute_value, {
'editable_no_editor': errmsg % attribute_value
})
return ''

attribute_value = attribute_value.encode("utf8")

if attribute_name.startswith("t-"):
for attribute in self._render_att:
if attribute_name[2:].startswith(attribute):
att, val = self._render_att[attribute](self, element, attribute_name, attribute_value, qwebcontext)
generated_attributes += val and ' %s="%s"' % (att, escape(val)) or " "
break
else:
if attribute_name[2:] in self._render_tag:
t_render = attribute_name[2:]
template_attributes[attribute_name[2:]] = attribute_value
else:
result = self.render_element(element, template_attributes, generated_attributes, qwebcontext)
generated_attributes += ' %s="%s"' % (attribute_name, escape(attribute_value))

if 'debug' in template_attributes:
debugger = template_attributes.get('debug', 'pdb')
__import__(debugger).set_trace() # pdb, ipdb, pudb, ...
if t_render:
result = self._render_tag[t_render](self, element, template_attributes, generated_attributes, qwebcontext)
else:
result = self.render_element(element, template_attributes, generated_attributes, qwebcontext)

if element.tail:
result += element.tail

if isinstance(result, unicode):
return result.encode('utf-8')
return result
Expand All @@ -306,16 +302,16 @@ def render_element(self, element, template_attributes, generated_attributes, qwe
if inner:
g_inner = inner
else:
g_inner = []
for current_node in element.childNodes:
g_inner = [] if element.text is None else [element.text]
for current_node in element:
try:
g_inner.append(self.render_node(current_node, qwebcontext))
except (QWebException, openerp.http.Retry):
raise
except Exception:
template = qwebcontext.get('__template__')
raise_qweb_exception(message="Could not render element %r" % element.nodeName, node=element, template=template)
name = str(element.nodeName)
raise_qweb_exception(message="Could not render element %r" % element.tag, node=element, template=template)
name = str(element.tag)
inner = "".join(g_inner)
trim = template_attributes.get("trim", 0)
if trim == 0:
Expand Down Expand Up @@ -417,7 +413,7 @@ def render_tag_call(self, element, template_attributes, generated_attributes, qw

def render_tag_call_assets(self, element, template_attributes, generated_attributes, qwebcontext):
""" This special 't-call' tag can be used in order to aggregate/minify javascript and css assets"""
if element.childNodes:
if len(element):
# An asset bundle is rendered in two differents contexts (when genereting html and
# when generating the bundle itself) so they must be qwebcontext free
# even '0' variable is forbidden
Expand All @@ -441,7 +437,7 @@ def render_tag_set(self, element, template_attributes, generated_attributes, qwe

def render_tag_field(self, element, template_attributes, generated_attributes, qwebcontext):
""" eg: <span t-record="browse_record(res.partner, 1)" t-field="phone">+1 555 555 8069</span>"""
node_name = element.nodeName
node_name = element.tag
assert node_name not in ("table", "tbody", "thead", "tfoot", "tr", "td",
"li", "ul", "ol", "dl", "dt", "dd"),\
"RTE widgets do not work correctly on %r elements" % node_name
Expand Down Expand Up @@ -1039,11 +1035,11 @@ def __init__(self, xmlid, debug=False, cr=None, uid=None, context=None, registry
self.parse()

def parse(self):
fragments = lxml.html.fragments_fromstring(self.html)
fragments = html.fragments_fromstring(self.html)
for el in fragments:
if isinstance(el, basestring):
self.remains.append(el)
elif isinstance(el, lxml.html.HtmlElement):
elif isinstance(el, html.HtmlElement):
src = el.get('src', '')
href = el.get('href', '')
atype = el.get('type')
Expand All @@ -1063,10 +1059,10 @@ def parse(self):
elif el.tag == 'script' and self.can_aggregate(src):
self.javascripts.append(JavascriptAsset(self, url=src))
else:
self.remains.append(lxml.html.tostring(el))
self.remains.append(html.tostring(el))
else:
try:
self.remains.append(lxml.html.tostring(el))
self.remains.append(html.tostring(el))
except Exception:
# notYETimplementederror
raise NotImplementedError
Expand Down Expand Up @@ -1256,7 +1252,7 @@ def stat(self):
except Exception:
raise AssetNotFound("Could not find %s" % self.name)

def to_html():
def to_html(self):
raise NotImplementedError()

@lazy_property
Expand Down
18 changes: 6 additions & 12 deletions openerp/addons/base/tests/test_qweb.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
# -*- coding: utf-8 -*-
import cgi
from xml.dom import minidom as dom

from lxml import etree

from openerp.tests import common
from openerp.addons.base.ir import ir_qweb

impl = dom.getDOMImplementation()
document = impl.createDocument(None, None, None)

class TestQWebTField(common.TransactionCase):
def setUp(self):
super(TestQWebTField, self).setUp()
Expand All @@ -18,8 +16,7 @@ def context(self, values):
self.cr, self.uid, values, context={'inherit_branding': True})

def test_trivial(self):
field = document.createElement('span')
field.setAttribute('t-field', u'company.name')
field = etree.Element('span', {'t-field': u'company.name'})

Companies = self.registry('res.company')
company_id = Companies.create(self.cr, self.uid, {
Expand All @@ -38,8 +35,7 @@ def test_trivial(self):
"My Test Company",))

def test_i18n(self):
field = document.createElement('span')
field.setAttribute('t-field', u'company.name')
field = etree.Element('span', {'t-field': u'company.name'})

Companies = self.registry('res.company')
s = u"Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!"
Expand All @@ -59,8 +55,7 @@ def test_i18n(self):
cgi.escape(s.encode('utf-8')),))

def test_reject_crummy_tags(self):
field = document.createElement('td')
field.setAttribute('t-field', u'company.name')
field = etree.Element('td', {'t-field': u'company.name'})

with self.assertRaisesRegexp(
AssertionError,
Expand All @@ -70,8 +65,7 @@ def test_reject_crummy_tags(self):
}))

def test_reject_t_tag(self):
field = document.createElement('t')
field.setAttribute('t-field', u'company.name')
field = etree.Element('t', {'t-field': u'company.name'})

with self.assertRaisesRegexp(
AssertionError,
Expand Down
Loading

0 comments on commit b18d83c

Please sign in to comment.