[Psi-devel] first stab at xhtml filter for JEP compliance

textshell-I1QKlO@neutronstar.dyndns.org textshell-I1QKlO at neutronstar.dyndns.org
Sat Jul 1 14:10:54 PDT 2006


On Sat, Jul 01, 2006 at 09:13:13PM +0200, textshell-I1QKlO at neutronstar.dyndns.org wrote:
> Hallo,
> 
> this is a simple filter to restrict a QDomElement to the xhtml subset allowed by
> xhtml-im.
> 

Ok, modulo speed stuff (hashification), this should be correct enough.


diff -rN -u old-psi-1/iris/include/im.h new-psi/iris/include/im.h
--- old-psi-1/iris/include/im.h	2006-07-01 23:08:17.000000000 +0200
+++ new-psi/iris/include/im.h	2006-07-01 23:08:32.000000000 +0200
@@ -245,6 +245,8 @@
 		const QDomElement& body() const;
 		QString toString(const QString &rootTagName = "body") const;
 		QString text() const;
+		/** Returns a copy of body with its tags and attributes filtered for XHTML-IM. */
+		static QDomElement filterXHtmlIM(const QDomElement &body);
 
 	private:
 		QDomElement body_;
diff -rN -u old-psi-1/iris/xmpp-im/types.cpp new-psi/iris/xmpp-im/types.cpp
--- old-psi-1/iris/xmpp-im/types.cpp	2006-07-01 23:08:17.000000000 +0200
+++ new-psi/iris/xmpp-im/types.cpp	2006-07-01 23:08:31.000000000 +0200
@@ -901,11 +901,11 @@
 {
 }
 
-HTMLElement::HTMLElement(const QDomElement &body) : body_(body) {}
+HTMLElement::HTMLElement(const QDomElement &body) : body_(filterXHtmlIM(body)) {} // stores the filtered copy of body
 
 void HTMLElement::setBody(const QDomElement &body)
 {
-	body_ = body;
+	body_ = filterXHtmlIM(body); // stores the filtered copy of body
 }
 
 const QDomElement& HTMLElement::body() const
@@ -913,6 +913,105 @@
 	return body_;
 }
 
+
+// xhtml filter. TODO: hashify the lookups?, filter the style attribute
+
+static char *allowed_xhtmlim_tags[] = { // tags filtered with allowed_xhtmlim_attrs3; 0-terminated
+	/* Recommendations */
+	"body", "br", "p", "span", "ol", "ul", "li",
+	/* allowed */
+	"abbr", "acronym", "address", 
+	"cite", "code", "dfn", "div", "em", "h1", "h2", "h3", 
+	"h4", "h5", "h6", "kbd", "pre", "samp", "strong",
+	"var", "dl", "dt", "dd", 
+	0};
+
+// Attribute whitelists used by filterXHtmlIM_helper; each list ends with a null pointer.
+static char *allowed_xhtmlim_attrs1[] = { // attributes kept on "a"
+	"class", "id", "title", "style", "accesskey",
+	"charset", "href", "hreflang", "rel", "rev", "tabindex", "type",
+	0};
+
+static char *allowed_xhtmlim_attrs2[] = { // attributes kept on "img"
+	"class", "id", "title", "style", "alt", "height", "longdesc",
+	"src", "width",
+	0};
+
+static char *allowed_xhtmlim_attrs3[] = { // attributes kept on every tag in allowed_xhtmlim_tags
+	"class", "id", "title", "style",
+	0};
+
+// currently omitted: head html title blockquote q
+
+// Restricts `current` and, recursively, every descendant element to the
+// allowed_xhtmlim_* whitelists. `current` is passed by value, yet the edits
+// must reach the caller's node: filterXHtmlIM() returns the element it passed
+// in. "a" and "img" keep their own attribute sets; the other whitelisted
+// tags keep class/id/title/style. Any other tag is renamed to "span" and is
+// then filtered like one, so that an unknown element cannot carry arbitrary
+// attributes through the filter.
+static void filterXHtmlIM_helper(QDomElement current)
+{
+	const QString tagname = current.tagName();
+	char **allowed_attrs = 0;
+
+	if (tagname == "a") {
+		allowed_attrs = allowed_xhtmlim_attrs1;
+	} else if (tagname == "img") {
+		allowed_attrs = allowed_xhtmlim_attrs2;
+	} else {
+		for (char **tag = allowed_xhtmlim_tags; *tag; ++tag) {
+			if (tagname == *tag) {
+				allowed_attrs = allowed_xhtmlim_attrs3;
+				break;
+			}
+		}
+	}
+
+	if (!allowed_attrs) {
+		// Not whitelisted: degrade to a plain span, attributes included.
+		current.setTagName("span");
+		allowed_attrs = allowed_xhtmlim_attrs3;
+	}
+
+	QDomNamedNodeMap attrs = current.attributes();
+	uint i = 0;
+	while (i < attrs.length()) {
+		// Match on nodeName(): it is the name removeNamedItem() looks up,
+		// whereas localName() is empty for non-namespaced attributes.
+		const QString aname = attrs.item(i).nodeName();
+		bool ok = false;
+		for (char **j = allowed_attrs; *j; ++j) {
+			if (aname == *j) {
+				ok = true;
+				break;
+			}
+		}
+		// After a removal the scan restarts from the first item. If nothing
+		// was removed, step over the item so the loop always terminates.
+		if (ok || attrs.removeNamedItem(aname).isNull())
+			++i;
+		else
+			i = 0;
+	}
+
+	for (QDomNode n = current.firstChild(); !n.isNull(); n = n.nextSibling()) {
+		if (n.isElement())
+			filterXHtmlIM_helper(n.toElement());
+	}
+}
+
+// Returns a filtered deep copy of body; only the clone is handed to the helper.
+QDomElement HTMLElement::filterXHtmlIM(const QDomElement &body)
+{
+	QDomNode clone = body.cloneNode(true);
+	QDomElement result = clone.toElement();
+	filterXHtmlIM_helper(result);
+	return result;
+}
+
+
+
 /**
  * Returns the string reperesentation of the HTML element.
  * By default, this is of the form <body ...>...</body>, but the


More information about the psi-devel mailing list