import re

# Entity tables map an entity name to the character it stands for.
# html4_entities is xml_entities without "apos".
html4_entities = { "lt" : "<", "gt" : ">", "amp" : "&", "quot" : '"' }
xml_entities = { "lt" : "<", "gt" : ">", "amp" : "&", "quot" : '"', "apos" : "'" }

# "&", an entity body, then ";".  The body may not contain "&" so that a
# stray ampersand cannot swallow a real entity that follows it
# (e.g. "a & b &lt; c" must still decode the "&lt;").
_entity_re = re.compile(r"&([^&;]*);")
def decode_entities(s, entities = xml_entities):
	"""Decode XML entities in a string.

	s is the text to decode.  entities maps entity names (without the
	surrounding "&" and ";") to their replacement text.

	Named entities found in the table are replaced by their value.
	Numeric references are decoded with chr(): "&#NNN;" as decimal and
	"&#xHHH;" as hexadecimal.  Anything that is not recognised (unknown
	names, malformed numbers, values chr() rejects, an empty "&;") is
	left in the output exactly as it appeared in the input.

	Returns the decoded string.
	"""
	def decode(match):
		e = match.group(1)
		if e in entities:
			return entities[e]
		# startswith() is safe on the empty body produced by "&;".
		if e.startswith("#"):
			try:
				# Slicing (rather than indexing) keeps "&#;" from raising;
				# int("") then fails with ValueError and we fall through.
				if e[1:2] == "x":
					return chr(int(e[2:], 16))
				return chr(int(e[1:]))
			except (ValueError, OverflowError):
				# Not a number, or not a value chr() accepts: keep the
				# original text.
				pass
		return match.group(0)
	return _entity_re.sub(decode, s)

def encode_entities(s, entities = xml_entities):
	"""Encode a string using XML entities.

	s is the text to encode.  entities maps entity names to the text they
	stand for; every occurrence of such text in s is replaced by
	"&name;".  If several names map to the same text, whichever comes
	last when iterating the table is used.

	The replacement is done in a single pass, so the "&" of a freshly
	written entity is never encoded again.  An empty table (or one whose
	values are all empty) returns s unchanged.

	Returns the encoded string.
	"""
	# Reverse map: replacement text -> entity name.  Empty values are
	# skipped because they can never be matched meaningfully.
	rev = dict((text, name) for name, text in entities.items() if text)
	if not rev:
		return s
	def encode(match):
		return "&" + rev[match.group(0)] + ";"
	# re.escape() makes every value literal, so characters that are special
	# in regexps ("]", "^", "-", "\\", ...) are handled correctly.  Longest
	# values go first so a multi-character value wins over any shorter
	# value that is a prefix of it.
	alternatives = sorted(rev, key=len, reverse=True)
	change_re = re.compile("|".join(re.escape(text) for text in alternatives))
	return change_re.sub(encode, s)

if __name__ == "__main__":
	# Self-test / demo, run only when the file is executed as a script.
	# print is called with a single parenthesised argument so the module
	# parses and behaves the same on Python 2 and Python 3.
	print(decode_entities("""Test:
stock names: &lt; &gt; &amp; &quot; &apos;
decimal: &#65; &#66; &#67;
hex: &#x41; &#x42; &#x4f;
unknown names: &unknown; &fish;
unknown decimal: &#fish;
unknown hex: &#xfish;
incomplete: &amp &fish with random; semicolon"""))
	s = "<p>Fish &gt; \"big\" wombats' elephants</p>"
	print(encode_entities(s))
	# Encoding followed by decoding must give back the original text.
	assert decode_entities(encode_entities(s)) == s
