"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""
| 27 |
|
|---|
# Public names exported by this module; the file-format-specific jars
# (LWPCookieJar, MozillaCookieJar) are included alongside the core classes.
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
|
|---|
| 30 |
|
|---|
| 31 | import re, urlparse, copy, time, urllib
|
|---|
| 32 | try:
|
|---|
| 33 | import threading as _threading
|
|---|
| 34 | except ImportError:
|
|---|
| 35 | import dummy_threading as _threading
|
|---|
| 36 | import httplib # only for the default HTTP port
|
|---|
| 37 | from calendar import timegm
|
|---|
| 38 |
|
|---|
| 39 | debug = False # set to True to enable debugging via the logging module
|
|---|
| 40 | logger = None
|
|---|
| 41 |
|
|---|
| 42 | def _debug(*args):
|
|---|
| 43 | if not debug:
|
|---|
| 44 | return
|
|---|
| 45 | global logger
|
|---|
| 46 | if not logger:
|
|---|
| 47 | import logging
|
|---|
| 48 | logger = logging.getLogger("cookielib")
|
|---|
| 49 | return logger.debug(*args)
|
|---|
| 50 |
|
|---|
| 51 |
|
|---|
# Default port (as a string) used when a request URL carries no explicit port.
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
# Message used in errors raised when a filename is required but none was given.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
|
|---|
| 55 |
|
|---|
def _warn_unhandled_exception():
    """Emit a UserWarning carrying the traceback of the current exception.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  Warn if any
    exceptions are caught there.
    """
    # Imports are local so the common (no-bug) path pays no import cost.
    import warnings, traceback, StringIO
    f = StringIO.StringIO()
    traceback.print_exc(None, f)
    msg = f.getvalue()
    warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
|
|---|
| 65 |
|
|---|
| 66 |
|
|---|
| 67 | # Date/time conversion
|
|---|
| 68 | # -----------------------------------------------------------------------------
|
|---|
| 69 |
|
|---|
| 70 | EPOCH_YEAR = 1970
|
|---|
| 71 | def _timegm(tt):
|
|---|
| 72 | year, month, mday, hour, min, sec = tt[:6]
|
|---|
| 73 | if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
|
|---|
| 74 | (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
|
|---|
| 75 | return timegm(tt)
|
|---|
| 76 | else:
|
|---|
| 77 | return None
|
|---|
| 78 |
|
|---|
# Abbreviated weekday / month names used when formatting cookie dates.
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copies, for case-insensitive month-name lookup while parsing.
MONTHS_LOWER = [month.lower() for month in MONTHS]
|
|---|
| 84 |
|
|---|
def time2isoz(t=None):
    """Format t (seconds since epoch; default: the current time) as UTC text.

    The result looks like "YYYY-MM-DD hh:mm:ssZ" and always represents
    Universal Time (UTC, aka GMT), e.g.:

        1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    tt = time.gmtime(t)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % tt[:6]
|
|---|
| 101 |
|
|---|
def time2netscape(t=None):
    """Format t (seconds since epoch; default: the current time) as a
    Netscape cookie date.

    The result looks like "Wed 09-Feb-1994 22:23:32 GMT" and always
    represents UTC.
    """
    # NOTE(review): the format string emits no comma after the weekday,
    # although the classic Netscape expires format shows one -- confirm
    # this is intended before changing it, since the output is preserved
    # here exactly as in the original.
    if t is None:
        t = time.time()
    tt = time.gmtime(t)
    return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[tt.tm_wday], tt.tm_mday, MONTHS[tt.tm_mon - 1],
        tt.tm_year, tt.tm_hour, tt.tm_min, tt.tm_sec)
|
|---|
| 117 |
|
|---|
| 118 |
|
|---|
# Timezone names treated as zero offset from UTC.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric timezone: optional sign, 1-2 digit hours, optional minutes.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")

def offset_from_tz_string(tz):
    """Return the offset of timezone string tz from UTC, in seconds.

    Known UTC aliases ("GMT", "UTC", "UT", "Z") give 0; numeric forms like
    "-0800" or "+01:00" are converted; anything else yields None.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if m is None:
        return None
    offset = 3600 * int(m.group(2))
    if m.group(3):
        offset += 60 * int(m.group(3))
    if m.group(1) == '-':
        offset = -offset
    return offset
|
|---|
| 135 |
|
|---|
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert broken-down date/time strings to seconds since epoch.

    All arguments are strings as captured by the date regexps (hr, min,
    sec and tz may be None).  Returns None when the month, the resulting
    time tuple, or the timezone string cannot be interpreted.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    sec = int(sec)

    if yr < 1000:
        # find "obvious" year: choose the century that puts yr closest
        # to the current year (so "94" becomes 1994, not 2094)
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted);
    # the tz string rides along in the tuple but _timegm validates only the
    # first six fields
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
|
|---|
| 188 |
|
|---|
# A strictly RFC-1123-conforming HTTP date, e.g.
# "Wed, 09 Feb 1994 22:23:32 GMT".  The second fragment was previously a
# non-raw string, leaving "\d" as invalid escape sequences; both fragments
# are now raw strings.
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
# Leading weekday name (abbreviated or full), optionally followed by a comma.
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
# Loose date parse: day month year, optional clock, optional timezone,
# optional parenthesised timezone name.
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
|
|---|
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        mon = MONTHS_LOWER.index(g[1].lower()) + 1
        # strict form is always GMT, so no timezone adjustment is needed
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), float(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is not None:
        day, mon, yr, hr, min, sec, tz = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)
|
|---|
| 266 |
|
|---|
# ISO 8601-ish date, e.g. "1994-02-03 14:15:29 -0100" or "19940203T141529Z".
# The pattern was previously a non-raw string, leaving every "\d", "\s" etc.
# as invalid escape sequences; it is now a raw string.
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
|
|---|
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # ignore leading whitespace, then try the single loose ISO regexp
    m = ISO_DATE_RE.search(text.lstrip())
    if m is None:
        return None  # bad format
    # XXX there's an extra bit of the timezone (the trailing nested group)
    # that is deliberately ignored here: is this the right thing to do?
    yr, mon, day, hr, min, sec, tz, _ = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
|
|---|
| 311 |
|
|---|
| 312 |
|
|---|
| 313 | # Header parsing
|
|---|
| 314 | # -----------------------------------------------------------------------------
|
|---|
| 315 |
|
|---|
def unmatched(match):
    """Return the parts of match.string that lie outside the matched span."""
    i, j = match.span(0)
    subject = match.string
    return subject[:i] + subject[j:]
|
|---|
| 320 |
|
|---|
# Scanner pieces for split_header_words, each anchored at the string start:
# a token is everything up to '=', ';', ',' or whitespace...
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
# ...a quoted value is '=' followed by a double-quoted string with
# backslash escapes...
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
# ...an unquoted value runs to the next ';', ',' or whitespace...
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
# ...and this undoes backslash-escaping inside a quoted value.
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
|
|---|
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text   # kept only for the bug-report message below
        pairs = []
        # consume the string token by token; each regexp is anchored at the
        # start, and unmatched() strips what was just consumed
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    # undo backslash-escaping inside the quoted string
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
|
|---|
| 409 |
|
|---|
# Characters that must be backslash-escaped inside a quoted value.
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")

def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        parts = []
        for key, val in pairs:
            if val is None:
                # a bare token has no "=value" part
                parts.append(key)
                continue
            if re.search(r"^\w+$", val) is None:
                # not a simple word: escape '"' and '\', then quote
                val = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", val)
                val = '"%s"' % val
            parts.append("%s=%s" % (key, val))
        if parts:
            headers.append("; ".join(parts))
    return ", ".join(headers)
|
|---|
| 435 |
|
|---|
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                # attribute names (everything after the first "name=value"
                # pair) are case-normalised; the cookie name itself is not
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                if k == "expires":
                    # convert expires date to seconds since epoch,
                    # stripping any surrounding double quotes first
                    if v.startswith('"'): v = v[1:]
                    if v.endswith('"'): v = v[:-1]
                    v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                # a cookie with no version attribute is a Netscape cookie
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
|
|---|
| 487 |
|
|---|
| 488 |
|
|---|
# A trailing ".<digits>" component, as in a dotted-quad IP address.
IPV4_RE = re.compile(r"\.\d+$")

def is_HDN(text):
    """Return True if text is a host domain name.

    XXX This may well be wrong.  Which RFC is HDN defined in, if any (for
    the purposes of RFC 2965)?  For the current implementation, what about
    IPv6?  Remember to look at other uses of IPV4_RE also, if change this.
    """
    if IPV4_RE.search(text) is not None:
        return False
    if not text:
        return False
    return not (text.startswith(".") or text.endswith("."))
|
|---|
| 504 |
|
|---|
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         * their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
           name string, B has the form .B', and B' is a HDN string.  (So,
           x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # If A or B is an IP address, only the direct string-compare branch of
    # the domain-match algorithm can apply.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    i = A.rfind(B)
    if i <= 0:
        # A does not have form NB, or N is the empty string
        return False
    if not B.startswith("."):
        return False
    return is_HDN(B[1:])
|
|---|
| 543 |
|
|---|
def liberal_is_HDN(text):
    """Return True if text is sort-of-like a host domain name.

    For accepting/blocking domains.

    """
    # Anything that does not end in a dotted numeric component qualifies.
    return IPV4_RE.search(text) is None
|
|---|
| 553 |
|
|---|
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one side looks like an IP address: only exact equality
        # counts
        return A == B
    if B.startswith("."):
        # a leading dot means "match any host ending in B"
        return A.endswith(B)
    return A == B
|
|---|
| 573 |
|
|---|
# Trailing ":<port>" in a netloc/host string.
cut_port_re = re.compile(r":\d+$")

def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    url = request.get_full_url()
    host = urlparse.urlparse(url)[1]  # the netloc component
    if host == "":
        # URL had no netloc: fall back to the Host header, if any
        host = request.get_header("Host", "")

    # remove port, if present
    host = cut_port_re.sub("", host, 1)
    return host.lower()
|
|---|
| 590 |
|
|---|
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.

    """
    req_host = request_host(request)
    erhn = req_host
    # a dotless non-IP host gets ".local" appended to form the effective name
    if "." not in req_host and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn
|
|---|
| 601 |
|
|---|
def request_path(request):
    """Return the request-URI (path, parameters, query and fragment),
    as defined by RFC 2965."""
    url = request.get_full_url()
    #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
    #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
    path, parameters, query, frag = urlparse.urlparse(url)[2:]
    if parameters:
        # re-attach the ";parameters" segment that urlparse split off
        path = "%s;%s" % (path, parameters)
    path = escape_path(path)
    # reassemble everything after the netloc; scheme and netloc left empty
    req_path = urlparse.urlunparse(("", "", path, "", query, frag))
    if not req_path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        req_path = "/"+req_path
    return req_path
|
|---|
| 616 |
|
|---|
def request_port(request):
    """Return the port of the request's host, as a string.

    Falls back to DEFAULT_HTTP_PORT when the host carries no explicit
    port, and returns None for a non-numeric port.
    """
    host = request.get_host()
    sep = host.find(':')
    if sep < 0:
        return DEFAULT_HTTP_PORT
    port = host[sep + 1:]
    try:
        int(port)  # validation only; the string form is returned
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
|
|---|
| 630 |
|
|---|
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A single "%xx" escape; group 1 is the two hex digits.
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    # re.sub callback: rewrite a "%xx" escape with upper-case hex digits
    return "%%%s" % match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    path = urllib.quote(path, HTTP_PATH_SAFE)
    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
    return path
|
|---|
| 652 |
|
|---|
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
          then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    dot = h.find(".")
    if dot >= 0:
        # split H as A.B on the first dot; A is h[:dot] (unused), B follows
        b = h[dot + 1:]
        if is_HDN(h) and ("." in b or b == "local"):
            return "." + b
    return h
|
|---|
| 687 |
|
|---|
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(request_host(request), origin_reach)
|
|---|
| 703 |
|
|---|
| 704 |
|
|---|
| 705 | class Cookie:
|
|---|
| 706 | """HTTP Cookie.
|
|---|
| 707 |
|
|---|
| 708 | This class represents both Netscape and RFC 2965 cookies.
|
|---|
| 709 |
|
|---|
| 710 | This is deliberately a very simple class. It just holds attributes. It's
|
|---|
| 711 | possible to construct Cookie instances that don't comply with the cookie
|
|---|
| 712 | standards. CookieJar.make_cookies is the factory function for Cookie
|
|---|
| 713 | objects -- it deals with cookie parsing, supplying defaults, and
|
|---|
| 714 | normalising to the representation used in this class. CookiePolicy is
|
|---|
| 715 | responsible for checking them to see whether they should be accepted from
|
|---|
| 716 | and returned to the server.
|
|---|
| 717 |
|
|---|
| 718 | Note that the port may be present in the headers, but unspecified ("Port"
|
|---|
| 719 | rather than"Port=80", for example); if this is the case, port is None.
|
|---|
| 720 |
|
|---|
| 721 | """
|
|---|
| 722 |
|
|---|
    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store already-parsed cookie-attributes on this instance.

        version and expires are coerced to int when not None; domain is
        lower-cased; rest (a mapping of nonstandard cookie-attributes) is
        shallow-copied.  Raises ValueError if port_specified is True while
        port is None.
        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # shallow copy, so later mutation of the caller's mapping does not
        # leak into this cookie
        self._rest = copy.copy(rest)
|
|---|
| 764 |
|
|---|
    def has_nonstandard_attr(self, name):
        """Return True if a nonstandard cookie-attribute name was present."""
        return name in self._rest
|
|---|
| 767 | def get_nonstandard_attr(self, name, default=None):
|
|---|
| |
|---|