source: trunk/essentials/dev-lang/python/Lib/pyclbr.py@ 3314

Last change on this file since 3314 was 3225, checked in by bird, 19 years ago

Python 2.5

File size: 13.0 KB
Line 
1"""Parse a Python module and describe its classes and methods.
2
3Parse enough of a Python file to recognize imports and class and
4method definitions, and to find out the superclasses of a class.
5
6The interface consists of a single function:
7 readmodule_ex(module [, path])
8where module is the name of a Python module, and path is an optional
9list of directories where the module is to be searched. If present,
10path is prepended to the system search path sys.path. The return
11value is a dictionary. The keys of the dictionary are the names of
12the classes defined in the module (including classes that are defined
13via the from XXX import YYY construct). The values are class
14instances of the class Class defined here. One special key/value pair
15is present for packages: the key '__path__' has a list as its value
16which contains the package search path.
17
18A class is described by the class Class in this module. Instances
19of this class have the following instance variables:
20 module -- the module name
21 name -- the name of the class
22 super -- a list of super classes (Class instances)
23 methods -- a dictionary of methods
24 file -- the file in which the class was defined
25 lineno -- the line in the file on which the class statement occurred
26The dictionary of methods uses the method names as keys and the line
27numbers on which the method was defined as values.
28If the name of a super class is not recognized, the corresponding
29entry in the list of super classes is not a class instance but a
30string giving the name of the super class. Since import statements
31are recognized and imported modules are scanned as well, this
32shouldn't happen often.
33
34A function is described by the class Function in this module.
35Instances of this class have the following instance variables:
36 module -- the module name
37 name -- the name of the function
38 file -- the file in which the function was defined
39 lineno -- the line in the file on which the def statement occurred
40"""
41
42import sys
43import imp
44import tokenize # Python tokenizer
45from token import NAME, DEDENT, NEWLINE, OP
46from operator import itemgetter
47
# Public names of this module.
__all__ = [
    "readmodule",
    "readmodule_ex",
    "Class",
    "Function",
]

# Cache of the modules scanned so far, keyed by module name; each value
# is the dictionary built for that module.
_modules = {}
51
52# each Python class is represented by an instance of this class
class Class:
    '''Describe one Python class found while scanning a module.

    Instance variables:
     module  -- the module name
     name    -- the name of the class
     super   -- the list of super classes ([] when None was passed)
     methods -- dictionary mapping method names to line numbers
     file    -- the file in which the class was defined
     lineno  -- the line on which the class statement occurred
    '''

    def __init__(self, module, name, super, file, lineno):
        # A missing base list becomes a fresh list for each instance.
        bases = super
        if bases is None:
            bases = []
        self.module, self.name = module, name
        self.super = bases
        self.methods = {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno
67
class Function:
    '''Describe one top-level Python function found while scanning a module.

    Instance variables:
     module -- the module name
     name   -- the name of the function
     file   -- the file in which the function was defined
     lineno -- the line on which the def statement occurred
    '''

    def __init__(self, module, name, file, lineno):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
75
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE with _readmodule(), exactly as readmodule_ex() does,
    and return a new dictionary holding only the entries whose value
    is a Class instance.

    PATH is an optional list of directories; None (the default) is
    treated as an empty list.
    '''
    # Default to None rather than a shared mutable list and normalise
    # here, because _readmodule() concatenates PATH with other lists.
    found = _readmodule(module, path or [])
    res = {}
    for key, value in found.items():
        if isinstance(value, Class):
            res[key] = value
    return res
88
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and each top-level function found in the module.  For a package
    the dictionary also has a '__path__' entry whose value is a list
    containing the package directory.

    PATH is an optional list of directories that is searched before
    sys.path; None (the default) is treated as an empty list.
    '''
    # Default to None rather than a shared mutable list and normalise
    # here, because _readmodule() concatenates PATH with sys.path.
    return _readmodule(module, path or [])
102
103def _readmodule(module, path, inpackage=None):
104 '''Do the hard work for readmodule[_ex].'''
105 # Compute the full module name (prepending inpackage if set)
106 if inpackage:
107 fullmodule = "%s.%s" % (inpackage, module)
108 else:
109 fullmodule = module
110
111 # Check in the cache
112 if fullmodule in _modules:
113 return _modules[fullmodule]
114
115 # Initialize the dict for this module's contents
116 dict = {}
117
118 # Check if it is a built-in module; we don't do much for these
119 if module in sys.builtin_module_names and not inpackage:
120 _modules[module] = dict
121 return dict
122
123 # Check for a dotted module name
124 i = module.rfind('.')
125 if i >= 0:
126 package = module[:i]
127 submodule = module[i+1:]
128 parent = _readmodule(package, path, inpackage)
129 if inpackage:
130 package = "%s.%s" % (inpackage, package)
131 return _readmodule(submodule, parent['__path__'], package)
132
133 # Search the path for the module
134 f = None
135 if inpackage:
136 f, file, (suff, mode, type) = imp.find_module(module, path)
137 else:
138 f, file, (suff, mode, type) = imp.find_module(module, path + sys.path)
139 if type == imp.PKG_DIRECTORY:
140 dict['__path__'] = [file]
141 path = [file] + path
142 f, file, (suff, mode, type) = imp.find_module('__init__', [file])
143 _modules[fullmodule] = dict
144 if type != imp.PY_SOURCE:
145 # not Python source, can't do anything with this module
146 f.close()
147 return dict
148
149 stack = [] # stack of (class, indent) pairs
150
151 g = tokenize.generate_tokens(f.readline)
152 try:
153 for tokentype, token, start, end, line in g:
154 if tokentype == DEDENT:
155 lineno, thisindent = start
156 # close nested classes and defs
157 while stack and stack[-1][1] >= thisindent:
158 del stack[-1]
159 elif token == 'def':
160 lineno, thisindent = start
161 # close previous nested classes and defs
162 while stack and stack[-1][1] >= thisindent:
163 del stack[-1]
164 tokentype, meth_name, start, end, line = g.next()
165 if tokentype != NAME:
166 continue # Syntax error
167 if stack:
168 cur_class = stack[-1][0]
169 if isinstance(cur_class, Class):
170 # it's a method
171 cur_class._addmethod(meth_name, lineno)
172 # else it's a nested def
173 else:
174 # it's a function
175 dict[meth_name] = Function(module, meth_name, file, lineno)
176 stack.append((None, thisindent)) # Marker for nested fns
177 elif token == 'class':
178 lineno, thisindent = start
179 # close previous nested classes and defs
180 while stack and stack[-1][1] >= thisindent:
181 del stack[-1]
182 tokentype, class_name, start, end, line = g.next()
183 if tokentype != NAME:
184 continue # Syntax error
185 # parse what follows the class name
186 tokentype, token, start, end, line = g.next()
187 inherit = None
188 if token == '(':
189 names = [] # List of superclasses
190 # there's a list of superclasses
191 level = 1
192 super = [] # Tokens making up current superclass
193 while True:
194 tokentype, token, start, end, line = g.next()
195 if token in (')', ',') and level == 1:
196 n = "".join(super)
197 if n in dict:
198 # we know this super class
199 n = dict[n]
200 else:
201 c = n.split('.')
202 if len(c) > 1:
203 # super class is of the form
204 # module.class: look in module for
205 # class
206 m = c[-2]
207 c = c[-1]
208 if m in _modules:
209 d = _modules[m]
210 if c in d:
211 n = d[c]
212 names.append(n)
213 super = []
214 if token == '(':
215 level += 1
216 elif token == ')':
217 level -= 1
218 if level == 0:
219 break
220 elif token == ',' and level == 1:
221 pass
222 # only use NAME and OP (== dot) tokens for type name
223 elif tokentype in (NAME, OP) and level == 1:
224 super.append(token)
225 # expressions in the base list are not supported
226 inherit = names
227 cur_class = Class(fullmodule, class_name, inherit, file, lineno)
228 if not stack:
229 dict[class_name] = cur_class
230 stack.append((cur_class, thisindent))
231 elif token == 'import' and start[1] == 0:
232 modules = _getnamelist(g)
233 for mod, mod2 in modules:
234 try: