You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

405 lines
13 KiB

  1. #!/usr/bin/env python
  2. """Doxygen XML to SWIG docstring converter.
  3. Converts Doxygen generated XML files into a file containing docstrings
  4. that can be used by SWIG >1.3.23
  5. Usage:
  6. extract-docstrings.py input_py_wrapper.py input_xml_dir output_directory
  7. input_py_wrapper.py is a swig generated file, with/without docstrings,
  8. so we can get to know which classes are inspected by swig
  9. input_xml_dir is your doxygen generated XML directory
  10. output_directory is the directory where output will be written
  11. """
  12. # This code is implemented using Mark Pilgrim's code as a guideline:
  13. # http://www.faqs.org/docs/diveintopython/kgp_divein.html
  14. # Based on doxy2swig.py
  15. # Author: Prabhu Ramachandran
  16. # License: BSD style
  17. from __future__ import print_function
  18. from xml.dom import minidom
  19. import re
  20. import textwrap
  21. import sys
  22. import os.path
  23. def my_open_read(source):
  24. if hasattr(source, "read"):
  25. return source
  26. else:
  27. return open(source)
  28. def my_open_write(dest):
  29. if hasattr(dest, "write"):
  30. return dest
  31. else:
  32. return open(dest, 'w')
  33. class Doxy2SWIG:
  34. """Converts Doxygen generated XML files into a file containing
  35. docstrings that can be used by SWIG-1.3.x that have support for
  36. feature("docstring"). Once the data is parsed it is stored in
  37. self.pieces.
  38. """
  39. def __init__(self, src):
  40. """Initialize the instance given a source object (file or
  41. filename).
  42. """
  43. f = my_open_read(src)
  44. self.my_dir = os.path.dirname(f.name)
  45. self.xmldoc = minidom.parse(f).documentElement
  46. f.close()
  47. self.pieces = []
  48. self.pieces.append('\n// File: %s\n'%\
  49. os.path.basename(f.name))
  50. self.space_re = re.compile(r'\s+')
  51. self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
  52. self.multi = 0
  53. self.ignores = ('inheritancegraph', 'param', 'listofallmembers',
  54. 'innerclass', 'name', 'declname', 'incdepgraph',
  55. 'invincdepgraph', 'programlisting', 'type',
  56. 'references', 'referencedby', 'location',
  57. 'collaborationgraph', 'reimplements',
  58. 'reimplementedby', 'derivedcompoundref',
  59. 'basecompoundref')
  60. #self.generics = []
  61. def generate(self):
  62. """Parses the file set in the initialization. The resulting
  63. data is stored in `self.pieces`.
  64. """
  65. self.parse(self.xmldoc)
  66. def parse(self, node):
  67. """Parse a given node. This function in turn calls the
  68. `parse_<nodeType>` functions which handle the respective
  69. nodes.
  70. """
  71. pm = getattr(self, "parse_%s"%node.__class__.__name__)
  72. pm(node)
  73. def parse_Document(self, node):
  74. self.parse(node.documentElement)
  75. def parse_Text(self, node):
  76. txt = node.data
  77. txt = txt.replace('\\', r'\\\\')
  78. txt = txt.replace('"', r'\"')
  79. # ignore pure whitespace
  80. m = self.space_re.match(txt)
  81. if m and len(m.group()) == len(txt):
  82. pass
  83. else:
  84. self.add_text(textwrap.fill(txt))
  85. def parse_Element(self, node):
  86. """Parse an `ELEMENT_NODE`. This calls specific
  87. `do_<tagName>` handers for different elements. If no handler
  88. is available the `generic_parse` method is called. All
  89. tagNames specified in `self.ignores` are simply ignored.
  90. """
  91. name = node.tagName
  92. ignores = self.ignores
  93. if name in ignores:
  94. return
  95. attr = "do_%s" % name
  96. if hasattr(self, attr):
  97. handlerMethod = getattr(self, attr)
  98. handlerMethod(node)
  99. else:
  100. self.generic_parse(node)
  101. #if name not in self.generics: self.generics.append(name)
  102. def add_text(self, value):
  103. """Adds text corresponding to `value` into `self.pieces`."""
  104. if type(value) in (list, tuple):
  105. self.pieces.extend(value)
  106. else:
  107. self.pieces.append(value)
  108. def get_specific_nodes(self, node, names):
  109. """Given a node and a sequence of strings in `names`, return a
  110. dictionary containing the names as keys and child
  111. `ELEMENT_NODEs`, that have a `tagName` equal to the name.
  112. """
  113. nodes = [(x.tagName, x) for x in node.childNodes \
  114. if x.nodeType == x.ELEMENT_NODE and \
  115. x.tagName in names]
  116. return dict(nodes)
  117. def generic_parse(self, node, pad=0):
  118. """A Generic parser for arbitrary tags in a node.
  119. Parameters:
  120. - node: A node in the DOM.
  121. - pad: `int` (default: 0)
  122. If 0 the node data is not padded with newlines. If 1 it
  123. appends a newline after parsing the childNodes. If 2 it
  124. pads before and after the nodes are processed. Defaults to
  125. 0.
  126. """
  127. npiece = 0
  128. if pad:
  129. npiece = len(self.pieces)
  130. if pad == 2:
  131. self.add_text('\n')
  132. for n in node.childNodes:
  133. self.parse(n)
  134. if pad:
  135. if len(self.pieces) > npiece:
  136. self.add_text('\n')
  137. def space_parse(self, node):
  138. self.add_text(' ')
  139. self.generic_parse(node)
  140. do_ref = space_parse
  141. do_emphasis = space_parse
  142. do_bold = space_parse
  143. do_computeroutput = space_parse
  144. do_formula = space_parse
  145. def do_compoundname(self, node):
  146. self.add_text('\n\n')
  147. data = node.firstChild.data
  148. self.add_text('%%feature("docstring") %s "\n'%data)
  149. def do_compounddef(self, node):
  150. kind = node.attributes['kind'].value
  151. if kind in ('class', 'struct'):
  152. prot = node.attributes['prot'].value
  153. if prot != 'public':
  154. return
  155. names = ('compoundname', 'briefdescription',
  156. 'detaileddescription', 'includes')
  157. first = self.get_specific_nodes(node, names)
  158. for n in names:
  159. if n in first:
  160. self.parse(first[n])
  161. self.add_text(['";','\n'])
  162. for n in node.childNodes:
  163. if n not in first.values():
  164. self.parse(n)
  165. elif kind in ('file', 'namespace'):
  166. nodes = node.getElementsByTagName('sectiondef')
  167. for n in nodes:
  168. self.parse(n)
  169. def do_includes(self, node):
  170. self.add_text('C++ includes: ')
  171. self.generic_parse(node, pad=1)
  172. def do_parameterlist(self, node):
  173. self.add_text(['\n', '\n', 'Parameters:', '\n'])
  174. self.generic_parse(node, pad=1)
  175. def do_para(self, node):
  176. self.add_text('\n')
  177. self.generic_parse(node, pad=1)
  178. def do_parametername(self, node):
  179. self.add_text('\n')
  180. try:
  181. self.add_text("%s: "%node.firstChild.data)
  182. except AttributeError:
  183. self.add_text("???: ")
  184. def do_parameterdefinition(self, node):
  185. self.generic_parse(node, pad=1)
  186. def do_detaileddescription(self, node):
  187. self.generic_parse(node, pad=1)
  188. def do_briefdescription(self, node):
  189. self.generic_parse(node, pad=1)
  190. def do_memberdef(self, node):
  191. prot = node.attributes['prot'].value
  192. id = node.attributes['id'].value
  193. kind = node.attributes['kind'].value
  194. tmp = node.parentNode.parentNode.parentNode
  195. compdef = tmp.getElementsByTagName('compounddef')[0]
  196. cdef_kind = compdef.attributes['kind'].value
  197. if prot == 'public':
  198. first = self.get_specific_nodes(node, ('definition', 'name'))
  199. name = first['name'].firstChild.data
  200. if name[:8] == 'operator': # Don't handle operators yet.
  201. return
  202. defn = first['definition'].firstChild.data
  203. self.add_text('\n')
  204. self.add_text('%feature("docstring") ')
  205. anc = node.parentNode.parentNode
  206. if cdef_kind in ('file', 'namespace'):
  207. ns_node = anc.getElementsByTagName('innernamespace')
  208. if not ns_node and cdef_kind == 'namespace':
  209. ns_node = anc.getElementsByTagName('compoundname')
  210. if ns_node:
  211. ns = ns_node[0].firstChild.data
  212. self.add_text(' %s::%s "\n%s'%(ns, name, defn))
  213. else:
  214. self.add_text(' %s "\n%s'%(name, defn))
  215. elif cdef_kind in ('class', 'struct'):
  216. # Get the full function name.
  217. anc_node = anc.getElementsByTagName('compoundname')
  218. cname = anc_node[0].firstChild.data
  219. self.add_text(' %s::%s "\n%s'%(cname, name, defn))
  220. for n in node.childNodes:
  221. if n not in first.values():
  222. self.parse(n)
  223. self.add_text(['";', '\n'])
  224. def do_definition(self, node):
  225. data = node.firstChild.data
  226. self.add_text('%s "\n%s'%(data, data))
  227. def do_sectiondef(self, node):
  228. kind = node.attributes['kind'].value
  229. if kind in ('public-func', 'func'):
  230. self.generic_parse(node)
  231. def do_simplesect(self, node):
  232. kind = node.attributes['kind'].value
  233. if kind in ('date', 'rcs', 'version'):
  234. pass
  235. elif kind == 'warning':
  236. self.add_text(['\n', 'WARNING: '])
  237. self.generic_parse(node)
  238. elif kind == 'see':
  239. self.add_text('\n')
  240. self.add_text('See: ')
  241. self.generic_parse(node)
  242. else:
  243. self.generic_parse(node)
  244. def do_argsstring(self, node):
  245. self.generic_parse(node, pad=1)
  246. def do_member(self, node):
  247. kind = node.attributes['kind'].value
  248. refid = node.attributes['refid'].value
  249. if kind == 'function' and refid[:9] == 'namespace':
  250. self.generic_parse(node)
  251. def do_doxygenindex(self, node):
  252. self.multi = 1
  253. comps = node.getElementsByTagName('compound')
  254. for c in comps:
  255. refid = c.attributes['refid'].value
  256. fname = refid + '.xml'
  257. if not os.path.exists(fname):
  258. fname = os.path.join(self.my_dir, fname)
  259. print("parsing file: %s" % fname)
  260. p = Doxy2SWIG(fname)
  261. p.generate()
  262. self.pieces.extend(self.clean_pieces(p.pieces))
  263. def write(self, fname):
  264. o = my_open_write(fname)
  265. if self.multi:
  266. o.write("".join(self.pieces))
  267. else:
  268. o.write("".join(self.clean_pieces(self.pieces)))
  269. o.close()
  270. def clean_pieces(self, pieces):
  271. """Cleans the list of strings given as `pieces`. It replaces
  272. multiple newlines by a maximum of 2 and returns a new list.
  273. It also wraps the paragraphs nicely.
  274. """
  275. ret = []
  276. count = 0
  277. for i in pieces:
  278. if i == '\n':
  279. count = count + 1
  280. else:
  281. if i == '";':
  282. if count:
  283. ret.append('\n')
  284. elif count > 2:
  285. ret.append('\n\n')
  286. elif count:
  287. ret.append('\n'*count)
  288. count = 0
  289. ret.append(i)
  290. _data = "".join(ret)
  291. ret = []
  292. for i in _data.split('\n\n'):
  293. if i == 'Parameters:':
  294. ret.extend(['Parameters:\n-----------', '\n\n'])
  295. elif i.find('// File:') > -1: # leave comments alone.
  296. ret.extend([i, '\n'])
  297. else:
  298. _tmp = textwrap.fill(i.strip())
  299. _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
  300. ret.extend([_tmp, '\n\n'])
  301. return ret
  302. def get_python_classes(input_py):
  303. with open(input_py) as f:
  304. data = f.read()
  305. classes_supers = re.findall(r'class[ ]+([\w_]+)(\([\w_, ]+\))?:',data)
  306. classes = (classname for classname,superclass in classes_supers)
  307. return classes
  308. return []
  309. def main(input_py, input_xml, output_dir):
  310. classes = get_python_classes(input_py)
  311. with open("%s/docstrings.i"%output_dir,'w') as f_index:
  312. for classname in classes:
  313. class_file = "%s/class%s.xml"%(input_xml,classname.replace("_","__"))
  314. swig_file = "%s/%s.i"%(output_dir,classname.lower())
  315. if os.path.isfile(class_file):
  316. print("processing:", class_file, " ->", swig_file)
  317. p = Doxy2SWIG(class_file)
  318. p.generate()
  319. p.write(swig_file)
  320. f_index.write('%%include "%s.i"\n'% classname.lower())
  321. #else:
  322. # print("ignoring class %s, as %s does not exist" % (classname,class_file))
  323. if __name__ == '__main__':
  324. print(sys.argv)
  325. if len(sys.argv) != 4:
  326. print(__doc__)
  327. sys.exit(1)
  328. main(sys.argv[1], sys.argv[2], sys.argv[3])