You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

506 lines
17 KiB

33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
33 years ago
  1. #! /usr/bin/env python3
  2. # This file contains a class and a main program that perform three
# related (though complementary) formatting operations on Python
  4. # programs. When called as "pindent -c", it takes a valid Python
  5. # program as input and outputs a version augmented with block-closing
  6. # comments. When called as "pindent -d", it assumes its input is a
  7. # Python program with block-closing comments and outputs a commentless
  8. # version. When called as "pindent -r" it assumes its input is a
  9. # Python program with block-closing comments but with its indentation
  10. # messed up, and outputs a properly indented version.
  11. # A "block-closing comment" is a comment of the form '# end <keyword>'
  12. # where <keyword> is the keyword that opened the block. If the
  13. # opening keyword is 'def' or 'class', the function or class name may
  14. # be repeated in the block-closing comment as well. Here is an
  15. # example of a program fully augmented with block-closing comments:
#     def foobar(a, b):
#         if a == b:
#             a = a+1
#         elif a < b:
#             b = b-1
#             if b > a: a = a-1
#             # end if
#         else:
#             print 'oops!'
#         # end if
#     # end def foobar
  27. # Note that only the last part of an if...elif...else... block needs a
  28. # block-closing comment; the same is true for other compound
  29. # statements (e.g. try...except). Also note that "short-form" blocks
  30. # like the second 'if' in the example must be closed as well;
  31. # otherwise the 'else' in the example would be ambiguous (remember
  32. # that indentation is not significant when interpreting block-closing
  33. # comments).
  34. # The operations are idempotent (i.e. applied to their own output
  35. # they yield an identical result). Running first "pindent -c" and
  36. # then "pindent -r" on a valid Python program produces a program that
  37. # is semantically identical to the input (though its indentation may
# be different).  Running "pindent -d" on that output produces a
  39. # program that only differs from the original in indentation.
  40. # Other options:
  41. # -s stepsize: set the indentation step size (default 8)
  42. # -t tabsize : set the number of spaces a tab character is worth (default 8)
  43. # -e : expand TABs into spaces
  44. # file ... : input file(s) (default standard input)
  45. # The results always go to standard output
  46. # Caveats:
  47. # - comments ending in a backslash will be mistaken for continued lines
  48. # - continuations using backslash are always left unchanged
  49. # - continuations inside parentheses are not extra indented by -r
  50. # but must be indented for -c to work correctly (this breaks
  51. # idempotency!)
  52. # - continued lines inside triple-quoted strings are totally garbled
  53. # Secret feature:
  54. # - On input, a block may also be closed with an "end statement" --
  55. # this is a block-closing comment without the '#' sign.
  56. # Possible improvements:
  57. # - check syntax based on transitions in 'next' table
  58. # - better error reporting
  59. # - better error recovery
  60. # - check identifier after class/def
  61. # The following wishes need a more complete tokenization of the source:
  62. # - Don't get fooled by comments ending in backslash
  63. # - reindent continuation lines indicated by backslash
  64. # - handle continuation lines inside parentheses/braces/brackets
  65. # - handle triple quoted strings spanning lines
  66. # - realign comments
  67. # - optionally do much more thorough reformatting, a la C indent
  68. # Defaults
  69. STEPSIZE = 8
  70. TABSIZE = 8
  71. EXPANDTABS = False
  72. import io
  73. import re
  74. import sys
  75. next = {}
  76. next['if'] = next['elif'] = 'elif', 'else', 'end'
  77. next['while'] = next['for'] = 'else', 'end'
  78. next['try'] = 'except', 'finally'
  79. next['except'] = 'except', 'else', 'finally', 'end'
  80. next['else'] = next['finally'] = next['with'] = \
  81. next['def'] = next['class'] = 'end'
  82. next['end'] = ()
  83. start = 'if', 'while', 'for', 'try', 'with', 'def', 'class'
  84. class PythonIndenter:
  85. def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
  86. indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  87. self.fpi = fpi
  88. self.fpo = fpo
  89. self.indentsize = indentsize
  90. self.tabsize = tabsize
  91. self.lineno = 0
  92. self.expandtabs = expandtabs
  93. self._write = fpo.write
  94. self.kwprog = re.compile(
  95. r'^(?:\s|\\\n)*(?P<kw>[a-z]+)'
  96. r'((?:\s|\\\n)+(?P<id>[a-zA-Z_]\w*))?'
  97. r'[^\w]')
  98. self.endprog = re.compile(
  99. r'^(?:\s|\\\n)*#?\s*end\s+(?P<kw>[a-z]+)'
  100. r'(\s+(?P<id>[a-zA-Z_]\w*))?'
  101. r'[^\w]')
  102. self.wsprog = re.compile(r'^[ \t]*')
  103. # end def __init__
  104. def write(self, line):
  105. if self.expandtabs:
  106. self._write(line.expandtabs(self.tabsize))
  107. else:
  108. self._write(line)
  109. # end if
  110. # end def write
  111. def readline(self):
  112. line = self.fpi.readline()
  113. if line: self.lineno += 1
  114. # end if
  115. return line
  116. # end def readline
  117. def error(self, fmt, *args):
  118. if args: fmt = fmt % args
  119. # end if
  120. sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
  121. self.write('### %s ###\n' % fmt)
  122. # end def error
  123. def getline(self):
  124. line = self.readline()
  125. while line[-2:] == '\\\n':
  126. line2 = self.readline()
  127. if not line2: break
  128. # end if
  129. line += line2
  130. # end while
  131. return line
  132. # end def getline
  133. def putline(self, line, indent):
  134. tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
  135. i = self.wsprog.match(line).end()
  136. line = line[i:]
  137. if line[:1] not in ('\n', '\r', ''):
  138. line = '\t'*tabs + ' '*spaces + line
  139. # end if
  140. self.write(line)
  141. # end def putline
  142. def reformat(self):
  143. stack = []
  144. while True:
  145. line = self.getline()
  146. if not line: break # EOF
  147. # end if
  148. m = self.endprog.match(line)
  149. if m:
  150. kw = 'end'
  151. kw2 = m.group('kw')
  152. if not stack:
  153. self.error('unexpected end')
  154. elif stack.pop()[0] != kw2:
  155. self.error('unmatched end')
  156. # end if
  157. self.putline(line, len(stack))
  158. continue
  159. # end if
  160. m = self.kwprog.match(line)
  161. if m:
  162. kw = m.group('kw')
  163. if kw in start:
  164. self.putline(line, len(stack))
  165. stack.append((kw, kw))
  166. continue
  167. # end if
  168. if kw in next and stack:
  169. self.putline(line, len(stack)-1)
  170. kwa, kwb = stack[-1]
  171. stack[-1] = kwa, kw
  172. continue
  173. # end if
  174. # end if
  175. self.putline(line, len(stack))
  176. # end while
  177. if stack:
  178. self.error('unterminated keywords')
  179. for kwa, kwb in stack:
  180. self.write('\t%s\n' % kwa)
  181. # end for
  182. # end if
  183. # end def reformat
  184. def delete(self):
  185. begin_counter = 0
  186. end_counter = 0
  187. while True:
  188. line = self.getline()
  189. if not line: break # EOF
  190. # end if
  191. m = self.endprog.match(line)
  192. if m:
  193. end_counter += 1
  194. continue
  195. # end if
  196. m = self.kwprog.match(line)
  197. if m:
  198. kw = m.group('kw')
  199. if kw in start:
  200. begin_counter += 1
  201. # end if
  202. # end if
  203. self.write(line)
  204. # end while
  205. if begin_counter - end_counter < 0:
  206. sys.stderr.write('Warning: input contained more end tags than expected\n')
  207. elif begin_counter - end_counter > 0:
  208. sys.stderr.write('Warning: input contained less end tags than expected\n')
  209. # end if
  210. # end def delete
  211. def complete(self):
  212. stack = []
  213. todo = []
  214. currentws = thisid = firstkw = lastkw = topid = ''
  215. while True:
  216. line = self.getline()
  217. i = self.wsprog.match(line).end()
  218. m = self.endprog.match(line)
  219. if m:
  220. thiskw = 'end'
  221. endkw = m.group('kw')
  222. thisid = m.group('id')
  223. else:
  224. m = self.kwprog.match(line)
  225. if m:
  226. thiskw = m.group('kw')
  227. if thiskw not in next:
  228. thiskw = ''
  229. # end if
  230. if thiskw in ('def', 'class'):
  231. thisid = m.group('id')
  232. else:
  233. thisid = ''
  234. # end if
  235. elif line[i:i+1] in ('\n', '#'):
  236. todo.append(line)
  237. continue
  238. else:
  239. thiskw = ''
  240. # end if
  241. # end if
  242. indentws = line[:i]
  243. indent = len(indentws.expandtabs(self.tabsize))
  244. current = len(currentws.expandtabs(self.tabsize))
  245. while indent < current:
  246. if firstkw:
  247. if topid:
  248. s = '# end %s %s\n' % (
  249. firstkw, topid)
  250. else:
  251. s = '# end %s\n' % firstkw
  252. # end if
  253. self.write(currentws + s)
  254. firstkw = lastkw = ''
  255. # end if
  256. currentws, firstkw, lastkw, topid = stack.pop()
  257. current = len(currentws.expandtabs(self.tabsize))
  258. # end while
  259. if indent == current and firstkw:
  260. if thiskw == 'end':
  261. if endkw != firstkw:
  262. self.error('mismatched end')
  263. # end if
  264. firstkw = lastkw = ''
  265. elif not thiskw or thiskw in start:
  266. if topid:
  267. s = '# end %s %s\n' % (
  268. firstkw, topid)
  269. else:
  270. s = '# end %s\n' % firstkw
  271. # end if
  272. self.write(currentws + s)
  273. firstkw = lastkw = topid = ''
  274. # end if
  275. # end if
  276. if indent > current:
  277. stack.append((currentws, firstkw, lastkw, topid))
  278. if thiskw and thiskw not in start:
  279. # error
  280. thiskw = ''
  281. # end if
  282. currentws, firstkw, lastkw, topid = \
  283. indentws, thiskw, thiskw, thisid
  284. # end if
  285. if thiskw:
  286. if thiskw in start:
  287. firstkw = lastkw = thiskw
  288. topid = thisid
  289. else:
  290. lastkw = thiskw
  291. # end if
  292. # end if
  293. for l in todo: self.write(l)
  294. # end for
  295. todo = []
  296. if not line: break
  297. # end if
  298. self.write(line)
  299. # end while
  300. # end def complete
  301. # end class PythonIndenter
  302. # Simplified user interface
  303. # - xxx_filter(input, output): read and write file objects
  304. # - xxx_string(s): take and return string object
  305. # - xxx_file(filename): process file in place, return true iff changed
  306. def complete_filter(input = sys.stdin, output = sys.stdout,
  307. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  308. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  309. pi.complete()
  310. # end def complete_filter
  311. def delete_filter(input= sys.stdin, output = sys.stdout,
  312. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  313. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  314. pi.delete()
  315. # end def delete_filter
  316. def reformat_filter(input = sys.stdin, output = sys.stdout,
  317. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  318. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  319. pi.reformat()
  320. # end def reformat_filter
  321. def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  322. input = io.StringIO(source)
  323. output = io.StringIO()
  324. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  325. pi.complete()
  326. return output.getvalue()
  327. # end def complete_string
  328. def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  329. input = io.StringIO(source)
  330. output = io.StringIO()
  331. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  332. pi.delete()
  333. return output.getvalue()
  334. # end def delete_string
  335. def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  336. input = io.StringIO(source)
  337. output = io.StringIO()
  338. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  339. pi.reformat()
  340. return output.getvalue()
  341. # end def reformat_string
  342. def make_backup(filename):
  343. import os, os.path
  344. backup = filename + '~'
  345. if os.path.lexists(backup):
  346. try:
  347. os.remove(backup)
  348. except OSError:
  349. print("Can't remove backup %r" % (backup,), file=sys.stderr)
  350. # end try
  351. # end if
  352. try:
  353. os.rename(filename, backup)
  354. except OSError:
  355. print("Can't rename %r to %r" % (filename, backup), file=sys.stderr)
  356. # end try
  357. # end def make_backup
  358. def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  359. with open(filename, 'r') as f:
  360. source = f.read()
  361. # end with
  362. result = complete_string(source, stepsize, tabsize, expandtabs)
  363. if source == result: return 0
  364. # end if
  365. make_backup(filename)
  366. with open(filename, 'w') as f:
  367. f.write(result)
  368. # end with
  369. return 1
  370. # end def complete_file
  371. def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  372. with open(filename, 'r') as f:
  373. source = f.read()
  374. # end with
  375. result = delete_string(source, stepsize, tabsize, expandtabs)
  376. if source == result: return 0
  377. # end if
  378. make_backup(filename)
  379. with open(filename, 'w') as f:
  380. f.write(result)
  381. # end with
  382. return 1
  383. # end def delete_file
  384. def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  385. with open(filename, 'r') as f:
  386. source = f.read()
  387. # end with
  388. result = reformat_string(source, stepsize, tabsize, expandtabs)
  389. if source == result: return 0
  390. # end if
  391. make_backup(filename)
  392. with open(filename, 'w') as f:
  393. f.write(result)
  394. # end with
  395. return 1
  396. # end def reformat_file
  397. # Test program when called as a script
  398. usage = """
  399. usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
  400. -c : complete a correctly indented program (add #end directives)
  401. -d : delete #end directives
  402. -r : reformat a completed program (use #end directives)
  403. -s stepsize: indentation step (default %(STEPSIZE)d)
  404. -t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
  405. -e : expand TABs into spaces (default OFF)
  406. [file] ... : files are changed in place, with backups in file~
  407. If no files are specified or a single - is given,
  408. the program acts as a filter (reads stdin, writes stdout).
  409. """ % vars()
  410. def error_both(op1, op2):
  411. sys.stderr.write('Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n')
  412. sys.stderr.write(usage)
  413. sys.exit(2)
  414. # end def error_both
  415. def test():
  416. import getopt
  417. try:
  418. opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
  419. except getopt.error as msg:
  420. sys.stderr.write('Error: %s\n' % msg)
  421. sys.stderr.write(usage)
  422. sys.exit(2)
  423. # end try
  424. action = None
  425. stepsize = STEPSIZE
  426. tabsize = TABSIZE
  427. expandtabs = EXPANDTABS
  428. for o, a in opts:
  429. if o == '-c':
  430. if action: error_both(o, action)
  431. # end if
  432. action = 'complete'
  433. elif o == '-d':
  434. if action: error_both(o, action)
  435. # end if
  436. action = 'delete'
  437. elif o == '-r':
  438. if action: error_both(o, action)
  439. # end if
  440. action = 'reformat'
  441. elif o == '-s':
  442. stepsize = int(a)
  443. elif o == '-t':
  444. tabsize = int(a)
  445. elif o == '-e':
  446. expandtabs = True
  447. # end if
  448. # end for
  449. if not action:
  450. sys.stderr.write(
  451. 'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
  452. sys.stderr.write(usage)
  453. sys.exit(2)
  454. # end if
  455. if not args or args == ['-']:
  456. action = eval(action + '_filter')
  457. action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
  458. else:
  459. action = eval(action + '_file')
  460. for filename in args:
  461. action(filename, stepsize, tabsize, expandtabs)
  462. # end for
  463. # end if
  464. # end def test
  465. if __name__ == '__main__':
  466. test()
  467. # end if