You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

542 lines
18 KiB

32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
32 years ago
  1. #! /usr/bin/env python3
  2. # This file contains a class and a main program that perform three
  3. # related (though complementary) formatting operations on Python
  4. # programs. When called as "pindent -c", it takes a valid Python
  5. # program as input and outputs a version augmented with block-closing
  6. # comments. When called as "pindent -d", it assumes its input is a
  7. # Python program with block-closing comments and outputs a commentless
  8. # version. When called as "pindent -r" it assumes its input is a
  9. # Python program with block-closing comments but with its indentation
  10. # messed up, and outputs a properly indented version.
  11. # A "block-closing comment" is a comment of the form '# end <keyword>'
  12. # where <keyword> is the keyword that opened the block. If the
  13. # opening keyword is 'def' or 'class', the function or class name may
  14. # be repeated in the block-closing comment as well. Here is an
  15. # example of a program fully augmented with block-closing comments:
  16. # def foobar(a, b):
  17. #    if a == b:
  18. #        a = a+1
  19. #    elif a < b:
  20. #        b = b-1
  21. #        if b > a: a = a-1
  22. #        # end if
  23. #    else:
  24. #        print 'oops!'
  25. #    # end if
  26. # # end def foobar
  27. # Note that only the last part of an if...elif...else... block needs a
  28. # block-closing comment; the same is true for other compound
  29. # statements (e.g. try...except). Also note that "short-form" blocks
  30. # like the second 'if' in the example must be closed as well;
  31. # otherwise the 'else' in the example would be ambiguous (remember
  32. # that indentation is not significant when interpreting block-closing
  33. # comments).
  34. # The operations are idempotent (i.e. applied to their own output
  35. # they yield an identical result). Running first "pindent -c" and
  36. # then "pindent -r" on a valid Python program produces a program that
  37. # is semantically identical to the input (though its indentation may
  38. # be different). Running "pindent -d" on that output produces a
  39. # program that only differs from the original in indentation.
  40. # Other options:
  41. # -s stepsize: set the indentation step size (default 8)
  42. # -t tabsize : set the number of spaces a tab character is worth (default 8)
  43. # -e : expand TABs into spaces
  44. # file ... : input file(s) (default standard input)
  45. # The results always go to standard output
  46. # Caveats:
  47. # - comments ending in a backslash will be mistaken for continued lines
  48. # - continuations using backslash are always left unchanged
  49. # - continuations inside parentheses are not extra indented by -r
  50. # but must be indented for -c to work correctly (this breaks
  51. # idempotency!)
  52. # - continued lines inside triple-quoted strings are totally garbled
  53. # Secret feature:
  54. # - On input, a block may also be closed with an "end statement" --
  55. # this is a block-closing comment without the '#' sign.
  56. # Possible improvements:
  57. # - check syntax based on transitions in 'next' table
  58. # - better error reporting
  59. # - better error recovery
  60. # - check identifier after class/def
  61. # The following wishes need a more complete tokenization of the source:
  62. # - Don't get fooled by comments ending in backslash
  63. # - reindent continuation lines indicated by backslash
  64. # - handle continuation lines inside parentheses/braces/brackets
  65. # - handle triple quoted strings spanning lines
  66. # - realign comments
  67. # - optionally do much more thorough reformatting, a la C indent
  68. # Defaults
  69. STEPSIZE = 8
  70. TABSIZE = 8
  71. EXPANDTABS = 0
  72. import re
  73. import sys
  74. next = {}
  75. next['if'] = next['elif'] = 'elif', 'else', 'end'
  76. next['while'] = next['for'] = 'else', 'end'
  77. next['try'] = 'except', 'finally'
  78. next['except'] = 'except', 'else', 'finally', 'end'
  79. next['else'] = next['finally'] = next['def'] = next['class'] = 'end'
  80. next['end'] = ()
  81. start = 'if', 'while', 'for', 'try', 'with', 'def', 'class'
  82. class PythonIndenter:
  83. def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
  84. indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  85. self.fpi = fpi
  86. self.fpo = fpo
  87. self.indentsize = indentsize
  88. self.tabsize = tabsize
  89. self.lineno = 0
  90. self.expandtabs = expandtabs
  91. self._write = fpo.write
  92. self.kwprog = re.compile(
  93. r'^\s*(?P<kw>[a-z]+)'
  94. r'(\s+(?P<id>[a-zA-Z_]\w*))?'
  95. r'[^\w]')
  96. self.endprog = re.compile(
  97. r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
  98. r'(\s+(?P<id>[a-zA-Z_]\w*))?'
  99. r'[^\w]')
  100. self.wsprog = re.compile(r'^[ \t]*')
  101. # end def __init__
  102. def write(self, line):
  103. if self.expandtabs:
  104. self._write(line.expandtabs(self.tabsize))
  105. else:
  106. self._write(line)
  107. # end if
  108. # end def write
  109. def readline(self):
  110. line = self.fpi.readline()
  111. if line: self.lineno = self.lineno + 1
  112. # end if
  113. return line
  114. # end def readline
  115. def error(self, fmt, *args):
  116. if args: fmt = fmt % args
  117. # end if
  118. sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
  119. self.write('### %s ###\n' % fmt)
  120. # end def error
  121. def getline(self):
  122. line = self.readline()
  123. while line[-2:] == '\\\n':
  124. line2 = self.readline()
  125. if not line2: break
  126. # end if
  127. line = line + line2
  128. # end while
  129. return line
  130. # end def getline
  131. def putline(self, line, indent = None):
  132. if indent is None:
  133. self.write(line)
  134. return
  135. # end if
  136. tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
  137. i = 0
  138. m = self.wsprog.match(line)
  139. if m: i = m.end()
  140. # end if
  141. self.write('\t'*tabs + ' '*spaces + line[i:])
  142. # end def putline
  143. def reformat(self):
  144. stack = []
  145. while 1:
  146. line = self.getline()
  147. if not line: break # EOF
  148. # end if
  149. m = self.endprog.match(line)
  150. if m:
  151. kw = 'end'
  152. kw2 = m.group('kw')
  153. if not stack:
  154. self.error('unexpected end')
  155. elif stack[-1][0] != kw2:
  156. self.error('unmatched end')
  157. # end if
  158. del stack[-1:]
  159. self.putline(line, len(stack))
  160. continue
  161. # end if
  162. m = self.kwprog.match(line)
  163. if m:
  164. kw = m.group('kw')
  165. if kw in start:
  166. self.putline(line, len(stack))
  167. stack.append((kw, kw))
  168. continue
  169. # end if
  170. if kw in next and stack:
  171. self.putline(line, len(stack)-1)
  172. kwa, kwb = stack[-1]
  173. stack[-1] = kwa, kw
  174. continue
  175. # end if
  176. # end if
  177. self.putline(line, len(stack))
  178. # end while
  179. if stack:
  180. self.error('unterminated keywords')
  181. for kwa, kwb in stack:
  182. self.write('\t%s\n' % kwa)
  183. # end for
  184. # end if
  185. # end def reformat
  186. def delete(self):
  187. begin_counter = 0
  188. end_counter = 0
  189. while 1:
  190. line = self.getline()
  191. if not line: break # EOF
  192. # end if
  193. m = self.endprog.match(line)
  194. if m:
  195. end_counter = end_counter + 1
  196. continue
  197. # end if
  198. m = self.kwprog.match(line)
  199. if m:
  200. kw = m.group('kw')
  201. if kw in start:
  202. begin_counter = begin_counter + 1
  203. # end if
  204. # end if
  205. self.putline(line)
  206. # end while
  207. if begin_counter - end_counter < 0:
  208. sys.stderr.write('Warning: input contained more end tags than expected\n')
  209. elif begin_counter - end_counter > 0:
  210. sys.stderr.write('Warning: input contained less end tags than expected\n')
  211. # end if
  212. # end def delete
  213. def complete(self):
  214. self.indentsize = 1
  215. stack = []
  216. todo = []
  217. thisid = ''
  218. current, firstkw, lastkw, topid = 0, '', '', ''
  219. while 1:
  220. line = self.getline()
  221. i = 0
  222. m = self.wsprog.match(line)
  223. if m: i = m.end()
  224. # end if
  225. m = self.endprog.match(line)
  226. if m:
  227. thiskw = 'end'
  228. endkw = m.group('kw')
  229. thisid = m.group('id')
  230. else:
  231. m = self.kwprog.match(line)
  232. if m:
  233. thiskw = m.group('kw')
  234. if thiskw not in next:
  235. thiskw = ''
  236. # end if
  237. if thiskw in ('def', 'class'):
  238. thisid = m.group('id')
  239. else:
  240. thisid = ''
  241. # end if
  242. elif line[i:i+1] in ('\n', '#'):
  243. todo.append(line)
  244. continue
  245. else:
  246. thiskw = ''
  247. # end if
  248. # end if
  249. indent = len(line[:i].expandtabs(self.tabsize))
  250. while indent < current:
  251. if firstkw:
  252. if topid:
  253. s = '# end %s %s\n' % (
  254. firstkw, topid)
  255. else:
  256. s = '# end %s\n' % firstkw
  257. # end if
  258. self.putline(s, current)
  259. firstkw = lastkw = ''
  260. # end if
  261. current, firstkw, lastkw, topid = stack[-1]
  262. del stack[-1]
  263. # end while
  264. if indent == current and firstkw:
  265. if thiskw == 'end':
  266. if endkw != firstkw:
  267. self.error('mismatched end')
  268. # end if
  269. firstkw = lastkw = ''
  270. elif not thiskw or thiskw in start:
  271. if topid:
  272. s = '# end %s %s\n' % (
  273. firstkw, topid)
  274. else:
  275. s = '# end %s\n' % firstkw
  276. # end if
  277. self.putline(s, current)
  278. firstkw = lastkw = topid = ''
  279. # end if
  280. # end if
  281. if indent > current:
  282. stack.append((current, firstkw, lastkw, topid))
  283. if thiskw and thiskw not in start:
  284. # error
  285. thiskw = ''
  286. # end if
  287. current, firstkw, lastkw, topid = \
  288. indent, thiskw, thiskw, thisid
  289. # end if
  290. if thiskw:
  291. if thiskw in start:
  292. firstkw = lastkw = thiskw
  293. topid = thisid
  294. else:
  295. lastkw = thiskw
  296. # end if
  297. # end if
  298. for l in todo: self.write(l)
  299. # end for
  300. todo = []
  301. if not line: break
  302. # end if
  303. self.write(line)
  304. # end while
  305. # end def complete
  306. # end class PythonIndenter
  307. # Simplified user interface
  308. # - xxx_filter(input, output): read and write file objects
  309. # - xxx_string(s): take and return string object
  310. # - xxx_file(filename): process file in place, return true iff changed
  311. def complete_filter(input = sys.stdin, output = sys.stdout,
  312. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  313. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  314. pi.complete()
  315. # end def complete_filter
  316. def delete_filter(input= sys.stdin, output = sys.stdout,
  317. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  318. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  319. pi.delete()
  320. # end def delete_filter
  321. def reformat_filter(input = sys.stdin, output = sys.stdout,
  322. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  323. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  324. pi.reformat()
  325. # end def reformat_filter
  326. class StringReader:
  327. def __init__(self, buf):
  328. self.buf = buf
  329. self.pos = 0
  330. self.len = len(self.buf)
  331. # end def __init__
  332. def read(self, n = 0):
  333. if n <= 0:
  334. n = self.len - self.pos
  335. else:
  336. n = min(n, self.len - self.pos)
  337. # end if
  338. r = self.buf[self.pos : self.pos + n]
  339. self.pos = self.pos + n
  340. return r
  341. # end def read
  342. def readline(self):
  343. i = self.buf.find('\n', self.pos)
  344. return self.read(i + 1 - self.pos)
  345. # end def readline
  346. def readlines(self):
  347. lines = []
  348. line = self.readline()
  349. while line:
  350. lines.append(line)
  351. line = self.readline()
  352. # end while
  353. return lines
  354. # end def readlines
  355. # seek/tell etc. are left as an exercise for the reader
  356. # end class StringReader
  357. class StringWriter:
  358. def __init__(self):
  359. self.buf = ''
  360. # end def __init__
  361. def write(self, s):
  362. self.buf = self.buf + s
  363. # end def write
  364. def getvalue(self):
  365. return self.buf
  366. # end def getvalue
  367. # end class StringWriter
  368. def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  369. input = StringReader(source)
  370. output = StringWriter()
  371. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  372. pi.complete()
  373. return output.getvalue()
  374. # end def complete_string
  375. def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  376. input = StringReader(source)
  377. output = StringWriter()
  378. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  379. pi.delete()
  380. return output.getvalue()
  381. # end def delete_string
  382. def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  383. input = StringReader(source)
  384. output = StringWriter()
  385. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  386. pi.reformat()
  387. return output.getvalue()
  388. # end def reformat_string
  389. def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  390. source = open(filename, 'r').read()
  391. result = complete_string(source, stepsize, tabsize, expandtabs)
  392. if source == result: return 0
  393. # end if
  394. import os
  395. try: os.rename(filename, filename + '~')
  396. except os.error: pass
  397. # end try
  398. f = open(filename, 'w')
  399. f.write(result)
  400. f.close()
  401. return 1
  402. # end def complete_file
  403. def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  404. source = open(filename, 'r').read()
  405. result = delete_string(source, stepsize, tabsize, expandtabs)
  406. if source == result: return 0
  407. # end if
  408. import os
  409. try: os.rename(filename, filename + '~')
  410. except os.error: pass
  411. # end try
  412. f = open(filename, 'w')
  413. f.write(result)
  414. f.close()
  415. return 1
  416. # end def delete_file
  417. def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  418. source = open(filename, 'r').read()
  419. result = reformat_string(source, stepsize, tabsize, expandtabs)
  420. if source == result: return 0
  421. # end if
  422. import os
  423. try: os.rename(filename, filename + '~')
  424. except os.error: pass
  425. # end try
  426. f = open(filename, 'w')
  427. f.write(result)
  428. f.close()
  429. return 1
  430. # end def reformat_file
  431. # Test program when called as a script
  432. usage = """
  433. usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
  434. -c : complete a correctly indented program (add #end directives)
  435. -d : delete #end directives
  436. -r : reformat a completed program (use #end directives)
  437. -s stepsize: indentation step (default %(STEPSIZE)d)
  438. -t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
  439. -e : expand TABs into spaces (defailt OFF)
  440. [file] ... : files are changed in place, with backups in file~
  441. If no files are specified or a single - is given,
  442. the program acts as a filter (reads stdin, writes stdout).
  443. """ % vars()
  444. def error_both(op1, op2):
  445. sys.stderr.write('Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n')
  446. sys.stderr.write(usage)
  447. sys.exit(2)
  448. # end def error_both
  449. def test():
  450. import getopt
  451. try:
  452. opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
  453. except getopt.error as msg:
  454. sys.stderr.write('Error: %s\n' % msg)
  455. sys.stderr.write(usage)
  456. sys.exit(2)
  457. # end try
  458. action = None
  459. stepsize = STEPSIZE
  460. tabsize = TABSIZE
  461. expandtabs = EXPANDTABS
  462. for o, a in opts:
  463. if o == '-c':
  464. if action: error_both(o, action)
  465. # end if
  466. action = 'complete'
  467. elif o == '-d':
  468. if action: error_both(o, action)
  469. # end if
  470. action = 'delete'
  471. elif o == '-r':
  472. if action: error_both(o, action)
  473. # end if
  474. action = 'reformat'
  475. elif o == '-s':
  476. stepsize = int(a)
  477. elif o == '-t':
  478. tabsize = int(a)
  479. elif o == '-e':
  480. expandtabs = 1
  481. # end if
  482. # end for
  483. if not action:
  484. sys.stderr.write(
  485. 'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
  486. sys.stderr.write(usage)
  487. sys.exit(2)
  488. # end if
  489. if not args or args == ['-']:
  490. action = eval(action + '_filter')
  491. action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
  492. else:
  493. action = eval(action + '_file')
  494. for filename in args:
  495. action(filename, stepsize, tabsize, expandtabs)
  496. # end for
  497. # end if
  498. # end def test
  499. if __name__ == '__main__':
  500. test()
  501. # end if