formatter.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
"""Generic output formatting.

Formatter objects transform an abstract flow of formatting events into
specific output events on writer objects. Formatters manage several stack
structures to allow various properties of a writer object to be changed and
restored; writers need not be able to handle relative changes nor any sort
of ``change back'' operation. Specific writer properties which may be
controlled via formatter objects are horizontal alignment, font, and left
margin indentations. A mechanism is provided which supports providing
arbitrary, non-exclusive style settings to a writer as well. Additional
interfaces facilitate formatting events which are not reversible, such as
paragraph separation.

Writer objects encapsulate device interfaces. Abstract devices, such as
file formats, are supported as well as physical devices. The provided
implementations all work with abstract devices. The interface makes
available mechanisms for setting the properties which formatter objects
manage and inserting data into the output.
"""
  18. import sys
  19. AS_IS = None
  20. class NullFormatter:
  21. """A formatter which does nothing.
  22. If the writer parameter is omitted, a NullWriter instance is created.
  23. No methods of the writer are called by NullFormatter instances.
  24. Implementations should inherit from this class if implementing a writer
  25. interface but don't need to inherit any implementation.
  26. """
  27. def __init__(self, writer=None):
  28. if writer is None:
  29. writer = NullWriter()
  30. self.writer = writer
  31. def end_paragraph(self, blankline): pass
  32. def add_line_break(self): pass
  33. def add_hor_rule(self, *args, **kw): pass
  34. def add_label_data(self, format, counter, blankline=None): pass
  35. def add_flowing_data(self, data): pass
  36. def add_literal_data(self, data): pass
  37. def flush_softspace(self): pass
  38. def push_alignment(self, align): pass
  39. def pop_alignment(self): pass
  40. def push_font(self, x): pass
  41. def pop_font(self): pass
  42. def push_margin(self, margin): pass
  43. def pop_margin(self): pass
  44. def set_spacing(self, spacing): pass
  45. def push_style(self, *styles): pass
  46. def pop_style(self, n=1): pass
  47. def assert_line_data(self, flag=1): pass
  48. class AbstractFormatter:
  49. """The standard formatter.
  50. This implementation has demonstrated wide applicability to many writers,
  51. and may be used directly in most circumstances. It has been used to
  52. implement a full-featured World Wide Web browser.
  53. """
  54. # Space handling policy: blank spaces at the boundary between elements
  55. # are handled by the outermost context. "Literal" data is not checked
  56. # to determine context, so spaces in literal data are handled directly
  57. # in all circumstances.
  58. def __init__(self, writer):
  59. self.writer = writer # Output device
  60. self.align = None # Current alignment
  61. self.align_stack = [] # Alignment stack
  62. self.font_stack = [] # Font state
  63. self.margin_stack = [] # Margin state
  64. self.spacing = None # Vertical spacing state
  65. self.style_stack = [] # Other state, e.g. color
  66. self.nospace = 1 # Should leading space be suppressed
  67. self.softspace = 0 # Should a space be inserted
  68. self.para_end = 1 # Just ended a paragraph
  69. self.parskip = 0 # Skipped space between paragraphs?
  70. self.hard_break = 1 # Have a hard break
  71. self.have_label = 0
  72. def end_paragraph(self, blankline):
  73. if not self.hard_break:
  74. self.writer.send_line_break()
  75. self.have_label = 0
  76. if self.parskip < blankline and not self.have_label:
  77. self.writer.send_paragraph(blankline - self.parskip)
  78. self.parskip = blankline
  79. self.have_label = 0
  80. self.hard_break = self.nospace = self.para_end = 1
  81. self.softspace = 0
  82. def add_line_break(self):
  83. if not (self.hard_break or self.para_end):
  84. self.writer.send_line_break()
  85. self.have_label = self.parskip = 0
  86. self.hard_break = self.nospace = 1
  87. self.softspace = 0
  88. def add_hor_rule(self, *args, **kw):
  89. if not self.hard_break:
  90. self.writer.send_line_break()
  91. self.writer.send_hor_rule(*args, **kw)
  92. self.hard_break = self.nospace = 1
  93. self.have_label = self.para_end = self.softspace = self.parskip = 0
  94. def add_label_data(self, format, counter, blankline = None):
  95. if self.have_label or not self.hard_break:
  96. self.writer.send_line_break()
  97. if not self.para_end:
  98. self.writer.send_paragraph((blankline and 1) or 0)
  99. if isinstance(format, str):
  100. self.writer.send_label_data(self.format_counter(format, counter))
  101. else:
  102. self.writer.send_label_data(format)
  103. self.nospace = self.have_label = self.hard_break = self.para_end = 1
  104. self.softspace = self.parskip = 0
  105. def format_counter(self, format, counter):
  106. label = ''
  107. for c in format:
  108. if c == '1':
  109. label = label + ('%d' % counter)
  110. elif c in 'aA':
  111. if counter > 0:
  112. label = label + self.format_letter(c, counter)
  113. elif c in 'iI':
  114. if counter > 0:
  115. label = label + self.format_roman(c, counter)
  116. else:
  117. label = label + c
  118. return label
  119. def format_letter(self, case, counter):
  120. label = ''
  121. while counter > 0:
  122. counter, x = divmod(counter-1, 26)
  123. # This makes a strong assumption that lowercase letters
  124. # and uppercase letters form two contiguous blocks, with
  125. # letters in order!
  126. s = chr(ord(case) + x)
  127. label = s + label
  128. return label
  129. def format_roman(self, case, counter):
  130. ones = ['i', 'x', 'c', 'm']
  131. fives = ['v', 'l', 'd']
  132. label, index = '', 0
  133. # This will die of IndexError when counter is too big
  134. while counter > 0:
  135. counter, x = divmod(counter, 10)
  136. if x == 9:
  137. label = ones[index] + ones[index+1] + label
  138. elif x == 4:
  139. label = ones[index] + fives[index] + label
  140. else:
  141. if x >= 5:
  142. s = fives[index]
  143. x = x-5
  144. else:
  145. s = ''
  146. s = s + ones[index]*x
  147. label = s + label
  148. index = index + 1
  149. if case == 'I':
  150. return label.upper()
  151. return label
  152. def add_flowing_data(self, data):
  153. if not data: return
  154. # The following looks a bit convoluted but is a great improvement over
  155. # data = regsub.gsub('[' + string.whitespace + ']+', ' ', data)
  156. prespace = data[:1].isspace()
  157. postspace = data[-1:].isspace()
  158. data = " ".join(data.split())
  159. if self.nospace and not data:
  160. return
  161. elif prespace or self.softspace:
  162. if not data:
  163. if not self.nospace:
  164. self.softspace = 1
  165. self.parskip = 0
  166. return
  167. if not self.nospace:
  168. data = ' ' + data
  169. self.hard_break = self.nospace = self.para_end = \
  170. self.parskip = self.have_label = 0
  171. self.softspace = postspace
  172. self.writer.send_flowing_data(data)
  173. def add_literal_data(self, data):
  174. if not data: return
  175. if self.softspace:
  176. self.writer.send_flowing_data(" ")
  177. self.hard_break = data[-1:] == '\n'
  178. self.nospace = self.para_end = self.softspace = \
  179. self.parskip = self.have_label = 0
  180. self.writer.send_literal_data(data)
  181. def flush_softspace(self):
  182. if self.softspace:
  183. self.hard_break = self.para_end = self.parskip = \
  184. self.have_label = self.softspace = 0
  185. self.nospace = 1
  186. self.writer.send_flowing_data(' ')
  187. def push_alignment(self, align):
  188. if align and align != self.align:
  189. self.writer.new_alignment(align)
  190. self.align = align
  191. self.align_stack.append(align)
  192. else:
  193. self.align_stack.append(self.align)
  194. def pop_alignment(self):
  195. if self.align_stack:
  196. del self.align_stack[-1]
  197. if self.align_stack:
  198. self.align = align = self.align_stack[-1]
  199. self.writer.new_alignment(align)
  200. else:
  201. self.align = None
  202. self.writer.new_alignment(None)
  203. def push_font(self, (size, i, b, tt)):
  204. if self.softspace:
  205. self.hard_break = self.para_end = self.softspace = 0
  206. self.nospace = 1
  207. self.writer.send_flowing_data(' ')
  208. if self.font_stack:
  209. csize, ci, cb, ctt = self.font_stack[-1]
  210. if size is AS_IS: size = csize
  211. if i is AS_IS: i = ci
  212. if b is AS_IS: b = cb
  213. if tt is AS_IS: tt = ctt
  214. font = (size, i, b, tt)
  215. self.font_stack.append(font)
  216. self.writer.new_font(font)
  217. def pop_font(self):
  218. if self.font_stack:
  219. del self.font_stack[-1]
  220. if self.font_stack:
  221. font = self.font_stack[-1]
  222. else:
  223. font = None
  224. self.writer.new_font(font)
  225. def push_margin(self, margin):
  226. self.margin_stack.append(margin)
  227. fstack = filter(None, self.margin_stack)
  228. if not margin and fstack:
  229. margin = fstack[-1]
  230. self.writer.new_margin(margin, len(fstack))
  231. def pop_margin(self):
  232. if self.margin_stack:
  233. del self.margin_stack[-1]
  234. fstack = filter(None, self.margin_stack)
  235. if fstack:
  236. margin = fstack[-1]
  237. else:
  238. margin = None
  239. self.writer.new_margin(margin, len(fstack))
  240. def set_spacing(self, spacing):
  241. self.spacing = spacing
  242. self.writer.new_spacing(spacing)
  243. def push_style(self, *styles):
  244. if self.softspace:
  245. self.hard_break = self.para_end = self.softspace = 0
  246. self.nospace = 1
  247. self.writer.send_flowing_data(' ')
  248. for style in styles:
  249. self.style_stack.append(style)
  250. self.writer.new_styles(tuple(self.style_stack))
  251. def pop_style(self, n=1):
  252. del self.style_stack[-n:]
  253. self.writer.new_styles(tuple(self.style_stack))
  254. def assert_line_data(self, flag=1):
  255. self.nospace = self.hard_break = not flag
  256. self.para_end = self.parskip = self.have_label = 0
  257. class NullWriter:
  258. """Minimal writer interface to use in testing & inheritance.
  259. A writer which only provides the interface definition; no actions are
  260. taken on any methods. This should be the base class for all writers
  261. which do not need to inherit any implementation methods.
  262. """
  263. def __init__(self): pass
  264. def flush(self): pass
  265. def new_alignment(self, align): pass
  266. def new_font(self, font): pass
  267. def new_margin(self, margin, level): pass
  268. def new_spacing(self, spacing): pass
  269. def new_styles(self, styles): pass
  270. def send_paragraph(self, blankline): pass
  271. def send_line_break(self): pass
  272. def send_hor_rule(self, *args, **kw): pass
  273. def send_label_data(self, data): pass
  274. def send_flowing_data(self, data): pass
  275. def send_literal_data(self, data): pass
  276. class AbstractWriter(NullWriter):
  277. """A writer which can be used in debugging formatters, but not much else.
  278. Each method simply announces itself by printing its name and
  279. arguments on standard output.
  280. """
  281. def new_alignment(self, align):
  282. print "new_alignment(%r)" % (align,)
  283. def new_font(self, font):
  284. print "new_font(%r)" % (font,)
  285. def new_margin(self, margin, level):
  286. print "new_margin(%r, %d)" % (margin, level)
  287. def new_spacing(self, spacing):
  288. print "new_spacing(%r)" % (spacing,)
  289. def new_styles(self, styles):
  290. print "new_styles(%r)" % (styles,)
  291. def send_paragraph(self, blankline):
  292. print "send_paragraph(%r)" % (blankline,)
  293. def send_line_break(self):
  294. print "send_line_break()"
  295. def send_hor_rule(self, *args, **kw):
  296. print "send_hor_rule()"
  297. def send_label_data(self, data):
  298. print "send_label_data(%r)" % (data,)
  299. def send_flowing_data(self, data):
  300. print "send_flowing_data(%r)" % (data,)
  301. def send_literal_data(self, data):
  302. print "send_literal_data(%r)" % (data,)
  303. class DumbWriter(NullWriter):
  304. """Simple writer class which writes output on the file object passed in
  305. as the file parameter or, if file is omitted, on standard output. The
  306. output is simply word-wrapped to the number of columns specified by
  307. the maxcol parameter. This class is suitable for reflowing a sequence
  308. of paragraphs.
  309. """
  310. def __init__(self, file=None, maxcol=72):
  311. self.file = file or sys.stdout
  312. self.maxcol = maxcol
  313. NullWriter.__init__(self)
  314. self.reset()
  315. def reset(self):
  316. self.col = 0
  317. self.atbreak = 0
  318. def send_paragraph(self, blankline):
  319. self.file.write('\n'*blankline)
  320. self.col = 0
  321. self.atbreak = 0
  322. def send_line_break(self):
  323. self.file.write('\n')
  324. self.col = 0
  325. self.atbreak = 0
  326. def send_hor_rule(self, *args, **kw):
  327. self.file.write('\n')
  328. self.file.write('-'*self.maxcol)
  329. self.file.write('\n')
  330. self.col = 0
  331. self.atbreak = 0
  332. def send_literal_data(self, data):
  333. self.file.write(data)
  334. i = data.rfind('\n')
  335. if i >= 0:
  336. self.col = 0
  337. data = data[i+1:]
  338. data = data.expandtabs()
  339. self.col = self.col + len(data)
  340. self.atbreak = 0
  341. def send_flowing_data(self, data):
  342. if not data: return
  343. atbreak = self.atbreak or data[0].isspace()
  344. col = self.col
  345. maxcol = self.maxcol
  346. write = self.file.write
  347. for word in data.split():
  348. if atbreak:
  349. if col + len(word) >= maxcol:
  350. write('\n')
  351. col = 0
  352. else:
  353. write(' ')
  354. col = col + 1
  355. write(word)
  356. col = col + len(word)
  357. atbreak = 1
  358. self.col = col
  359. self.atbreak = data[-1].isspace()
  360. def test(file = None):
  361. w = DumbWriter()
  362. f = AbstractFormatter(w)
  363. if file is not None:
  364. fp = open(file)
  365. elif sys.argv[1:]:
  366. fp = open(sys.argv[1])
  367. else:
  368. fp = sys.stdin
  369. while 1:
  370. line = fp.readline()
  371. if not line:
  372. break
  373. if line == '\n':
  374. f.end_paragraph(1)
  375. else:
  376. f.add_flowing_data(line)
  377. f.end_paragraph(0)
# Run the reflow demonstration when the module is executed as a script.
if __name__ == '__main__':
    test()