mirror of
git://repo.or.cz/docutils.git
synced 2025-10-05 16:22:48 +02:00
rST parser: allow for combining characters in grid tables.
Ignore combining characters when extracting a grid table block and when parsing the grid table structure. Allow for combining characters when extracting a 2D block with cell content. Missing part of the fixes in [r7231]. Fixes [bugs:#128] and [bugs:#512]. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@10251 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
This commit is contained in:
@@ -17,7 +17,17 @@
|
||||
Release 0.22.3b1.dev (unpublished)
|
||||
==================================
|
||||
|
||||
.
|
||||
* docutils/parsers/rst/states.py
|
||||
|
||||
- Ignore combining characters when extracting a grid table block
|
||||
|
||||
* docutils/parsers/rst/tableparser.py
|
||||
|
||||
- Ignore combining characters when parsing the grid table structure.
|
||||
|
||||
* docutils/statemachine.py
|
||||
|
||||
- Fix handling of combining characters when extracting a 2D block.
|
||||
|
||||
|
||||
Release 0.22.2 (2025-09-20)
|
||||
|
@@ -266,7 +266,9 @@ Misc
|
||||
Release 0.22.3b1.dev (unpublished)
|
||||
==================================
|
||||
|
||||
.
|
||||
Rst parser:
|
||||
Allow for combining characters in grid tables.
|
||||
Fixes bugs #128 and #512.
|
||||
|
||||
|
||||
Release 0.22.2 (2025-09-20)
|
||||
@@ -278,19 +280,11 @@ Remove a spurious vim .swp-file.
|
||||
Release 0.22.1 (2025-09-17)
|
||||
===========================
|
||||
|
||||
* docutils/parsers/rst/states.py
|
||||
|
||||
Rst parser:
|
||||
- Relax "section title" system messages from SEVERE to ERROR.
|
||||
- Fix behaviour with nested parsing into a detached node
|
||||
(cf. bugs #508 and #509).
|
||||
- New attribute `NestedStateMachine.parent_state_machine`.
|
||||
Use case: update the "current node" of parent state machine(s)
|
||||
after nested parsing.
|
||||
- Better error messages for grid table markup errors (bug #504),
|
||||
based on patch #214 by Jynn Nelson.
|
||||
|
||||
* docutils/writers/latex2e/__init__.py
|
||||
- New attribute `parsers.rst.states.NestedStateMachine.parent_state_machine`.
|
||||
|
||||
LaTeX writer:
|
||||
- Add cross-reference anchors (``\phantomsection\label{...}``)
|
||||
for elements with IDs (fixes bug #503).
|
||||
- Fix cross-reference anchor placement in figures, images,
|
||||
|
@@ -116,7 +116,7 @@ from docutils.nodes import fully_normalize_name as normalize_name
|
||||
from docutils.nodes import unescape, whitespace_normalize_name
|
||||
import docutils.parsers.rst
|
||||
from docutils.parsers.rst import directives, languages, tableparser, roles
|
||||
from docutils.utils import escape2null, column_width
|
||||
from docutils.utils import escape2null, column_width, strip_combining_chars
|
||||
from docutils.utils import punctuation_chars, urischemes
|
||||
from docutils.utils import split_escaped_whitespace
|
||||
from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
|
||||
@@ -1848,7 +1848,8 @@ class Body(RSTState):
|
||||
messages.extend(self.malformed_table(block, detail, i))
|
||||
return [], messages, blank_finish
|
||||
for i in range(len(block)): # check right edge
|
||||
if len(block[i]) != width or block[i][-1] not in '+|':
|
||||
if len(strip_combining_chars(block[i])
|
||||
) != width or block[i][-1] not in '+|':
|
||||
detail = 'Right border not aligned or missing.'
|
||||
messages.extend(self.malformed_table(block, detail, i))
|
||||
return [], messages, blank_finish
|
||||
|
@@ -167,6 +167,9 @@ class GridTableParser(TableParser):
|
||||
We'll end up knowing all the row and column boundaries, cell positions
|
||||
and their dimensions.
|
||||
"""
|
||||
# a copy of the block without combining characters:
|
||||
self.stripped_block = [strip_combining_chars(line)
|
||||
for line in self.block]
|
||||
corners = [(0, 0)]
|
||||
while corners:
|
||||
top, left = corners.pop(0)
|
||||
@@ -209,7 +212,7 @@ class GridTableParser(TableParser):
|
||||
|
||||
def scan_cell(self, top, left):
|
||||
"""Starting at the top-left corner, start tracing out a cell."""
|
||||
assert self.block[top][left] == '+'
|
||||
assert self.stripped_block[top][left] == '+'
|
||||
return self.scan_right(top, left)
|
||||
|
||||
def scan_right(self, top, left):
|
||||
@@ -218,7 +221,7 @@ class GridTableParser(TableParser):
|
||||
boundaries ('+').
|
||||
"""
|
||||
colseps = {}
|
||||
line = self.block[top]
|
||||
line = self.stripped_block[top]
|
||||
for i in range(left + 1, self.right + 1):
|
||||
if line[i] == '+':
|
||||
colseps[i] = [top]
|
||||
@@ -238,14 +241,14 @@ class GridTableParser(TableParser):
|
||||
"""
|
||||
rowseps = {}
|
||||
for i in range(top + 1, self.bottom + 1):
|
||||
if self.block[i][right] == '+':
|
||||
if self.stripped_block[i][right] == '+':
|
||||
rowseps[i] = [right]
|
||||
result = self.scan_left(top, left, i, right)
|
||||
if result:
|
||||
newrowseps, colseps = result
|
||||
update_dict_of_lists(rowseps, newrowseps)
|
||||
return i, rowseps, colseps
|
||||
elif self.block[i][right] != '|':
|
||||
elif self.stripped_block[i][right] != '|':
|
||||
return None
|
||||
return None
|
||||
|
||||
@@ -255,7 +258,7 @@ class GridTableParser(TableParser):
|
||||
It must line up with the starting point.
|
||||
"""
|
||||
colseps = {}
|
||||
line = self.block[bottom]
|
||||
line = self.stripped_block[bottom]
|
||||
for i in range(right - 1, left, -1):
|
||||
if line[i] == '+':
|
||||
colseps[i] = [bottom]
|
||||
@@ -275,9 +278,9 @@ class GridTableParser(TableParser):
|
||||
"""
|
||||
rowseps = {}
|
||||
for i in range(bottom - 1, top, -1):
|
||||
if self.block[i][left] == '+':
|
||||
if self.stripped_block[i][left] == '+':
|
||||
rowseps[i] = [left]
|
||||
elif self.block[i][left] != '|':
|
||||
elif self.stripped_block[i][left] != '|':
|
||||
return None
|
||||
return rowseps
|
||||
|
||||
|
@@ -1426,18 +1426,18 @@ class StringList(ViewList):
|
||||
def get_2D_block(self, top, left, bottom, right, strip_indent=True):
|
||||
block = self[top:bottom]
|
||||
indent = right
|
||||
for i in range(len(block.data)):
|
||||
# get slice from line, care for combining characters
|
||||
ci = utils.column_indices(block.data[i])
|
||||
for i, line in enumerate(block.data):
|
||||
# trim line to block borders, allow for combining characters
|
||||
adjusted_indices = utils.column_indices(line)
|
||||
try:
|
||||
left = ci[left]
|
||||
left_i = adjusted_indices[left]
|
||||
except IndexError:
|
||||
left += len(block.data[i]) - len(ci)
|
||||
left_i = left
|
||||
try:
|
||||
right = ci[right]
|
||||
right_i = adjusted_indices[right]
|
||||
except IndexError:
|
||||
right += len(block.data[i]) - len(ci)
|
||||
block.data[i] = line = block.data[i][left:right].rstrip()
|
||||
right_i = len(line)
|
||||
block.data[i] = line = line[left_i:right_i].rstrip()
|
||||
if line:
|
||||
indent = min(indent, len(line) - len(line.lstrip()))
|
||||
if strip_indent and 0 < indent < right:
|
||||
|
@@ -73,32 +73,32 @@ totest['grid_tables'] = [
|
||||
[],
|
||||
[[(0, 0, 1, ['A table with']),
|
||||
(0, 0, 1, ['two columns.'])]])],
|
||||
# Combining chars in grid tables still fail
|
||||
# ["""\
|
||||
# +--------------+------------------+
|
||||
# | A tāble w̅ith | comb̲ining chars. |
|
||||
# +--------------+------------------+
|
||||
# """,
|
||||
# [(0, 0, 2, 15, ['A table with']),
|
||||
# (0, 15, 2, 30, ['combining chars.'])],
|
||||
# ([14, 14],
|
||||
# [],
|
||||
# [[(0, 0, 1, ['A table with']),
|
||||
# (0, 0, 1, ['combining chars.'])]])],
|
||||
# Combining chars in table cells
|
||||
["""\
|
||||
+--------------+------------------+
|
||||
| A tāble w̅ith | comb̲ining chars. |
|
||||
+--------------+------------------+
|
||||
""",
|
||||
[(0, 0, 2, 15, ['A tāble w̅ith']),
|
||||
(0, 15, 2, 34, ['comb̲ining chars.'])],
|
||||
([14, 18],
|
||||
[],
|
||||
[[(0, 0, 1, ['A tāble w̅ith']),
|
||||
(0, 0, 1, ['comb̲ining chars.'])]])],
|
||||
["""\
|
||||
+--------------+-------------+
|
||||
| A table with | two columns |
|
||||
| A tāble w̅ith | two columns |
|
||||
+--------------+-------------+
|
||||
| and | two rows. |
|
||||
+--------------+-------------+
|
||||
""",
|
||||
[(0, 0, 2, 15, ['A table with']),
|
||||
[(0, 0, 2, 15, ['A tāble w̅ith']),
|
||||
(0, 15, 2, 29, ['two columns']),
|
||||
(2, 0, 4, 15, ['and']),
|
||||
(2, 15, 4, 29, ['two rows.'])],
|
||||
([14, 13],
|
||||
[],
|
||||
[[(0, 0, 1, ['A table with']),
|
||||
[[(0, 0, 1, ['A tāble w̅ith']),
|
||||
(0, 0, 1, ['two columns'])],
|
||||
[(0, 0, 3, ['and']),
|
||||
(0, 0, 3, ['two rows.'])]])],
|
||||
@@ -126,18 +126,18 @@ totest['grid_tables'] = [
|
||||
None]])],
|
||||
["""\
|
||||
+------------+-------------+---------------+
|
||||
| A table | two rows in | and row spans |
|
||||
| with three +-------------+ to left and |
|
||||
| A tāble | two rows in | and row spans |
|
||||
| with t̲h̲r̲e̲e̲ +-------------+ to left and |
|
||||
| columns, | the middle, | right. |
|
||||
+------------+-------------+---------------+
|
||||
""",
|
||||
[(0, 0, 4, 13, ['A table', 'with three', 'columns,']),
|
||||
[(0, 0, 4, 13, ['A tāble', 'with t̲h̲r̲e̲e̲', 'columns,']),
|
||||
(0, 13, 2, 27, ['two rows in']),
|
||||
(0, 27, 4, 43, ['and row spans', 'to left and', 'right.']),
|
||||
(2, 13, 4, 27, ['the middle,'])],
|
||||
([12, 13, 15],
|
||||
[],
|
||||
[[(1, 0, 1, ['A table', 'with three', 'columns,']),
|
||||
[[(1, 0, 1, ['A tāble', 'with t̲h̲r̲e̲e̲', 'columns,']),
|
||||
(0, 0, 1, ['two rows in']),
|
||||
(1, 0, 1, ['and row spans', 'to left and', 'right.'])],
|
||||
[None,
|
||||
|
Reference in New Issue
Block a user