Skip to content
Snippets Groups Projects
Commit b439cacf authored by Jared Hancock
Browse files

html2text: Fix leading whitespace after <br>

parent 98018391
No related branches found
No related tags found
No related merge requests found
...@@ -47,8 +47,9 @@ function convert_html_to_text($html, $width=74) { ...@@ -47,8 +47,9 @@ function convert_html_to_text($html, $width=74) {
// Add the default stylesheet // Add the default stylesheet
$elements->getRoot()->addStylesheet( $elements->getRoot()->addStylesheet(
HtmlStylesheet::fromArray(array( HtmlStylesheet::fromArray(array(
'p' => array('margin-bottom' => 1), 'html' => array('white-space' => 'pre'), # Don't wrap footnotes
'pre' => array('border-width' => 1, 'white-space' => 'pre'), 'p' => array('margin-bottom' => '1em'),
'pre' => array('border-width' => '1em', 'white-space' => 'pre'),
)) ))
); );
$options = array(); $options = array();
...@@ -93,7 +94,7 @@ function identify_node($node, $parent=null) { ...@@ -93,7 +94,7 @@ function identify_node($node, $parent=null) {
case "hr": case "hr":
return new HtmlHrElement($node, $parent); return new HtmlHrElement($node, $parent);
case "br": case "br":
return "\n"; return new HtmlBrElement($node, $parent);
case "style": case "style":
$parent->getRoot()->addStylesheet(new HtmlStylesheet($node)); $parent->getRoot()->addStylesheet(new HtmlStylesheet($node));
...@@ -189,16 +190,17 @@ class HtmlInlineElement { ...@@ -189,16 +190,17 @@ class HtmlInlineElement {
foreach ($this->children as $c) { foreach ($this->children as $c) {
if ($c instanceof DOMText) { if ($c instanceof DOMText) {
// Collapse white-space // Collapse white-space
$more = $c->wholeText;
switch ($this->ws) { switch ($this->ws) {
case 'pre': case 'pre':
case 'pre-wrap': case 'pre-wrap':
$more = $c->wholeText; break;
break; case 'nowrap':
case 'nowrap': case 'pre-line':
case 'pre-line': case 'normal':
case 'normal': default:
default: if ($after_block) $more = ltrim($more);
$more = preg_replace('/\s+/m', ' ', $c->wholeText); $more = preg_replace('/\s+/m', ' ', $more);
} }
} }
elseif ($c instanceof HtmlInlineElement) { elseif ($c instanceof HtmlInlineElement) {
...@@ -207,6 +209,7 @@ class HtmlInlineElement { ...@@ -207,6 +209,7 @@ class HtmlInlineElement {
else { else {
$more = $c; $more = $c;
} }
$after_block = ($c instanceof HtmlBlockElement);
if ($more instanceof PreFormattedText) if ($more instanceof PreFormattedText)
$output = new PreFormattedText($output . $more); $output = new PreFormattedText($output . $more);
elseif (is_string($more)) elseif (is_string($more))
...@@ -334,14 +337,19 @@ class HtmlBlockElement extends HtmlInlineElement { ...@@ -334,14 +337,19 @@ class HtmlBlockElement extends HtmlInlineElement {
if ($c instanceof HtmlBlockElement) if ($c instanceof HtmlBlockElement)
$this->min_width = max($c->getMinWidth(), $this->min_width); $this->min_width = max($c->getMinWidth(), $this->min_width);
elseif ($c instanceof DomText) elseif ($c instanceof DomText)
$this->min_width = max(max(array_map('strlen', explode(' ', $c->wholeText))), $this->min_width = max(max(array_map('mb_strwidth',
$this->min_width); explode(' ', $c->wholeText))), $this->min_width);
} }
} }
return $this->min_width; return $this->min_width;
} }
} }
/**
 * Line break (<br>) element.
 *
 * Declared as a subclass of HtmlBlockElement, so it satisfies any
 * `instanceof HtmlBlockElement` check made on rendered children.
 */
class HtmlBrElement extends HtmlBlockElement {
    /**
     * Render the break as a single newline character.
     *
     * @param mixed $width   Not used by this element.
     * @param mixed $options Not used by this element.
     * @return string Always "\n".
     */
    function render($width, $options) {
        $lineBreak = "\n";
        return $lineBreak;
    }
}
class HtmlUElement extends HtmlInlineElement { class HtmlUElement extends HtmlInlineElement {
function render($width, $options) { function render($width, $options) {
$output = parent::render($width, $options); $output = parent::render($width, $options);
...@@ -399,7 +407,6 @@ class HtmlBlockquoteElement extends HtmlBlockElement { ...@@ -399,7 +407,6 @@ class HtmlBlockquoteElement extends HtmlBlockElement {
class HtmlCiteElement extends HtmlBlockElement { class HtmlCiteElement extends HtmlBlockElement {
function render($width, $options) { function render($width, $options) {
$options['trim'] = false;
$lines = explode("\n", ltrim(parent::render($width-3, $options))); $lines = explode("\n", ltrim(parent::render($width-3, $options)));
$lines[0] = "-- " . $lines[0]; $lines[0] = "-- " . $lines[0];
// Right justification // Right justification
...@@ -434,14 +441,15 @@ class HtmlAElement extends HtmlInlineElement { ...@@ -434,14 +441,15 @@ class HtmlAElement extends HtmlInlineElement {
if ($this->node->getAttribute("name") != null) { if ($this->node->getAttribute("name") != null) {
$output = "[$output]"; $output = "[$output]";
} }
} elseif (strpos($href, 'mailto:') === 0) {
$href = substr($href, 7);
$output = (($href != $output) ? "$href " : '') . "<$output>";
} elseif (mb_strwidth($href) > $width / 2) { } elseif (mb_strwidth($href) > $width / 2) {
if ($href != $output) if ($href != $output)
$this->getRoot()->addFootnote($output, $href); $this->getRoot()->addFootnote($output, $href);
$output = "[$output]"; $output = "[$output]";
} else { } elseif ($href != $output) {
if ($href != $output) { $output = "[$output]($href)";
$output = "[$output]($href)";
}
} }
return $output; return $output;
} }
...@@ -453,7 +461,6 @@ class HtmlListElement extends HtmlBlockElement { ...@@ -453,7 +461,6 @@ class HtmlListElement extends HtmlBlockElement {
function render($width, $options) { function render($width, $options) {
$options['marker'] = $this->marker; $options['marker'] = $this->marker;
$options['trim'] = false;
return parent::render($width, $options); return parent::render($width, $options);
} }
...@@ -494,7 +501,7 @@ class HtmlCodeElement extends HtmlInlineElement { ...@@ -494,7 +501,7 @@ class HtmlCodeElement extends HtmlInlineElement {
function render($width, $options) { function render($width, $options) {
$content = parent::render($width-2, $options); $content = parent::render($width-2, $options);
if (strpos($content, "\n")) if (strpos($content, "\n"))
return "```\n".$content."\n```"; return "```\n".trim($content)."\n```\n";
else else
return "`$content`"; return "`$content`";
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment