Skip to content

Commit 3cfc695

Browse files
author
Wouter Coppieters
committed
Squish surrounding whitespace in node text
1 parent b404927 commit 3cfc695

File tree

8 files changed

+27
-18
lines changed

8 files changed

+27
-18
lines changed

Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "htmltoadf"
3-
version = "0.1.4"
3+
version = "0.1.5"
44
edition = "2021"
55
license = "MIT"
66
description = "An HTML to Atlassian Document Format (ADF) converter"

README.md

+8-7
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,20 @@ https://wouterken.github.io/htmltoadf/
3131
### Install Binary from Crates.io with `cargo install`
3232
```
3333
$ cargo install htmltoadf
34-
installing htmltoadf v0.1.4 (/usr/src/html2adf)
34+
installing htmltoadf v0.1.5 (/usr/src/html2adf)
3535
Updating crates.io index
3636
Downloading crates ...
3737
Downloaded lock_api v0.4.6
3838
--snip--
39-
Compiling htmltoadf v0.1.4
39+
Compiling htmltoadf v0.1.5
4040
Finished release [optimized] target(s) in 1m 42s
4141
Installing ~/.cargo/bin/htmltoadf
42-
Installed package `htmltoadf v0.1.4` (executable `html2adf`)
42+
Installed package `htmltoadf v0.1.5` (executable `html2adf`)
4343
```
4444

4545
### Download Binary file from Github
4646
Pre-built binaries can be downloaded from here:
47-
https://github.com/wouterken/htmltoadf/releases/tag/0.1.2
47+
https://github.com/wouterken/htmltoadf/releases/tag/0.1.5
4848

4949
### Docker Image
5050
**Docker Repo:**
@@ -54,10 +54,10 @@ https://hub.docker.com/r/wouterken/html2adf
5454
**Usage**
5555

5656
```bash
57-
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.4
57+
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.5
5858
{"version":1,"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"type":"text","text":"Hello world"},{"type":"text","text":"Test"}]}]}
5959

60-
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.4 | jq
60+
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.5 | jq
6161
{
6262
"version": 1,
6363
"type": "doc",
@@ -90,7 +90,7 @@ $ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf
9090

9191
```toml
9292
[dependencies]
93-
htmltoadf = "0.1.4"
93+
htmltoadf = "0.1.5"
9494
```
9595

9696
**Code**
@@ -201,6 +201,7 @@ The following features are implemented:
201201
* Compile binaries and create release
202202
* Build and push Docker image
203203
* Build and push WASM NPM package
204+
* Push crate
204205
* Update dependency in demo page
205206
* Push to VCS
206207

docs/index.html

+2-2
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878

7979
</style>
8080
<script defer type="module">
81-
import init, {convert} from "https://unpkg.com/htmltoadf@0.1.4/htmltoadf.js";
81+
import init, {convert} from "https://unpkg.com/htmltoadf@0.1.5/htmltoadf.js";
8282

8383
let editor;
8484

@@ -106,7 +106,7 @@ <h1>Header 1</h1>
106106
var str = '<span class=json-string>';
107107
var r = pIndent || '';
108108
if (pKey)
109-
r = r + key + pKey.replace(/[": ]/g, '') + '</span>: ';
109+
r = r + '"'+key + pKey.replace(/[": ]/g, '') + '"</span>: ';
110110
if (pVal)
111111
r = r + (pVal[0] == '"' ? str : val) + pVal + '</span>';
112112
return r + (pEnd || '');

src/extractor.rs

+12-4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@ pub fn esc_hr(hrstr: String) -> String {
2222
.to_string();
2323
}
2424

25+
/**
26+
* Collapse any leading run and any trailing run of whitespace into a single space each; interior text is left unchanged.
27+
*/
28+
pub fn squish_surrounding_whitespace(input: &str) -> String {
29+
let re = Regex::new(r"^\s+|\s+$").unwrap();
30+
re.replace_all(input, " ").to_string()
31+
}
32+
2533
/**
2634
* We parse a raw scraper::HTML and return a
2735
* list of leaf doc nodes (each with a linked list pointer to the root)
@@ -38,22 +46,22 @@ pub fn extract_leaves(fragment: &Html) -> Vec<DocNode> {
3846
if element.value().name() == "iframe" || element.value().name() == "img" {
3947
leaf_nodes.push(DocNode {
4048
name: element.value().name().trim(),
41-
text: "".trim(),
49+
text: "".trim().to_owned(),
4250
node,
4351
})
4452
} else if element.value().name() == HRBR_PLACEHOLDER {
4553
leaf_nodes.push(DocNode {
4654
name: "hr",
47-
text: "".trim(),
55+
text: "".trim().to_owned(),
4856
node,
4957
})
5058
}
5159
} else if let Node::Text(text_node) = node.value() {
5260
if !text_node.text.trim().is_empty() {
5361
leaf_nodes.push(DocNode {
5462
name: "text",
55-
text: text_node.text.trim(),
56-
node: node,
63+
text: squish_surrounding_whitespace(&text_node.text),
64+
node,
5765
})
5866
}
5967
}

src/tests/combination.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ fn top_level() {
2222
</li>
2323
<li>
2424
<div style='color: #0F0'>
25-
<span> With some blue text inside </span>
25+
<span>With some blue text inside</span>
2626
</div>
2727
</li>
2828
<li>

src/tests/lists.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ fn nested_1() {
167167
"content": [
168168
{
169169
"type": "text",
170-
"text": "Nested List"
170+
"text": " Nested List "
171171
}
172172
]
173173
},

src/types/doc_node.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::fmt;
55

66
pub struct DocNode<'a> {
77
pub name: &'a str,
8-
pub text: &'a str,
8+
pub text: String,
99
pub node: NodeRef<'a, Node>,
1010
}
1111

0 commit comments

Comments
 (0)