Browse code

BM-14600 Fix: trim white space in text/plain preview

David Phan authored on 10/04/2019 13:07:36
Showing 4 changed files
... ...
@@ -94,13 +94,14 @@ public class BodyStreamProcessor {
94 94
 	private static Set<String> whiteList = buildWhiteList();
95 95
 
96 96
 	/**
97
-	 * The version of the DB body this {@link BodyStreamProcessor} produces from an
98
-	 * IMAP message.
97
+	 * The version of the DB body this {@link BodyStreamProcessor} produces from
98
+	 * an IMAP message.
99 99
 	 */
100 100
 	public static final int BODY_VERSION;
101 101
 
102 102
 	static {
103
-		// initialization in a separate static bloc enables tests to modify this final
103
+		// initialization in a separate static block enables tests to modify this
104
+		// final
104 105
 		// field using reflection
105 106
 		BODY_VERSION = 2;
106 107
 	}
... ...
@@ -234,7 +235,7 @@ public class BodyStreamProcessor {
234 235
 			for (AddressableEntity ae : parts) {
235 236
 				String mime = ae.getMimeType();
236 237
 				if (Mime4JHelper.TEXT_PLAIN.equals(mime) && !Mime4JHelper.isAttachment(ae)) {
237
-					return getBodyContent(ae);
238
+					return NekoHelper.trimWhiteSpace(getBodyContent(ae)).trim();
238 239
 				} else if (html == null && Mime4JHelper.TEXT_HTML.equals(mime) && !Mime4JHelper.isAttachment(ae)) {
239 240
 					html = getBodyContent(ae);
240 241
 				}
241 242
new file mode 100644
... ...
@@ -0,0 +1,59 @@
1
+Return-Path: <david@bm.lan>
2
+Received: from bionic.bm.lan (localhost.localdomain [127.0.0.1])
3
+	 by bionic with LMTPA;
4
+	 Wed, 10 Apr 2019 10:00:52 +0000
5
+X-Cyrus-Session-Id: cyrus-35778-1554890452-2-15751953108581710977
6
+X-Sieve: CMU Sieve 3.0
7
+Received: from /192.168.30.129 (LHLO bionic.bm.lan) by unknown with LMTP;
8
+ 4/10/19 10:00 AM
9
+Received: from localhost.localdomain (localhost.localdomain [127.0.0.1])
10
+	by bionic.bm.lan (Postfix) with ESMTP id A415480399
11
+	for <david@bm.lan>; Wed, 10 Apr 2019 10:00:52 +0000 (UTC)
12
+MIME-Version: 1.0
13
+Content-Type: multipart/mixed;
14
+ boundary="=_d666dfca7f2c78348fb2e03ddd1e785b"
15
+Date: Wed, 10 Apr 2019 12:00:52 +0200
16
+From: David Phan <david@bm.lan>
17
+To: David Phan <david@bm.lan>
18
+Subject: avec une pj
19
+Message-ID: <fbce0302a6a58f6b680609c956c528fb@bm.lan>
20
+X-Sender: david@bm.lan
21
+User-Agent: Roundcube Webmail/0.8.5
22
+X-Bm-Milter-Handled: 0741655b-ec8a-4111-8f82-cf2e575593ee
23
+X-Bm-Transport-Timestamp: 1554890452678
24
+
25
+--=_d666dfca7f2c78348fb2e03ddd1e785b
26
+Content-Type: multipart/alternative;
27
+ boundary="=_54e5258bf1dac08ad46d0669715f9e7c"
28
+
29
+--=_54e5258bf1dac08ad46d0669715f9e7c
30
+Content-Transfer-Encoding: 7bit
31
+Content-Type: text/plain; charset=UTF-8
32
+
33
+my body is a wonderland 
34
+
35
+  
36
+--=_54e5258bf1dac08ad46d0669715f9e7c
37
+Content-Transfer-Encoding: quoted-printable
38
+Content-Type: text/html; charset=UTF-8
39
+
40
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
41
+<html><body>
42
+<p><span style=3D"font-family: Arial;">my body is a wonderland</span></p>
43
+<p>&nbsp;</p>
44
+<div>&nbsp;</div>
45
+</body></html>
46
+
47
+--=_54e5258bf1dac08ad46d0669715f9e7c--
48
+
49
+--=_d666dfca7f2c78348fb2e03ddd1e785b
50
+Content-Transfer-Encoding: base64
51
+Content-Type: text/plain;
52
+ name=pj.txt
53
+Content-Disposition: attachment;
54
+ filename=pj.txt;
55
+ size=15
56
+
57
+bmVpbiBuZWluIG5laW4K
58
+--=_d666dfca7f2c78348fb2e03ddd1e785b--
59
+
... ...
@@ -152,8 +152,8 @@ public class BodyStreamProcessorTests {
152 152
 	}
153 153
 
154 154
 	/**
155
-	 * {@link MessageBody#preview} should not contain HTML nor have more than one
156
-	 * line nor have unnecessary white characters.
155
+	 * {@link MessageBody#preview} should not contain HTML nor have more than
156
+	 * one line nor have unnecessary white characters.
157 157
 	 */
158 158
 	@Test
159 159
 	public void testMessageBodyPreview()
... ...
@@ -174,4 +174,11 @@ public class BodyStreamProcessorTests {
174 174
 		assertEquals(expectedPreview, result.body.preview);
175 175
 	}
176 176
 
177
+	@Test
178
+	public void testMessageBodyPreview_Attachment()
179
+			throws IOException, InterruptedException, ExecutionException, TimeoutException {
180
+		this.testMessageBodyPreview("data/attachment.eml", "my body is a wonderland");
181
+
182
+	}
183
+
177 184
 }
... ...
@@ -49,8 +49,8 @@ public class NekoHelper {
49 49
 	private static final Logger logger = LoggerFactory.getLogger(NekoHelper.class);
50 50
 
51 51
 	/**
52
-	 * Pattern matching white characters sequence. Since '\s' does not include non
53
-	 * breaking spaces, we add them with p{Z}.
52
+	 * Pattern matching white characters sequence. Since '\s' does not include
53
+	 * non-breaking spaces, we add them with \p{Z}.
54 54
 	 * 
55 55
 	 * @see https://www.regular-expressions.info/unicode.html
56 56
 	 */
... ...
@@ -125,12 +125,16 @@ public class NekoHelper {
125 125
 	}
126 126
 
127 127
 	/**
128
-	 * Call {@link #rawText(String)}, then flatten (one line) and compact (trim and
129
-	 * shorten white-spaces).
128
+	 * Call {@link #rawText(String)}, then flatten (one line) and compact (trim
129
+	 * and shorten white-spaces).
130 130
 	 */
131 131
 	public static String flatCompactRawText(final String html) {
132 132
 		final String trimmedRawText = rawText(html).trim();
133
-		final Matcher matcher = WHITE_SPACES_PATTERN.matcher(trimmedRawText);
133
+		return trimWhiteSpace(trimmedRawText);
134
+	}
135
+
136
+	public static String trimWhiteSpace(final String txt) {
137
+		final Matcher matcher = WHITE_SPACES_PATTERN.matcher(txt);
134 138
 		return matcher.replaceAll(" ");
135 139
 	}
136 140