File mailx-12.5-mime.dif of Package mailx

---
 def.h     |    4 +
 mime.c    |  144 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 sendout.c |   40 +++++++++++++----
 3 files changed, 172 insertions(+), 16 deletions(-)

--- def.h
+++ def.h	2016-04-08 14:58:42.729798789 +0000
@@ -142,7 +142,9 @@ enum mimeclean {
 	MIME_LONGLINES	= 002,		/* has lines too long for RFC 2822 */
 	MIME_CTRLCHAR	= 004,		/* contains control characters */
 	MIME_HASNUL	= 010,		/* contains \0 characters */
-	MIME_NOTERMNL	= 020		/* lacks a terminating newline */
+	MIME_NOTERMNL	= 020,		/* lacks a terminating newline */
+	MIME_UTF8	= 040,		/* UTF-8 high bit multi byte characters */
+	MIME_LATIN	= 0100		/* Latin high bit single byte characters */
 };
 
 enum tdflags {
--- mime.c
+++ mime.c	2016-04-08 15:00:05.808259514 +0000
@@ -302,13 +302,78 @@ gettcharset(void)
 	return t;
 }
 
+#define F 0	/* character never appears in mail text */
+#define T 1	/* character appears in plain ASCII text */
+#define I 2	/* character appears in ISO-8859 text */
+#define X 3	/* character appears in non-ISO extended ASCII (Mac, IBM PC) */
+/* Class (F, T, I or X) of every byte value, indexed by the byte itself. */
+static char text_chars[256] = {
+   /* NUL                  BEL BS HT LF    FF CR    */
+	F, F, F, F, F, F, F, F, T, T, T, F, T, T, F, F,  /* 0x0X */
+	/*                              ESC          */
+	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
+	/* 0x80-0x9f are all classed X, 0xa0-0xff are all classed I */
+	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
+	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
+};
+
+static int encflags;	/* MIME_UTF8 or MIME_LATIN, set by test_enc() */
+/*
+ * Classify the high-bit byte s[0] of a NUL-terminated string: MIME_UTF8 if
+ * it leads a well-formed run of 10xxxxxx bytes, MIME_LATIN if text_chars[]
+ * marks it as ISO-8859 text; otherwise encflags is left unchanged.
+ */
+static void
+test_enc(const char *s)
+{
+	int c = *s;
+	if (c & 0100) {
+		int n, follow;
+		if      ((c & 040) == 0)	/* 110xxxxx */
+			follow = 1;
+		else if ((c & 020) == 0)	/* 1110xxxx */
+			follow = 2;
+		else if ((c & 010) == 0)	/* 11110xxx */
+			follow = 3;
+		else if ((c & 004) == 0)	/* 111110xx */
+			follow = 4;
+		else if ((c & 002) == 0)	/* 1111110x */
+			follow = 5;
+		else				/* 0xfe, 0xff: no UTF-8 lead */
+			goto latin;
+		/* NUL is not 10xxxxxx either, so the scan cannot overrun s */
+		for (n = 1; n <= follow; n++)
+			if ((s[n] & 0300) != 0200)
+				goto latin;
+		encflags = MIME_UTF8;
+		return;
+	}
+latin:
+	if (text_chars[*s & 0377] == I)
+		encflags = MIME_LATIN;
+}
+
 static int 
 has_highbit(const char *s)
 {
 	if (s) {
 		do
-			if (*s & 0200)
+			if (*s & 0200) {
+				test_enc(s);
 				return 1;
+			}
 		while (*s++ != '\0');
 	}
 	return 0;
@@ -328,6 +393,7 @@ name_highbit(struct name *np)
 char *
 need_hdrconv(struct header *hp, enum gfield w)
 {
+	encflags = 0;
 	if (w & GIDENT) {
 		if (hp->h_from && name_highbit(hp->h_from))
 			goto needs;
@@ -355,7 +421,7 @@ need_hdrconv(struct header *hp, enum gfi
 	if (w & GSUBJECT && has_highbit(hp->h_subject))
 		goto needs;
 	return NULL;
-needs:	return getcharset(MIME_HIGHBIT);
+needs:	return getcharset(MIME_HIGHBIT|encflags);
 }
 
 #ifdef	HAVE_ICONV
@@ -441,7 +507,7 @@ iconv_open_ft(const char *tocode, const
 	 * be used to check the validity of the input even with
 	 * identical encoding names.
 	 */
-	if (strcmp(t, f) == 0)
+	if (asccasecmp(t, f) == 0)
 		errno = 0;
 	return (iconv_t)-1;
 }
@@ -665,7 +731,7 @@ mime_tline(char *x, char *l)
 			l++;
 		if (*l != '\0')
 			*l++ = '\0';
-		if (strcmp(x, n) == 0) {
+		if (asccasecmp(x, n) == 0) {
 			match = 1;
 			break;
 		}
@@ -748,14 +814,62 @@ mime_isclean(FILE *f)
 				maxlen = curlen;
 			curlen = 1;
 		} else if (c & 0200) {
+			int i = c;
 			isclean |= MIME_HIGHBIT;
+			if (c & 0100) {
+				int n, follow;
+
+				if      ((c & 040) == 0)	/* 110xxxxx */
+					follow = 1;
+				else if ((c & 020) == 0)	/* 1110xxxx */
+					follow = 2;
+				else if ((c & 010) == 0)	/* 11110xxx */
+					follow = 3;
+				else if ((c & 004) == 0)	/* 111110xx */
+					follow = 4;
+				else if ((c & 002) == 0)	/* 1111110x */
+					follow = 5;
+				else
+					continue;
+
+				for (n = 0; n < follow; n++) {
+					lastc = c;
+					c = getc(f);
+					curlen++;
+
+					if (c == '\0') {
+						isclean |= MIME_HASNUL;
+						goto latin;
+					}
+					if ((c & 0200) == 0 || (c & 0100))
+						goto latin;
+					if ((c == '\n') || (c == EOF)) {
+						if (curlen > maxlen)
+							    maxlen = curlen;
+						curlen = 1;
+						goto latin;
+					}
+				}
+				isclean |= MIME_UTF8;
+				continue;
+			}
+		latin:
+			if (text_chars[i & 0377] == I)
+				isclean |= MIME_LATIN;
+			if (text_chars[i & 0377] == X) {
+				isclean |= MIME_CTRLCHAR;
+				break;
+			}
 		} else if (c == '\0') {
 			isclean |= MIME_HASNUL;
 			break;
-		} else if ((c < 040 && (c != '\t' && c != '\f')) || c == 0177) {
+		} else if (text_chars[c & 0377] == F) {
 			isclean |= MIME_CTRLCHAR;
+			break;
 		}
 	} while (c != EOF);
+	if (isclean & (MIME_CTRLCHAR|MIME_HASNUL))
+		isclean &= (MIME_CTRLCHAR|MIME_HASNUL);
 	if (lastc != '\n')
 		isclean |= MIME_NOTERMNL;
 	clearerr(f);
@@ -826,11 +940,16 @@ get_mime_convert(FILE *fp, char **conten
 			 * ^I or ^L in text/plain bodies. However, some
 			 * obscure character sets actually contain these
 			 * characters, so the content type can be set.
+			 * Besides ^I and ^L from RFC 2046, we also accept
+			 * backspace (^H), which is often used in enhanced text.
 			 */
 			if ((*contenttype = value("contenttype-cntrl")) == NULL)
 				*contenttype = "application/octet-stream";
 		} else if (*contenttype == NULL)
 			*contenttype = "text/plain";
+	} else if (ascncasecmp(*contenttype, "text/", 5) == 0)
+	{
+		*charset = getcharset(*isclean);
 	}
 	return convert;
 }
@@ -989,8 +1108,14 @@ mime_fromhdr(struct str *in, struct str
 #ifdef	HAVE_ICONV
 	iconv_t fhicd = (iconv_t)-1;
 #endif
+	enum mimeclean isclean = 0;
 
 	tcs = gettcharset();
+
+	encflags = 0;
+	if (has_highbit(in->s))
+		isclean |= (MIME_HIGHBIT|encflags);
+
 	maxstor = in->l;
 	out->s = smalloc(maxstor + 1);
 	out->l = 0;
@@ -1010,7 +1135,7 @@ mime_fromhdr(struct str *in, struct str
 #ifdef	HAVE_ICONV
 			if (fhicd != (iconv_t)-1)
 				iconv_close(fhicd);
-			if (strcmp(cs, tcs))
+			if (asccasecmp(cs, tcs))
 				fhicd = iconv_open_ft(tcs, cs);
 			else
 				fhicd = (iconv_t)-1;
@@ -1105,12 +1230,17 @@ notmime:
 	}
 fromhdr_end:
 	*q = '\0';
-	if (flags & TD_ISPR) {
+	if ((flags & TD_ISPR) && (isclean & MIME_HIGHBIT)) {
 		struct str	new;
+		if ((isclean & MIME_UTF8) && asccasecmp("utf-8", tcs) == 0)
+			goto skip;
+		if ((isclean & MIME_LATIN) && ascncasecmp("iso-8859-", tcs, 9) == 0)
+			goto skip;
 		makeprint(out, &new);
 		free(out->s);
 		*out = new;
 	}
+skip:
 	if (flags & TD_DELCTRL)
 		out->l = delctrl(out->s, out->l);
 #ifdef	HAVE_ICONV
--- sendout.c
+++ sendout.c	2016-04-08 14:58:42.729798789 +0000
@@ -226,6 +226,23 @@ attach_file1(struct attachment *ap, FILE
 		charset = ap->a_charset;
 	convert = get_mime_convert(fi, &contenttype, &charset, &isclean,
 			dosign);
+#ifdef	HAVE_ICONV
+	tcs = gettcharset();
+	if (isclean & MIME_UTF8)
+	{
+		tcs = "utf-8";
+	}
+	if (isclean & MIME_LATIN) {
+		tcs = value("charset");
+		if (tcs == NULL && wantcharset && wantcharset != (char *)-1)
+			tcs = wantcharset;
+	}
+	if (tcs == NULL)  {
+		contenttype = "application/octet-stream";
+		charset = NULL;
+		convert = CONV_TOB64;
+	}
+#endif
 	fprintf(fo,
 		"\n--%s\n"
 		"Content-Type: %s",
@@ -255,11 +272,10 @@ attach_file1(struct attachment *ap, FILE
 		iconv_close(iconvd);
 		iconvd = (iconv_t)-1;
 	}
-	tcs = gettcharset();
 	if ((isclean & (MIME_HASNUL|MIME_CTRLCHAR)) == 0 &&
 			ascncasecmp(contenttype, "text/", 5) == 0 &&
-			isclean & MIME_HIGHBIT &&
-			charset != NULL) {
+			(isclean & MIME_HIGHBIT) &&
+			charset != NULL && tcs != NULL) {
 		if ((iconvd = iconv_open_ft(charset, tcs)) == (iconv_t)-1 &&
 				errno != 0) {
 			if (errno == EINVAL)
@@ -467,11 +483,12 @@ infix(struct header *hp, FILE *fi, int d
 	}
 	rm(tempMail);
 	Ftfree(&tempMail);
-	convert = get_mime_convert(fi, &contenttype, &charset,
-			&isclean, dosign);
+	convert = get_mime_convert(fi, &contenttype, &charset, &isclean,
+			dosign);
 #ifdef	HAVE_ICONV
 	tcs = gettcharset();
-	if ((convhdr = need_hdrconv(hp, GTO|GSUBJECT|GCC|GBCC|GIDENT)) != 0) {
+	if ((convhdr = need_hdrconv(hp, GTO|GSUBJECT|GCC|GBCC|GIDENT)) != 0 &&
+	    tcs != NULL) {
 		if (iconvd != (iconv_t)-1)
 			iconv_close(iconvd);
 		if ((iconvd = iconv_open_ft(convhdr, tcs)) == (iconv_t)-1
@@ -505,10 +522,17 @@ infix(struct header *hp, FILE *fi, int d
 		iconv_close(iconvd);
 		iconvd = (iconv_t)-1;
 	}
+	if (isclean & MIME_UTF8)
+		tcs = "utf-8";
+	if (isclean & MIME_LATIN) {
+		tcs = value("charset");
+		if (tcs == NULL && wantcharset && wantcharset != (char *)-1)
+			tcs = wantcharset;
+	}
 	if ((isclean & (MIME_HASNUL|MIME_CTRLCHAR)) == 0 &&
 			ascncasecmp(contenttype, "text/", 5) == 0 &&
-			isclean & MIME_HIGHBIT &&
-			charset != NULL) {
+			(isclean & MIME_HIGHBIT) &&
+			charset != NULL && tcs != NULL) {
 		if (iconvd != (iconv_t)-1)
 			iconv_close(iconvd);
 		if ((iconvd = iconv_open_ft(charset, tcs)) == (iconv_t)-1