This is mime.c in view mode; [Download] [Up]
/* MIME handling routines.
 *
 * Author: Michael Elkins <elkins@aero.org>
 * Modified by John E. Davis <davis@space.mit.edu>
 */

#include "config.h"
#include "features.h"

#include <stdio.h>
#include <string.h>

#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif

#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif

#include <ctype.h>

#include <slang.h>
#include "jdmacros.h"

#include "server.h"
#include "slrn.h"
#include "misc.h"
#include "slrn.h"
#include "group.h"
#include "art.h"
#include "uudecode.h"

#if SLRN_HAS_MIME                      /* rest of file in this ifdef */

#include "mime.h"

int Slrn_Use_Mime = 1;
int Slrn_Use_Meta_Mail = 1;
int Slrn_Mime_Was_Parsed;
int Slrn_Mime_Was_Modified;
int Slrn_Mime_Needs_Metamail;

char *Slrn_Mime_Display_Charset;

/* These are all supersets of US-ASCII */
static char *Compatable_Charsets[] =
{
    "US-ASCII",                        /* This MUST be zeroth element */
    "ISO-8859-1",
    "ISO-8859-2",
    "ISO-8859-3",
    "ISO-8859-4",
    "ISO-8859-5",
    "ISO-8859-6",
    "ISO-8859-7",
    "ISO-8859-8",
    "ISO-8859-9",
    "KOI8-R",
    NULL
};

/* Results of the most recent header parse (see parse_content_type_line
 * and parse_content_transfer_encoding_line).
 */
static char *Char_Set;

static int Content_Type;
#define CONTENT_TYPE_TEXT               0x01
#define CONTENT_TYPE_MESSAGE            0x02
#define CONTENT_TYPE_MULTIPART          0x03
#define CONTENT_TYPE_UNSUPPORTED        0x10

static int Content_Subtype;
#define CONTENT_SUBTYPE_PLAIN           0x01
#define CONTENT_SUBTYPE_UNKNOWN         0x02
#define CONTENT_SUBTYPE_UNSUPPORTED     0x10

static int Encoding_Method;
#define ENCODED_7BIT                    1
#define ENCODED_8BIT                    2
#define ENCODED_QUOTED                  3
#define ENCODED_BASE64                  4
#define ENCODED_BINARY                  5
#define ENCODED_UNSUPPORTED             6

#ifndef isalnum
#define isalnum(x) \
    ((((x) <= 'Z') && ((x) >= 'A')) \
     || (((x) <= 'z') && ((x) >= 'a')) \
     || (((x) <= '9') && ((x) >= '0')))
#endif

/* Return the first header line of the current article whose text starts
 * with `header' (compared case-insensitively), or NULL if there is none.
 * Only the leading run of lines flagged HEADER_LINE is searched.
 */
static Slrn_Article_Line_Type *find_header_line (char *header)
{
    Slrn_Article_Line_Type *line = Slrn_Article_Lines;
    unsigned char ch = (unsigned char) UPPER_CASE(*header);
    unsigned int len = strlen (header);

    while ((line != NULL) && (line->flags & HEADER_LINE))
    {
        unsigned char ch1 = (unsigned char) *line->buf;

        /* Cheap first-character test before the full comparison. */
        if ((ch == UPPER_CASE(ch1))
            && (0 == slrn_case_strncmp ((unsigned char *)header,
                                        (unsigned char *)line->buf,
                                        len)))
            return line;

        line = line->next;
    }
    return NULL;
}

/* Look up the `len'-character name starting at `cs' in
 * Compatable_Charsets (case-insensitively).  Returns the table entry on
 * an exact-length match, NULL otherwise.
 */
static char *find_compatable_charset (char *cs, unsigned int len)
{
    char **compat_charset;

    compat_charset = Compatable_Charsets;

    while (*compat_charset != NULL)
    {
        if ((0 == slrn_case_strncmp ((unsigned char *) cs,
                                     (unsigned char *) *compat_charset,
                                     len))
            && (len == strlen (*compat_charset)))
            return *compat_charset;

        compat_charset++;
    }
    return NULL;
}

/* Parse the Content-Type: header into Content_Type, Content_Subtype and
 * Char_Set.  Without such a header the defaults text/plain, US-ASCII are
 * used.  Returns 0 when the article can be handled here and -1 when the
 * type, subtype or charset is not supported.
 */
static int parse_content_type_line (void)
{
    Slrn_Article_Line_Type *line;
    char *b;

    /* Use default: text/plain; charset=us-ascii */
    Content_Type = CONTENT_TYPE_TEXT;
    Content_Subtype = CONTENT_SUBTYPE_PLAIN;
    Char_Set = Compatable_Charsets[0];

    if (NULL == (line = find_header_line ("Content-Type:")))
        return 0;

    b = slrn_skip_whitespace (line->buf + 13);

    if (0 == slrn_case_strncmp ((unsigned char *)b,
                                (unsigned char *) "text/",
                                5))
    {
        b += 5;
        if (0 != slrn_case_strncmp ((unsigned char *)b,
                                    (unsigned char *) "plain",
                                    5))
        {
            Content_Subtype = CONTENT_SUBTYPE_UNSUPPORTED;
            return -1;
        }
        b += 5;
    }
    else if (0 == slrn_case_strncmp ((unsigned char *)b,
                                     (unsigned char *) "message/",
                                     8))   /* compare the whole prefix */
    {
        Content_Type = CONTENT_TYPE_MESSAGE;
        Content_Subtype = CONTENT_SUBTYPE_UNKNOWN;
        b += 8;
    }
    else if (0 == slrn_case_strncmp ((unsigned char *)b,
                                     (unsigned char *) "multipart/",
                                     10))  /* compare the whole prefix */
    {
        Content_Type = CONTENT_TYPE_MULTIPART;
        Content_Subtype = CONTENT_SUBTYPE_UNKNOWN;
        b += 10;
    }
    else
    {
        Content_Type = CONTENT_TYPE_UNSUPPORTED;
        return -1;
    }

    /* Scan the parameters (possibly spread over continuation lines) for
     * a charset=... parameter.
     */
    do
    {
        while (NULL != (b = slrn_strchr (b, ';')))
        {
            char *charset;
            unsigned int len;

            b = slrn_skip_whitespace (b + 1);

            /* Compare exactly the 7 characters of the keyword. */
            if (0 != slrn_case_strncmp ((unsigned char *)b,
                                        (unsigned char *)"charset",
                                        7))
                continue;

            b = slrn_skip_whitespace (b + 7);

            /* The '=' may be on a continuation line. */
            while (*b == 0)
            {
                line = line->next;
                if ((line == NULL)
                    || ((line->flags & HEADER_LINE) == 0)
                    || ((*(b = line->buf) != ' ') && (*b != '\t')))
                    return -1;

                b = slrn_skip_whitespace (b);
            }

            if (*b != '=') continue;
            b++;                       /* skip the '=' itself */

            if (*b == '"') b++;
            charset = b;
            while (*b && (*b != ';')
                   && (*b != ' ') && (*b != '\t') && (*b != '\n')
                   && (*b != '"'))
                b++;
            len = b - charset;

            Char_Set = find_compatable_charset (charset, len);
            if (Char_Set == NULL) return -1;
            return 0;
        }
        line = line->next;
    }
    while ((line != NULL)
           && (line->flags & HEADER_LINE)
           && ((*(b = line->buf) == ' ') || (*b == '\t')));

    return 0;
}

/* Parse the Content-Transfer-Encoding: header into Encoding_Method
 * (default ENCODED_7BIT when the header is absent).  Returns 0 for a
 * recognised encoding, -1 (with ENCODED_UNSUPPORTED) otherwise.
 */
static int parse_content_transfer_encoding_line (void)
{
    Slrn_Article_Line_Type *line;
    unsigned char *buf;

    Encoding_Method = ENCODED_7BIT;
    line = find_header_line ("Content-Transfer-Encoding:");
    if (line == NULL) return 0;

    buf = (unsigned char *) slrn_skip_whitespace (line->buf + 26);
    if (*buf == '"') buf++;

    if (0 == slrn_case_strncmp (buf, (unsigned char *) "7bit", 4))
        Encoding_Method = ENCODED_7BIT;
    else if (0 == slrn_case_strncmp (buf, (unsigned char *) "8bit", 4))
        Encoding_Method = ENCODED_8BIT;
    else if (0 == slrn_case_strncmp (buf, (unsigned char *) "base64", 6))
        Encoding_Method = ENCODED_BASE64;
    else if (0 == slrn_case_strncmp (buf, (unsigned char *) "quoted-printable", 16))
        Encoding_Method = ENCODED_QUOTED;
    else if (0 == slrn_case_strncmp (buf, (unsigned char *) "binary", 6))
        Encoding_Method = ENCODED_BINARY;
    else
    {
        Encoding_Method = ENCODED_UNSUPPORTED;
        return -1;
    }
    return 0;
}

/* Value of each ASCII character as a hexadecimal digit, -1 if it is not
 * one.
 */
static int Index_Hex[128] =
{
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
#define HEX(c) (Index_Hex[(unsigned char)(c) & 0x7F])

/* Decode the quoted-printable text in [src, srcmax) to dest and return a
 * pointer to the END of the decoded text.  dest may equal src because the
 * output never gets ahead of the input.  When treat_underscore_as_space
 * is non-zero, '_' decodes to a space (the RFC1522 "Q" convention).
 * An '=' that is not followed by two hex digits is copied literally.
 */
static char *decode_quoted_printable (char *dest,
                                      char *src, char *srcmax,
                                      int treat_underscore_as_space)
{
    while (src < srcmax)
    {
        char ch = *src++;

        if ((ch == '=') && (src + 1 < srcmax))
        {
            int hi = HEX(src[0]);
            int lo = HEX(src[1]);

            if ((hi >= 0) && (lo >= 0))
            {
                *dest++ = (char) (16 * hi + lo);
                src += 2;
            }
            else *dest++ = ch;         /* malformed escape: keep the '=' */
        }
        else if ((ch == '_') && treat_underscore_as_space)
        {
            *dest++ = ' ';
        }
        else *dest++ = ch;
    }
    return dest;
}

/* Value of each ASCII character in the base64 alphabet, -1 if it is not
 * part of it.
 */
static int Index_64[128] =
{
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
#define BASE64(c) (Index_64[(unsigned char)(c) & 0x7F])

/* Decode the base64 text in [src, srcmax) to dest, four input characters
 * at a time, and return a pointer to the END of the decoded text.
 * Decoding stops at '=' padding or at the first character that is not in
 * the base64 alphabet, so that no byte is ever built from a -1 table
 * entry.
 */
static char *decode_base64 (char *dest, char *src, char *srcmax)
{
    while (src + 3 < srcmax)
    {
        int c0 = BASE64(src[0]);
        int c1 = BASE64(src[1]);
        int c2, c3;

        if ((c0 < 0) || (c1 < 0)) break;
        *dest++ = (char) ((c0 << 2) | (c1 >> 4));

        if (src[2] == '=') break;      /* "xx==": one byte in this group */
        c2 = BASE64(src[2]);
        if (c2 < 0) break;
        *dest++ = (char) (((c1 & 0xf) << 4) | (c2 >> 2));

        if (src[3] == '=') break;      /* "xxx=": two bytes in this group */
        c3 = BASE64(src[3]);
        if (c3 < 0) break;
        *dest++ = (char) (((c2 & 0x3) << 6) | c3);

        src += 4;
    }
    return dest;
}

/* Decode, in place, every RFC1522 encoded word (=?charset?B|Q?text?=) in
 * `s' whose charset is listed in Compatable_Charsets.  Words that are
 * malformed or use another charset are left untouched.  Returns the
 * number of encoded words that were decoded.
 */
int slrn_rfc1522_decode_string (char *s)
{
    char *s1, *s2, ch;
    char *charset, method, *txt;
    unsigned int count = 0;
    unsigned int len;

    while (1)
    {
        /* Find the next "=?" */
        while ((NULL != (s = slrn_strchr (s, '=')))
               && (s[1] != '?'))
            s++;
        if (s == NULL) break;

        s1 = s;                        /* start of the encoded word */

        charset = s = s1 + 2;
        while (((ch = *s) != 0)
               && (ch != '?') && (ch != ' ') && (ch != '\t') && (ch != '\n'))
            s++;

        if (ch != '?')
        {
            s = s1 + 2;
            continue;
        }

        len = s - charset;
        charset = find_compatable_charset (charset, len);

        s++;                           /* skip ? */
        method = *s++;                 /* skip B,Q */
        method = UPPER_CASE(method);

        if ((charset == NULL)
            || ((method != 'B') && (method != 'Q'))
            || (*s != '?'))
        {
            s = s1 + 2;
            continue;
        }

        /* Now look for the final ?= after the encoded text */
        s++;                           /* skip ? */
        txt = s;

        while ((ch = *s) != 0)
        {
            if ((ch == ' ') || (ch == '\t') || (ch == '\n'))
                break;
            if ((ch == '?') && (s[1] == '='))
                break;
            s++;
        }

        if ((ch != '?') || (s[1] != '='))
        {
            s = s1 + 2;
            continue;
        }

        /* Note: these functions return a pointer to the END of the
         * decoded text.
         */
        if (method == 'B')
            s1 = decode_base64 (s1, txt, s);
        else
            s1 = decode_quoted_printable (s1, txt, s, 1);

        /* Now move everything over */
        s2 = s + 2;                    /* skip final ?= */
        s = s1;                        /* start from here next loop */
        while ((ch = *s2++) != 0) *s1++ = ch;
        *s1 = 0;

        count++;
    }
    return (int) count;
}

/* Decode RFC1522 encoded words in every header line of the current
 * article, setting Slrn_Mime_Was_Modified if anything changed.
 */
static void rfc1522_decode_headers (void)
{
    Slrn_Article_Line_Type *line = Slrn_Article_Lines;

    while ((line != NULL) && (line->flags & HEADER_LINE))
    {
        if (slrn_rfc1522_decode_string (line->buf))
        {
            Slrn_Mime_Was_Modified = 1;
        }
        line = line->next;
    }
}

/* Base64 bodies are not decoded here; the article is flagged for
 * metamail instead.
 */
static void decode_mime_base64 (void)
{
    Slrn_Mime_Needs_Metamail = 1;
}

/* This function checks if the last character on curr_line is an = and
 * if it is, then it merges curr_line and curr_line->next.  See RFC1341,
 * section 5.1 (Quoted-Printable Content-Transfer-Encoding) rule #5.
 * [csp@ohm.york.ac.uk]
 *
 * The merge is repeated for as long as the merged line still ends in '='.
 * Returns 0 on success and -1 if memory could not be allocated, in which
 * case curr_line is left as it was before the failing merge.
 */
static int merge_if_soft_linebreak (Slrn_Article_Line_Type *curr_line)
{
    Slrn_Article_Line_Type *next_line;

    while ((next_line = curr_line->next) != NULL)
    {
        char *b = curr_line->buf;
        unsigned int len;

        len = strlen (b);
        if (len == 0) return 0;

        len--;
        if (b[len] != '=') return 0;

        /* Grow first and only then touch the text, so that a failed
         * allocation leaves the line unmodified.  The new size holds the
         * line without its '=', the next line and the terminating 0.
         */
        if (NULL == (b = (char *) SLREALLOC (b, 1 + len + strlen (next_line->buf))))
            return -1;

        curr_line->buf = b;

        /* This overwrites the excess = character. */
        strcpy (b + len, next_line->buf);

        /* Unlink next_line from the linked list of lines in the article. */
        curr_line->next = next_line->next;
        if (next_line->next != NULL)
            next_line->next->prev = curr_line;

        SLFREE (next_line->buf);
        SLFREE (next_line);
    }
    return 0;
}

/* Decode the quoted-printable body of the current article in place,
 * joining soft line breaks first.  Slrn_Mime_Was_Modified is set when a
 * line became shorter through decoding.
 */
static void decode_mime_quoted_printable (void)
{
    Slrn_Article_Line_Type *line = Slrn_Article_Lines;

    /* skip to body */
    while ((line != NULL) && (line->flags & HEADER_LINE))
        line = line->next;
    if (line == NULL) return;

    while (line != NULL)
    {
        char *b;
        unsigned int len;

        b = line->buf;
        len = strlen (b);
        if (len && (b[len - 1] == '=')
            && (line->next != NULL))
        {
            /* Best effort: on allocation failure the line is decoded
             * unmerged.
             */
            (void) merge_if_soft_linebreak (line);
            b = line->buf;
            len = strlen (b);
        }

        b = decode_quoted_printable (b, b, b + len, 0);
        if (b < line->buf + len)
        {
            *b = 0;
            Slrn_Mime_Was_Modified = 1;
        }

        line = line->next;
    }
}

/* Reset the per-article MIME state flags. */
void slrn_mime_article_init (void)
{
    Slrn_Mime_Was_Modified = 0;
    Slrn_Mime_Was_Parsed = 0;
    Slrn_Mime_Needs_Metamail = 0;
}

/* Decode the current article once: RFC1522 words in the headers, and the
 * body if it is quoted-printable.  Articles that cannot be handled here
 * are flagged through Slrn_Mime_Needs_Metamail.  Does nothing when MIME
 * support is switched off, the article was already parsed, or there is
 * no article.
 */
void slrn_mime_process_article (void)
{
    if ((Slrn_Use_Mime == 0)
        || Slrn_Mime_Was_Parsed
        || (Slrn_Article_Lines == NULL))
        return;

    Slrn_Mime_Was_Parsed = 1;          /* or will be */

    rfc1522_decode_headers ();

    /* The body is only interpreted for articles that declare MIME. */
    if (NULL == find_header_line ("Mime-Version:")) return;

    if ((-1 == parse_content_type_line ())
        || (-1 == parse_content_transfer_encoding_line ()))
    {
        Slrn_Mime_Needs_Metamail = 1;
        return;
    }

    switch (Encoding_Method)
    {
      case ENCODED_7BIT:
      case ENCODED_8BIT:
      case ENCODED_BINARY:
        /* Already done. */
        return;

      case ENCODED_BASE64:
        decode_mime_base64 ();
        break;

      case ENCODED_QUOTED:
        decode_mime_quoted_printable ();
        break;

      default:
        Slrn_Mime_Needs_Metamail = 1;
        return;
    }
}

#ifndef MAXPATHLEN
#define MAXPATHLEN 1024
#endif

/* Ask the user whether to hand the current article to metamail and, if
 * so, write it to a temporary file and run "metamail <file>".  Returns 1
 * if metamail was run and 0 otherwise (always 0 on VMS).
 */
int slrn_mime_call_metamail (void)
{
#ifdef VMS
    return 0;
#else
    int init = Slrn_TT_Initialized;
    char cmd[MAXPATHLEN];
    char tempfile[MAXPATHLEN];
    Slrn_Article_Line_Type *ptr = Slrn_Article_Lines;
    FILE *fp;
    int c;

    if ((Slrn_Use_Meta_Mail == 0)
        || slrn_get_yesno (1, "Process this MIME article with metamail") <= 0)
        return 0;

    /* NOTE(review): the name is predictable and the file is opened
     * without an exclusive-create check; consider mkstemp where it is
     * available.
     */
    sprintf (tempfile, "/tmp/slrn.%ld", (long) getpid ());
    fp = fopen (tempfile, "w");
    if (fp == NULL)
    {
        slrn_error ("Unable to open tmp file for metamail.");
        return 0;
    }

    while (ptr)
    {
        fputs (ptr->buf, fp);
        putc ('\n', fp);
        ptr = ptr->next;
    }
    slrn_fclose (fp);

    strcpy (cmd, "metamail ");
    strcat (cmd, tempfile);

    /* Make sure that metamail has a normal environment */
    slrn_reset_display (0);

    slrn_posix_system (cmd, 0);
    slrn_delete_file (tempfile);

    printf ("Press return to continue...");
    fflush (stdout);                   /* the prompt has no newline */

    /* Consume the rest of the input line.  fflush() on an input stream
     * is undefined, so pending input is drained by reading it.
     */
    do
        c = getchar ();
    while ((c != '\n') && (c != EOF));

    slrn_init_display (init, 0);
    return 1;
#endif  /* NOT VMS */
}


/* -------------------------------------------------------------------------
 * MIME encoding routines.
 * -------------------------------------------------------------------------*/

static char *Mime_Posting_Charset;
static int Mime_Posting_Encoding;

/* Scan the article in `fp' to determine which charset and
 * Content-Transfer-Encoding should be used for posting; the result is
 * stored in Mime_Posting_Charset and Mime_Posting_Encoding.  The stream
 * is rewound before returning.  Returns 0 on success and -1 if no blank
 * line ending the header was found.
 */
int slrn_mime_scan_file (FILE *fp)
{
    unsigned int linelen = 0;
    unsigned int maxlinelen = 0;
    int ch;
    int cr = 0;                        /* only consulted by the disabled
                                        * quoted-printable branch below */
    unsigned int hibin = 0;
    char *display_charset;

    /* Skip the header.  8-bit characters in the header are taken care of
     * elsewhere since they ALWAYS need to be encoded.
     */
    while ((ch = getc (fp)) != EOF)
    {
        if (ch == '\n')
        {
            ch = getc (fp);
            if (ch == '\n')
                break;
        }
    }

    if (ch == EOF)
    {
        rewind (fp);
        return -1;
    }

    while ((ch = getc (fp)) != EOF)
    {
        linelen++;
        if (ch & 0x80) hibin = 1;      /* 8-bit character */

        if (ch == '\n')
        {
            if (linelen > maxlinelen) maxlinelen = linelen;
            linelen = 0;
        }
        else if (((unsigned char)ch < 32) && (ch != '\t') && (ch != 0xC))
            cr = 1;                    /* not tab or formfeed */
    }
    if (linelen > maxlinelen) maxlinelen = linelen;

    /* An unset display charset is treated as US-ASCII, matching the
     * default used by slrn_mime_add_headers.
     */
    display_charset = Slrn_Mime_Display_Charset;
    if (display_charset == NULL) display_charset = "us-ascii";

    if (hibin > 0)
    {
        /* 8-bit data.  US-ASCII is NOT a valid charset, so use ISO-8859-1 */
        if (slrn_case_strcmp ((unsigned char *)"us-ascii",
                              (unsigned char *)display_charset) == 0)
            Mime_Posting_Charset = "iso-8859-1";
        else
            Mime_Posting_Charset = display_charset;
    }
    else if (NULL != find_compatable_charset (display_charset,
                                              strlen (display_charset)))
        /* 7-bit data.  Check to make sure that this display supports US-ASCII */
        Mime_Posting_Charset = "us-ascii";
    else
        Mime_Posting_Charset = display_charset;

#if 0
    if ((maxlinelen > 990) || (cr > 0))
    {
        Mime_Posting_Encoding = ENCODED_QUOTED;
    }
    else
#endif
    if (hibin > 0)
        Mime_Posting_Encoding = ENCODED_8BIT;
    else
        Mime_Posting_Encoding = ENCODED_7BIT;

    rewind (fp);
    return 0;
}

#define IS_RFC850_SPECIAL(c) \
    (((c) == '(') || ((c) == ')') || ((c) == '<') || ((c) == '>') || ((c) == '"'))

/* Encode the string `s', which may contain 8-bit characters, into the
 * `len'-byte buffer `d' according to the conventions of RFC1522.
 * Returns 0 on success and -1 if d is not big enough to encode s (the
 * contents of d are then unspecified and possibly unterminated).
 *
 * Technically, this routine is incorrect.  It should encode whitespace
 * separated words and not arbitrary sequences of text.
 *
 * The strategy here is to print up to the first word which contains
 * 8-bit chars before starting the "Q" encoding.  When it has been
 * determined that all of the 8-bit characters have been encoded, switch
 * back to normal mode.  This approach should generate relatively short
 * encoded words and should be more generally readable.
 */
static int rfc1522_encode (unsigned char *d, unsigned int len, unsigned char *s)
{
    unsigned int hibit;                /* How many 8-bit characters are left? */
    unsigned char *p, ch, *dmax;
    char charset[256];
    char *display_charset;
    unsigned int charset_len;

    if (len == 0) return -1;           /* no room even for the 0 character */

    /* First scan the string to see if there are 8-bit characters.
     * Count them for later use.
     */
    p = s;
    hibit = 0;
    while ((ch = *p) != 0)
    {
        if (ch & 0x80) hibit++;
        p++;
    }

    if (hibit == 0)
    {
        /* Plain copy: strlen (s) characters plus the 0 character must fit. */
        if ((unsigned int) (p - s) >= len) return -1;
        strcpy ((char *)d, (char *)s);
        return 0;
    }

    /* US-ASCII cannot describe 8-bit data, so ISO-8859-1 is used then
     * (and when no display charset is set).
     */
    display_charset = Slrn_Mime_Display_Charset;
    if ((display_charset == NULL)
        || (0 == slrn_case_strcmp ((unsigned char *)display_charset,
                                   (unsigned char *)"us-ascii")))
        display_charset = "iso-8859-1";

    /* "=?" + name + "?Q?" + 0 character must fit into charset[]. */
    if (strlen (display_charset) + 6 > sizeof (charset))
        return -1;
    sprintf (charset, "=?%s?Q?", display_charset);
    charset_len = strlen (charset);

    p = s;

    /* leave room for the 0 character */
    dmax = d + (len - 1);

    /* algorithm:
     *  1. copy up to first word that contains one or more eight bit chars.
     *  2. encode to last char of last word that contains eight bit chars,
     *     or until a RFC822 special is encountered: <>()"
     *  3. Repeat
     * The rationale behind this is that 7 bit characters are allowed only
     * in certain portions of the header where an 8 bit character would
     * never appear.  Thus by encoding only in regions of 8 bit characters
     * delimited by the RFC822 specials, we are sure not to decode into
     * forbidden regions.
     *
     * RFC1522 forbids starting or ending a =?..?..?..?= sequence in
     * mid-word.
     */
    while (1)
    {
        /* 1. Copy to first 8 bit character */
        while (((ch = *p) != 0)
               && (d < dmax)
               && (0 == (ch & 0x80)))
        {
            *d++ = ch;
            p++;
        }

        if (ch == 0) break;
        if (d >= dmax) return -1;

        /* Walk back until we find a word boundary or begin-of-line.  The
         * characters undone here were copied one-for-one above, so d and
         * p move together.  p never goes below s.
         */
        while (p > s)
        {
            ch = *(p - 1);
            if (0 == isalnum (ch)) break;
            p--;
            d--;
        }

        /* 2. Now start encoding.  Work up to the first occurrence of an
         * RFC822 special character or until there are no more eight bit
         * characters left.
         */
        if ((unsigned int) (dmax - d) <= charset_len) return -1;
        strcpy ((char *)d, charset);
        d += charset_len;

        while ((d < dmax)
               && ((ch = *p) != 0)
               && (0 == IS_RFC850_SPECIAL(ch))
               && ((hibit != 0) || isalnum (ch)))
        {
            if (ch == ' ') *d++ = '_';
            else if ((ch & 0x80)
                     || ((ch < 32) && (ch != '\n'))
                     || (ch == '=') || (ch == '?') || (ch == '_'))
            {
                /* '=', '?' and '_' have a special meaning inside an
                 * encoded word and must be escaped as well.
                 * We need 3 characters to encode this.
                 */
                if ((dmax - d) <= 3) return -1;
                sprintf ((char *)d, "=%02X", (int) ch);
                d += 3;
                if (ch & 0x80) hibit--;    /* only 8-bit chars were counted */
            }
            else *d++ = ch;

            p++;
        }

        /* Now turn off encoding.  We need two characters */
        if ((dmax - d) < 2) return -1;
        d[0] = '?';
        d[1] = '=';
        d += 2;
    }

    *d = 0;
    return 0;
}

/* RFC1522-encode the header text `s' in place; `bytes' is the size of
 * the buffer holding s.  If the text cannot be encoded (too long for the
 * local copy or for the buffer), the high bit of every character is
 * cleared instead.
 */
void slrn_mime_header_encode (char *s, unsigned int bytes)
{
    char buf[1024];
    unsigned int len = strlen (s);

    if (len < sizeof (buf))
    {
        strcpy (buf, s);
        if (0 == rfc1522_encode ((unsigned char *)s, bytes, (unsigned char *)buf))
            return;

        /* The failed attempt may have overwritten s without terminating
         * it; restore the original text before falling back.
         */
        strcpy (s, buf);
    }

    /* Cannot do it so strip it to 7 bits. */
    while (*s)
    {
        *s = *s & 0x7F;
        s++;
    }
}

/* Write the Mime-Version, Content-Type and Content-Transfer-Encoding
 * headers chosen by slrn_mime_scan_file to `fp', or through
 * Slrn_Post_Obj (with CRLF line ends) when fp is NULL.  Nothing is
 * written for 7bit us-ascii postings.
 */
void slrn_mime_add_headers (FILE *fp)
{
    char *encoding;

    if (Mime_Posting_Charset == NULL)
        Mime_Posting_Charset = "us-ascii";

    switch (Mime_Posting_Encoding)
    {
      default:
      case ENCODED_8BIT:
        encoding = "8bit";
        break;

      case ENCODED_7BIT:
        if (!strcmp ("us-ascii", Mime_Posting_Charset)) return;
        encoding = "7bit";
        break;

      case ENCODED_QUOTED:
        encoding = "quoted-printable";
        break;
    }

    if (fp != NULL)
    {
        fprintf (fp,
                 "Mime-Version: 1.0\n"
                 "Content-Type: text/plain; charset=%s\n"
                 "Content-Transfer-Encoding: %s\n",
                 Mime_Posting_Charset,
                 encoding);
    }
    else
    {
        Slrn_Post_Obj->po_printf ("Mime-Version: 1.0\r\n"
                                  "Content-Type: text/plain; charset=%s\r\n"
                                  "Content-Transfer-Encoding: %s\r\n",
                                  Mime_Posting_Charset,
                                  encoding);
    }
}

/* Return the stream holding the article encoded for posting.  Currently
 * no transformation is implemented, so fp is returned unchanged for
 * every encoding.
 */
FILE *slrn_mime_encode (FILE *fp)
{
    if ((Mime_Posting_Encoding == ENCODED_7BIT)
        || (Mime_Posting_Encoding == ENCODED_8BIT))
        return fp;

    /* Add encoding later. */
    return fp;
}

#endif  /* SLRN_HAS_MIME */
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.