Bincimap 2.0.17
Easy Imapping
Loading...
Searching...
No Matches
mime-parsefull.cc
Go to the documentation of this file.
1
7#include "mime.h"
8#include "mime-utils.h"
9#include "mime-inputsource.h"
10#include "convert.h"
11#include <string>
12#include <vector>
13#include <map>
14#include <exception>
15#include <iostream>
16
17#include <string.h>
18#include <ctype.h>
19#include <stdio.h>
20#include <errno.h>
21
23
24using namespace ::std;
25
26//------------------------------------------------------------------------
// NOTE(review): the line carrying this function's signature (listing line 27)
// is absent from this extract. The symbol index at the end of the page lists
// `void parseFull(int fd) const`, which is presumably this definition --
// confirm against the real source file.
//
// Parses the whole document at most once: returns immediately when
// allIsParsed is already set; otherwise marks it as parsed, clears the
// length/type fields, runs MimePart::parseFull() with an empty boundary and
// then drains whatever input is left.
28{
29 if (allIsParsed)
30 return;
31
32 allIsParsed = true;
33
// The existing input source is dropped when there is none yet or when it
// belongs to a different file descriptor than fd.
34 if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
35 delete mimeSource;
// NOTE(review): listing line 36 is missing here. As shown, nothing re-creates
// mimeSource after the delete, yet it is dereferenced further down -- confirm
// against the real source file.
37 } else {
// NOTE(review): listing line 38 is missing; this branch is empty as shown.
39 }
40
// Reset the per-document bookkeeping before parsing.
// NOTE(review): listing lines 41 and 43 are missing from this run of resets.
42 headerlength = 0;
44 bodylength = 0;
45 size = 0;
46 messagerfc822 = false;
47 multipart = false;
48
// An empty boundary string is passed for the top-level part.
49 int bsize = 0;
50 string bound;
51 MimePart::parseFull(bound, bsize);
52
53 // eat any trailing junk to get the correct size
54 char c;
55 while (mimeSource->getChar(&c));
56
// NOTE(review): listing line 57 is missing here; no statement that stores the
// size mentioned in the comment above is visible in this extract.
58
59}
60
61//------------------------------------------------------------------------
62static bool parseOneHeaderLine(Binc::Header *header, unsigned int *nlines)
63{
64 using namespace ::Binc;
65 char c;
66 bool eof = false;
67 char cqueue[4];
68 string name;
69 string content;
70
71 while (mimeSource->getChar(&c)) {
72 // If we encounter a \r before we got to the first ':', then
73 // rewind back to the start of the line and assume we're at the
74 // start of the body.
75 if (c == '\r') {
76 for (int i = 0; i < (int) name.length() + 1; ++i)
78 return false;
79 }
80
81 // A colon marks the end of the header name
82 if (c == ':') break;
83
84 // Otherwise add to the header name
85 name += c;
86 }
87
88 cqueue[0] = '\0';
89 cqueue[1] = '\0';
90 cqueue[2] = '\0';
91 cqueue[3] = '\0';
92
93 // Read until the end of the header.
94 bool endOfHeaders = false;
95 while (!endOfHeaders) {
96 if (!mimeSource->getChar(&c)) {
97 eof = true;
98 break;
99 }
100
101 if (c == '\n') ++*nlines;
102
103 for (int i = 0; i < 3; ++i)
104 cqueue[i] = cqueue[i + 1];
105 cqueue[3] = c;
106
107 if (strncmp(cqueue, "\r\n\r\n", 4) == 0) {
108 endOfHeaders = true;
109 break;
110 }
111
112 // If the last character was a newline, and the first now is not
113 // whitespace, then rewind one character and store the current
114 // key,value pair.
115 if (cqueue[2] == '\n' && c != ' ' && c != '\t') {
116 if (content.length() > 2)
117 content.resize(content.length() - 2);
118
119 trim(content);
120 header->add(name, content);
121
122 if (c != '\r') {
124 if (c == '\n') --*nlines;
125 return true;
126 }
127
128 mimeSource->getChar(&c);
129 return false;
130 }
131
132 content += c;
133 }
134
135 if (name != "") {
136 if (content.length() > 2)
137 content.resize(content.length() - 2);
138 header->add(name, content);
139 }
140
141 delete mimeSource;
142 return !(eof || endOfHeaders);
143}
144
145//------------------------------------------------------------------------
146static void parseHeader(Binc::Header *header, unsigned int *nlines)
147{
148 while (parseOneHeaderLine(header, nlines))
149 { }
150}
151
152//------------------------------------------------------------------------
153static void analyzeHeader(Binc::Header *header, bool *multipart,
154 bool *messagerfc822, string *subtype, string *boundary)
155{
156 using namespace ::Binc;
157
158 // Do simple parsing of headers to determine the
159 // type of message (multipart,messagerfc822 etc)
160 HeaderItem ctype;
161 if (header->getFirstHeader("content-type", ctype)) {
162 vector<string> types;
163 split(ctype.getValue(), ";", types);
164
165 if (types.size() > 0) {
166 // first element should describe content type
167 string tmp = types[0];
168 trim(tmp);
169 vector<string> v;
170 split(tmp, "/", v);
171 string key, value;
172
173 key = (v.size() > 0) ? v[0] : "text";
174 value = (v.size() > 1) ? v[1] : "plain";
175 lowercase(key);
176
177 if (key == "multipart") {
178 *multipart = true;
179 lowercase(value);
180 *subtype = value;
181 } else if (key == "message") {
182 lowercase(value);
183 if (value == "rfc822")
184 *messagerfc822 = true;
185 }
186 }
187
188 for (vector<string>::const_iterator i = types.begin();
189 i != types.end(); ++i) {
190 string element = *i;
191 trim(element);
192
193 if (element.find("=") != string::npos) {
194 string::size_type pos = element.find('=');
195 string key = element.substr(0, pos);
196 string value = element.substr(pos + 1);
197
198 lowercase(key);
199 trim(key);
200
201 if (key == "boundary") {
202 trim(value, " \"");
203 *boundary = value;
204 }
205 }
206 }
207 }
208}
209
210static void parseMessageRFC822(vector<Binc::MimePart> *members,
211 bool *foundendofpart,
212 unsigned int *bodylength,
213 unsigned int *nbodylines,
214 const string &toboundary)
215{
216 using namespace ::Binc;
217
218 // message rfc822 means a completely enclosed mime document. we
219 // call the parser recursively, and pass on the boundary string
220 // that we got. when parse() finds this boundary, it returns 0. if
221 // it finds the end boundary (boundary + "--"), it returns != 0.
222 MimePart m;
223
224 unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
225
226 // parsefull returns the number of bytes that need to be removed
227 // from the body because of the terminating boundary string.
228 int bsize = 0;
229 if (m.parseFull(toboundary, bsize))
230 *foundendofpart = true;
231
232 // make sure bodylength doesn't overflow
233 *bodylength = mimeSource->getOffset();
234 if (*bodylength >= bodystartoffsetcrlf) {
235 *bodylength -= bodystartoffsetcrlf;
236 if (*bodylength >= (unsigned int) bsize) {
237 *bodylength -= (unsigned int) bsize;
238 } else {
239 *bodylength = 0;
240 }
241 } else {
242 *bodylength = 0;
243 }
244
245 *nbodylines += m.getNofLines();
246
247 members->push_back(m);
248}
249
250static bool skipUntilBoundary(const string &delimiter,
251 unsigned int *nlines, bool *eof)
252{
253 int endpos = delimiter.length();
254 char *delimiterqueue = 0;
255 int delimiterpos = 0;
256 const char *delimiterStr = delimiter.c_str();
257 if (delimiter != "") {
258 delimiterqueue = new char[endpos];
259 memset(delimiterqueue, 0, endpos);
260 }
261
262 // first, skip to the first delimiter string. Anything between the
263 // header and the first delimiter string is simply ignored (it's
264 // usually a text message intended for non-mime clients)
265 char c;
266
267 bool foundBoundary = false;
268 for (;;) {
269 if (!mimeSource->getChar(&c)) {
270 *eof = true;
271 break;
272 }
273
274 if (c == '\n') ++*nlines;
275
276 // if there is no delimiter, we just read until the end of the
277 // file.
278 if (!delimiterqueue) continue;
279
280 delimiterqueue[delimiterpos++ % endpos] = c;
281
282 if (compareStringToQueue(delimiterStr, delimiterqueue,
283 delimiterpos, endpos)) {
284 foundBoundary = true;
285 break;
286 }
287 }
288
289 delete[] delimiterqueue;
290 delimiterqueue = 0;
291
292 return foundBoundary;
293}
294
295
296static void parseMultipart(const string &boundary,
297 const string &toboundary,
298 bool *eof,
299 unsigned int *nlines,
300 int *boundarysize,
301 bool *foundendofpart,
302 unsigned int *bodylength,
303 vector<Binc::MimePart> *members)
304{
305 using namespace ::Binc;
306 unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
307
308 // multipart parsing starts with skipping to the first
309 // boundary. then we call parse() for all parts. the last parse()
310 // command will return a code indicating that it found the last
311 // boundary of this multipart. Note that the first boundary does
312 // not have to start with CRLF.
313 string delimiter = "--" + boundary;
314
315 skipUntilBoundary(delimiter, nlines, eof);
316
317 if (!eof) *boundarysize = delimiter.size();
318
319 // Read two more characters. This may be CRLF, it may be "--" and
320 // it may be any other two characters.
321
322 char a;
323 if (!mimeSource->getChar(&a)) *eof = true;
324 if (a == '\n') ++*nlines;
325
326 char b;
327 if (!mimeSource->getChar(&b)) *eof = true;
328 if (b == '\n') ++*nlines;
329
330 // If we find two dashes after the boundary, then this is the end
331 // of boundary marker.
332 if (!*eof) {
333 if (a == '-' && b == '-') {
334 *foundendofpart = true;
335 *boundarysize += 2;
336
337 if (!mimeSource->getChar(&a)) *eof = true;
338 if (a == '\n') ++*nlines;
339 if (!mimeSource->getChar(&b)) *eof = true;
340 if (b == '\n') ++*nlines;
341 }
342
343 if (a == '\r' && b == '\n') {
344 // This exception is to handle a special case where the
345 // delimiter of one part is not followed by CRLF, but
346 // immediately followed by a CRLF prefixed delimiter.
347 if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
348 *eof = true;
349 else if (a == '-' && b == '-') {
354 } else {
357 }
358
359 *boundarysize += 2;
360 } else {
363 }
364 }
365
366 // read all mime parts.
367 if (!*foundendofpart && !*eof) {
368 bool quit = false;
369 do {
370 MimePart m;
371
372 // If parseFull returns != 0, then it encountered the multipart's
373 // final boundary.
374 int bsize = 0;
375 if (m.parseFull(boundary, bsize)) {
376 quit = true;
377 *boundarysize = bsize;
378 }
379
380 members->push_back(m);
381
382 } while (!quit);
383 }
384
385 if (!*foundendofpart && !*eof) {
386 // multipart parsing starts with skipping to the first
387 // boundary. then we call parse() for all parts. the last parse()
388 // command will return a code indicating that it found the last
389 // boundary of this multipart. Note that the first boundary does
390 // not have to start with CRLF.
391 string delimiter = "\r\n--" + toboundary;
392
393 skipUntilBoundary(delimiter, nlines, eof);
394
395 if (!*eof) *boundarysize = delimiter.size();
396
397 // Read two more characters. This may be CRLF, it may be "--" and
398 // it may be any other two characters.
399
400 char a = '\0';
401 if (!mimeSource->getChar(&a)) *eof = true;
402 if (a == '\n') ++*nlines;
403
404 char b = '\0';
405 if (!mimeSource->getChar(&b)) *eof = true;
406 if (b == '\n') ++*nlines;
407
408 // If we find two dashes after the boundary, then this is the end
409 // of boundary marker.
410 if (!*eof) {
411 if (a == '-' && b == '-') {
412 *foundendofpart = true;
413 *boundarysize += 2;
414 if (!mimeSource->getChar(&a))
415 *eof = true;
416 if (a == '\n')
417 ++*nlines;
418 if (!mimeSource->getChar(&b))
419 *eof = true;
420 if (b == '\n')
421 ++*nlines;
422 }
423
424 if (a == '\r' && b == '\n') {
425 // This exception is to handle a special case where the
426 // delimiter of one part is not followed by CRLF, but
427 // immediately followed by a CRLF prefixed delimiter.
428 if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
429 *eof = true;
430 else if (a == '-' && b == '-') {
435 } else {
438 }
439
440 *boundarysize += 2;
441 } else {
444 }
445 }
446 }
447
448 // make sure bodylength doesn't overflow
449 *bodylength = mimeSource->getOffset();
450 if (*bodylength >= bodystartoffsetcrlf) {
451 *bodylength -= bodystartoffsetcrlf;
452 if (*bodylength >= (unsigned int) *boundarysize) {
453 *bodylength -= (unsigned int) *boundarysize;
454 } else {
455 *bodylength = 0;
456 }
457 } else {
458 *bodylength = 0;
459 }
460}
461
462static void parseSinglePart(const string &toboundary,
463 int *boundarysize,
464 unsigned int *nbodylines,
465 unsigned int *nlines,
466 bool *eof, bool *foundendofpart,
467 unsigned int *bodylength)
468{
469 using namespace ::Binc;
470 unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
471
472 // If toboundary is empty, then we read until the end of the
473 // file. Otherwise we will read until we encounter toboundary.
474 string _toboundary;
475 if (toboundary != "") {
476 _toboundary = "\r\n--";
477 _toboundary += toboundary;
478 }
479
480 // if (skipUntilBoundary(_toboundary, nlines, eof))
481 // *boundarysize = _toboundary.length();
482
483 char *boundaryqueue = 0;
484 int endpos = _toboundary.length();
485 if (toboundary != "") {
486 boundaryqueue = new char[endpos];
487 memset(boundaryqueue, 0, endpos);
488 }
489 int boundarypos = 0;
490
491 *boundarysize = 0;
492
493 const char *_toboundaryStr = _toboundary.c_str();
494 string line;
495 bool toboundaryIsEmpty = (toboundary == "");
496 char c;
497 while (mimeSource->getChar(&c)) {
498 if (c == '\n') { ++*nbodylines; ++*nlines; }
499 if (toboundaryIsEmpty) continue;
500
501 // find boundary
502 boundaryqueue[boundarypos++ % endpos] = c;
503
504 if (compareStringToQueue(_toboundaryStr, boundaryqueue,
505 boundarypos, endpos)) {
506 *boundarysize = _toboundary.length();
507 break;
508 }
509 }
510
511 delete[] boundaryqueue;
512
513 if (toboundary != "") {
514
515 char a;
516 if (!mimeSource->getChar(&a)) *eof = true;
517 if (a == '\n') ++*nlines;
518
519 char b;
520 if (!mimeSource->getChar(&b)) *eof = true;
521 if (b == '\n') ++*nlines;
522
523 if (a == '-' && b == '-') {
524 *boundarysize += 2;
525 *foundendofpart = true;
526 if (!mimeSource->getChar(&a)) *eof = true;
527 if (a == '\n') ++*nlines;
528 if (!mimeSource->getChar(&b)) *eof = true;
529 if (b == '\n') ++*nlines;
530 }
531
532 if (a == '\r' && b == '\n') {
533 // This exception is to handle a special case where the
534 // delimiter of one part is not followed by CRLF, but
535 // immediately followed by a CRLF prefixed delimiter.
536 if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
537 *eof = true;
538 else if (a == '-' && b == '-') {
543 } else {
546 }
547
548 *boundarysize += 2;
549 } else {
552 }
553 }
554
555 // make sure bodylength doesn't overflow
556 *bodylength = mimeSource->getOffset();
557 if (*bodylength >= bodystartoffsetcrlf) {
558 *bodylength -= bodystartoffsetcrlf;
559 if (*bodylength >= (unsigned int) *boundarysize) {
560 *bodylength -= (unsigned int) *boundarysize;
561 } else {
562 *bodylength = 0;
563 }
564 } else {
565 *bodylength = 0;
566 }
567
568}
569
570//------------------------------------------------------------------------
571int Binc::MimePart::parseFull(const string &toboundary,
572 int &boundarysize) const
573{
574 headerstartoffsetcrlf = mimeSource->getOffset();
575
576 // Parse the header of this mime part.
577 parseHeader(&h, &nlines);
578
579 // Headerlength includes the seperating CRLF. Body starts after the
580 // CRLF.
581 headerlength = mimeSource->getOffset() - headerstartoffsetcrlf;
582 bodystartoffsetcrlf = mimeSource->getOffset();
583 bodylength = 0;
584
585 // Determine the type of mime part by looking at fields in the
586 // header.
587 analyzeHeader(&h, &multipart, &messagerfc822, &subtype, &boundary);
588
589 bool eof = false;
590 bool foundendofpart = false;
591
592 if (messagerfc822) {
593 parseMessageRFC822(&members, &foundendofpart, &bodylength,
594 &nbodylines, toboundary);
595
596 } else if (multipart) {
597 parseMultipart(boundary, toboundary, &eof, &nlines, &boundarysize,
598 &foundendofpart, &bodylength, &members);
599 } else {
600 parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
601 &eof, &foundendofpart, &bodylength);
602 }
603
604 return (eof || foundendofpart) ? 1 : 0;
605}
void add(const std::string &name, const std::string &content)
Definition: mime.cc:131
bool getFirstHeader(const std::string &key, HeaderItem &dest) const
Definition: mime.cc:89
void parseFull(int fd) const
virtual void reset(void)
int getFileDescriptor(void) const
unsigned int getOffset(void) const
virtual int parseFull(const std::string &toboundary, int &boundarysize) const
unsigned int headerstartoffsetcrlf
Definition: mime.h:57
bool multipart
Definition: mime.h:52
bool messagerfc822
Definition: mime.h:53
unsigned int bodylength
Definition: mime.h:61
unsigned int bodystartoffsetcrlf
Definition: mime.h:60
unsigned int size
Definition: mime.h:64
unsigned int headerlength
Definition: mime.h:58
Declaration of miscellaneous conversion functions.
The base class of the MIME input source.
Binc::MimeInputSource * mimeSource
Binc::MimeInputSource * mimeSource
bool compareStringToQueue(const char *s_in, char *bqueue, int pos, int size)
Definition: mime-utils.h:17
Declaration of main mime parser components.
Definition: bincimapd.cc:9
void split(const std::string &s_in, const std::string &delim, std::vector< std::string > &dest, bool skipempty=true)
Definition: convert.h:177
void lowercase(std::string &input)
Definition: convert.h:122
void trim(std::string &s_in, const std::string &chars=" \t\r\n")
Definition: convert.h:137