467 lines
13 KiB
Go
467 lines
13 KiB
Go
package rfc822
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"mime"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.sr.ht/~rjarry/aerc/lib/log"
|
|
"git.sr.ht/~rjarry/aerc/lib/parse"
|
|
"git.sr.ht/~rjarry/aerc/models"
|
|
"github.com/emersion/go-message"
|
|
_ "github.com/emersion/go-message/charset"
|
|
"github.com/emersion/go-message/mail"
|
|
)
|
|
|
|
// MultipartError wraps an error that was raised while reading the parts of a
// multipart message.
type MultipartError struct {
	e error
}

// Unwrap exposes the wrapped error so that errors.Is and errors.As can see
// through a MultipartError.
func (m MultipartError) Unwrap() error {
	return m.e
}

// Error returns the wrapped error's message prefixed with "multipart error: ".
func (m MultipartError) Error() string {
	return fmt.Sprintf("multipart error: %s", m.e.Error())
}
|
|
|
|
// IsMultipartError returns a boolean indicating whether the error is known to
|
|
// report that the multipart message is malformed and could not be parsed.
|
|
func IsMultipartError(err error) bool {
|
|
return errors.As(err, new(MultipartError))
|
|
}
|
|
|
|
// dateRe locates an RFC 1123Z-like timestamp such as
// "Mon, 02 Jan 2006 15:04:05 -0700" inside free-form header text. The comma
// after the weekday and the seconds field are optional, and the day of the
// month may have one or two digits. The zone offset must start with '+' or
// '-'; inside a character class '|' is a literal, so it must not be listed.
var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
	`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
	`([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([+\-][0-9]{4})`)
|
|
|
|
func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
|
|
if len(index) == 0 {
|
|
// non multipart, simply return everything
|
|
return bufReader(e)
|
|
}
|
|
if mpr := e.MultipartReader(); mpr != nil {
|
|
idx := 0
|
|
for {
|
|
idx++
|
|
part, err := mpr.NextPart()
|
|
switch {
|
|
case message.IsUnknownCharset(err):
|
|
log.Warnf("FetchEntityPartReader: %v", err)
|
|
case message.IsUnknownEncoding(err):
|
|
log.Warnf("FetchEntityPartReader: %v", err)
|
|
case err != nil:
|
|
log.Warnf("FetchEntityPartReader: %v", err)
|
|
return bufReader(e)
|
|
}
|
|
if idx == index[0] {
|
|
rest := index[1:]
|
|
if len(rest) < 1 {
|
|
return bufReader(part)
|
|
}
|
|
return FetchEntityPartReader(part, index[1:])
|
|
}
|
|
}
|
|
}
|
|
return nil, fmt.Errorf("FetchEntityPartReader: unexpected code reached")
|
|
}
|
|
|
|
// TODO: the UI doesn't seem to like readers which aren't buffers
|
|
func bufReader(e *message.Entity) (io.Reader, error) {
|
|
var buf bytes.Buffer
|
|
if _, err := io.Copy(&buf, e.Body); err != nil {
|
|
return nil, err
|
|
}
|
|
return &buf, nil
|
|
}
|
|
|
|
// splitMIME splits a MIME type of the form "major/minor" into its two parts.
// When m does not contain exactly one slash, everything before the first slash
// (or all of m if there is none) is returned as the major part and the minor
// part is empty.
func splitMIME(m string) (string, string) {
	switch strings.Count(m, "/") {
	case 0:
		return m, ""
	case 1:
		slash := strings.IndexByte(m, '/')
		return m[:slash], m[slash+1:]
	default:
		return m[:strings.IndexByte(m, '/')], ""
	}
}
|
|
|
|
func fixContentType(h message.Header) (string, map[string]string) {
|
|
ct, rest := h.Get("Content-Type"), ""
|
|
if i := strings.Index(ct, ";"); i > 0 {
|
|
ct, rest = ct[:i], ct[i:]
|
|
}
|
|
|
|
// check if there are quotes around the content type
|
|
if strings.Contains(ct, "\"") {
|
|
header := strings.ReplaceAll(ct, "\"", "")
|
|
if rest != "" {
|
|
header += rest
|
|
}
|
|
h.Set("Content-Type", header)
|
|
if contenttype, params, err := h.ContentType(); err == nil {
|
|
return contenttype, params
|
|
}
|
|
}
|
|
|
|
// if all else fails, return text/plain
|
|
return "text/plain", nil
|
|
}
|
|
|
|
// ParseEntityStructure will parse the message and create a multipart structure
|
|
// for multipart messages. Parsing is done on a best-efforts basis:
|
|
//
|
|
// If the content-type cannot be parsed, ParseEntityStructure will try to fix
|
|
// it; otherwise, it returns a text/plain mime type as a fallback. No error will
|
|
// be returned.
|
|
//
|
|
// If a charset or encoding error is encountered for a message part of a
|
|
// multipart message, the error is logged and ignored. In those cases, we still
|
|
// get a valid message body but the content is just not decoded or converted. No
|
|
// error will be returned.
|
|
//
|
|
// If reading a multipart message fails, ParseEntityStructure will return a
|
|
// multipart error. This error indicates that this message is malformed and
|
|
// there is nothing more we can do. The caller is then advised to use a single
|
|
// text/plain body structure using CreateTextPlainPart().
|
|
func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
|
|
var body models.BodyStructure
|
|
contentType, ctParams, err := e.Header.ContentType()
|
|
if err != nil {
|
|
// try to fix the error; if all measures fail, then return a
|
|
// text/plain content type to display at least plaintext
|
|
contentType, ctParams = fixContentType(e.Header)
|
|
}
|
|
|
|
mimeType, mimeSubType := splitMIME(contentType)
|
|
body.MIMEType = mimeType
|
|
body.MIMESubType = mimeSubType
|
|
body.Params = ctParams
|
|
body.Description = e.Header.Get("content-description")
|
|
body.Encoding = e.Header.Get("content-transfer-encoding")
|
|
if cd := e.Header.Get("content-disposition"); cd != "" {
|
|
contentDisposition, cdParams, err := e.Header.ContentDisposition()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not parse content disposition: %w", err)
|
|
}
|
|
body.Disposition = contentDisposition
|
|
body.DispositionParams = cdParams
|
|
}
|
|
body.Parts = []*models.BodyStructure{}
|
|
if mpr := e.MultipartReader(); mpr != nil {
|
|
for {
|
|
part, err := mpr.NextPart()
|
|
switch {
|
|
case errors.Is(err, io.EOF):
|
|
return &body, nil
|
|
case message.IsUnknownCharset(err):
|
|
log.Warnf("ParseEntityStructure: %v", err)
|
|
case message.IsUnknownEncoding(err):
|
|
log.Warnf("ParseEntityStructure: %v", err)
|
|
case err != nil:
|
|
return nil, MultipartError{err}
|
|
}
|
|
ps, err := ParseEntityStructure(part)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not parse child entity structure: %w", err)
|
|
}
|
|
body.Parts = append(body.Parts, ps)
|
|
}
|
|
}
|
|
return &body, nil
|
|
}
|
|
|
|
// CreateTextPlainBody creates a plain-vanilla text/plain body structure.
|
|
func CreateTextPlainBody() *models.BodyStructure {
|
|
body := &models.BodyStructure{}
|
|
body.MIMEType = "text"
|
|
body.MIMESubType = "plain"
|
|
body.Params = map[string]string{"charset": "utf-8"}
|
|
body.Parts = []*models.BodyStructure{}
|
|
return body
|
|
}
|
|
|
|
func parseEnvelope(h *mail.Header) *models.Envelope {
|
|
subj, err := h.Subject()
|
|
if err != nil {
|
|
log.Errorf("could not decode subject: %v", err)
|
|
subj = h.Get("Subject")
|
|
}
|
|
msgID, err := h.MessageID()
|
|
if err != nil {
|
|
log.Errorf("invalid Message-ID header: %v", err)
|
|
// proper parsing failed, so fall back to whatever is there
|
|
msgID = strings.Trim(h.Get("message-id"), "<>")
|
|
}
|
|
var irt string
|
|
irtList := parse.MsgIDList(h, "in-reply-to")
|
|
if len(irtList) > 0 {
|
|
irt = irtList[0]
|
|
}
|
|
date, err := parseDate(h)
|
|
if err != nil {
|
|
// if only the date parsing failed we still get the rest of the
|
|
// envelop structure in a valid state.
|
|
// Date parsing errors are fairly common and it's better to be
|
|
// slightly off than to not be able to read the mails at all
|
|
// hence we continue here
|
|
log.Errorf("invalid Date header: %v", err)
|
|
}
|
|
return &models.Envelope{
|
|
Date: date,
|
|
Subject: subj,
|
|
MessageId: msgID,
|
|
From: parseAddressList(h, "from"),
|
|
ReplyTo: parseAddressList(h, "reply-to"),
|
|
Sender: parseAddressList(h, "sender"),
|
|
To: parseAddressList(h, "to"),
|
|
Cc: parseAddressList(h, "cc"),
|
|
Bcc: parseAddressList(h, "bcc"),
|
|
InReplyTo: irt,
|
|
}
|
|
}
|
|
|
|
// isDateOK reports whether the zone of t can be trusted.
//
// If a date is formatted like "... -0500 (EST)", the parser takes the EST part
// and ignores the numeric offset. It may then fail to work out what EST means
// unless the proper locale is loaded, leaving a named zone with a zero offset.
// Such values are reported as not OK so that callers can treat them as
// unreliable.
// https://stackoverflow.com/questions/49084316/why-doesnt-gos-time-parse-parse-the-timezone-identifier
func isDateOK(t time.Time) bool {
	zone, offset := t.Zone()
	if offset != 0 {
		// non-zero offsets are fine
		return true
	}
	switch zone {
	case "UTC", "GMT", "":
		// a zero offset is plausible for these names
		return true
	}
	// a named zone with a zero offset should not be trusted
	return false
}
|
|
|
|
// parseDate tries to parse the date from the Date header with non std formats
|
|
// if this fails it tries to parse the received header as well
|
|
func parseDate(h *mail.Header) (time.Time, error) {
|
|
// here we store the best parsed time we have so far
|
|
// if we find no "correct" time, we'll use that
|
|
bestDate := time.Time{}
|
|
|
|
// trying the easy way
|
|
t, err := h.Date()
|
|
if err == nil {
|
|
if isDateOK(t) {
|
|
return t, nil
|
|
}
|
|
bestDate = t
|
|
}
|
|
text := h.Get("date")
|
|
|
|
// sometimes, no error occurs but the date is empty.
|
|
// In this case, guess time from received header field
|
|
if text == "" {
|
|
t, err := parseReceivedHeader(h)
|
|
if err == nil {
|
|
return t, nil
|
|
}
|
|
}
|
|
layouts := []string{
|
|
// X-Mailer: EarthLink Zoo Mail 1.0
|
|
"Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)",
|
|
}
|
|
for _, layout := range layouts {
|
|
if t, err := time.Parse(layout, text); err == nil {
|
|
if isDateOK(t) {
|
|
return t, nil
|
|
}
|
|
bestDate = t
|
|
}
|
|
}
|
|
|
|
// still no success, try the received header
|
|
t, err = parseReceivedHeader(h)
|
|
if err == nil {
|
|
if isDateOK(t) {
|
|
return t, nil
|
|
}
|
|
bestDate = t
|
|
}
|
|
|
|
// do we have at least something?
|
|
if !bestDate.IsZero() {
|
|
return bestDate, nil
|
|
}
|
|
|
|
// sad...
|
|
return time.Time{}, fmt.Errorf("unrecognized date format: %s", text)
|
|
}
|
|
|
|
func parseReceivedHeader(h *mail.Header) (time.Time, error) {
|
|
guess, err := h.Text("received")
|
|
if err != nil {
|
|
return time.Time{}, fmt.Errorf("received header not parseable: %w",
|
|
err)
|
|
}
|
|
return time.Parse(time.RFC1123Z, dateRe.FindString(guess))
|
|
}
|
|
|
|
func parseAddressList(h *mail.Header, key string) []*mail.Address {
|
|
addrs, err := h.AddressList(key)
|
|
if len(addrs) == 0 {
|
|
// Only consider the error if the returned address list is empty
|
|
// Sometimes, we get a list of addresses and unknown charset
|
|
// errors which are not fatal.
|
|
if val := h.Get(key); val != "" {
|
|
if err != nil {
|
|
log.Errorf("%s: %s: %v", key, val, err)
|
|
}
|
|
// Header value is not empty but parsing completely
|
|
// failed. Return something so that the message can at
|
|
// least be displayed.
|
|
return []*mail.Address{{Name: val}}
|
|
}
|
|
return nil
|
|
}
|
|
for _, addr := range addrs {
|
|
// Handle invalid headers with quoted *AND* encoded names
|
|
if strings.HasPrefix(addr.Name, "=?") && strings.HasSuffix(addr.Name, "?=") {
|
|
d := mime.WordDecoder{CharsetReader: message.CharsetReader}
|
|
addr.Name, _ = d.DecodeHeader(addr.Name)
|
|
}
|
|
}
|
|
// If we got at least one address, ignore any returned error.
|
|
return addrs
|
|
}
|
|
|
|
// RawMessage is an interface that describes a raw message
|
|
type RawMessage interface {
|
|
NewReader() (io.ReadCloser, error)
|
|
ModelFlags() (models.Flags, error)
|
|
Labels() ([]string, error)
|
|
UID() models.UID
|
|
}
|
|
|
|
// MessageInfo populates a models.MessageInfo struct for the message.
|
|
// based on the reader returned by NewReader
|
|
func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
|
|
var parseErr error
|
|
r, err := raw.NewReader()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer r.Close()
|
|
msg, err := ReadMessage(r)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not read message: %w", err)
|
|
}
|
|
bs, err := ParseEntityStructure(msg)
|
|
if IsMultipartError(err) {
|
|
log.Warnf("multipart error: %v", err)
|
|
bs = CreateTextPlainBody()
|
|
} else if err != nil {
|
|
return nil, fmt.Errorf("could not get structure: %w", err)
|
|
}
|
|
h := &mail.Header{Header: msg.Header}
|
|
env := parseEnvelope(h)
|
|
recDate, _ := parseReceivedHeader(h)
|
|
if recDate.IsZero() {
|
|
// better than nothing, if incorrect
|
|
recDate = env.Date
|
|
}
|
|
flags, err := raw.ModelFlags()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
labels, err := raw.Labels()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &models.MessageInfo{
|
|
BodyStructure: bs,
|
|
Envelope: env,
|
|
Flags: flags,
|
|
Labels: labels,
|
|
InternalDate: recDate,
|
|
RFC822Headers: h,
|
|
Size: 0,
|
|
Uid: raw.UID(),
|
|
Error: parseErr,
|
|
}, nil
|
|
}
|
|
|
|
// MessageHeaders populates a models.MessageInfo struct for the message.
|
|
// based on the reader returned by NewReader. Minimal information is included.
|
|
// There is no body structure or RFC822Headers set
|
|
func MessageHeaders(raw RawMessage) (*models.MessageInfo, error) {
|
|
var parseErr error
|
|
r, err := raw.NewReader()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer r.Close()
|
|
msg, err := ReadMessage(r)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not read message: %w", err)
|
|
}
|
|
h := &mail.Header{Header: msg.Header}
|
|
env := parseEnvelope(h)
|
|
recDate, _ := parseReceivedHeader(h)
|
|
if recDate.IsZero() {
|
|
// better than nothing, if incorrect
|
|
recDate = env.Date
|
|
}
|
|
flags, err := raw.ModelFlags()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
labels, err := raw.Labels()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &models.MessageInfo{
|
|
Envelope: env,
|
|
Flags: flags,
|
|
Labels: labels,
|
|
InternalDate: recDate,
|
|
Refs: parse.MsgIDList(h, "references"),
|
|
Size: 0,
|
|
Uid: raw.UID(),
|
|
Error: parseErr,
|
|
}, nil
|
|
}
|
|
|
|
// NewCRLFReader reads all of r and returns a reader over the same content
// with every line terminated by CRLF.
//
// Lines are split on "\n"; one trailing "\r" is dropped from each line before
// "\r\n" is appended, so input that already uses CRLF is passed through
// unchanged. A final line without a terminator gets one. Lines may be of any
// length.
//
// The function has no way to report a read error: if r fails, the data read
// up to that point is returned.
func NewCRLFReader(r io.Reader) io.Reader {
	var buf bytes.Buffer
	br := bufio.NewReader(r)
	for {
		// ReadBytes has no line length limit, unlike bufio.Scanner whose
		// default 64 KiB token limit would silently cut the message short.
		line, err := br.ReadBytes('\n')
		if len(line) > 0 {
			line = bytes.TrimSuffix(line, []byte("\n"))
			line = bytes.TrimSuffix(line, []byte("\r"))
			buf.Write(line)
			buf.WriteString("\r\n")
		}
		if err != nil {
			// io.EOF or a real read error: either way there is nothing
			// more to read
			break
		}
	}
	return &buf
}
|
|
|
|
// ReadMessage is a wrapper for the message.Read function to read a message
|
|
// from r. The message's encoding and charset are automatically decoded to
|
|
// UTF-8. If an unknown charset or unknown encoding is encountered, the error is
|
|
// logged but a nil error is returned since the entity object can still be read.
|
|
func ReadMessage(r io.Reader) (*message.Entity, error) {
|
|
entity, err := message.Read(r)
|
|
switch {
|
|
case message.IsUnknownCharset(err):
|
|
// message body is valid, just not converted, so continue
|
|
log.Warnf("ReadMessage: %v", err)
|
|
case message.IsUnknownEncoding(err):
|
|
// message body is valid, just not decoded, so continue
|
|
log.Warnf("ReadMessage: %v", err)
|
|
case err != nil:
|
|
return nil, fmt.Errorf("could not read message: %w", err)
|
|
}
|
|
return entity, nil
|
|
}
|