init: pristine aerc 0.20.0 source
This commit is contained in:
@@ -0,0 +1,466 @@
|
||||
package rfc822
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.sr.ht/~rjarry/aerc/lib/log"
|
||||
"git.sr.ht/~rjarry/aerc/lib/parse"
|
||||
"git.sr.ht/~rjarry/aerc/models"
|
||||
"github.com/emersion/go-message"
|
||||
_ "github.com/emersion/go-message/charset"
|
||||
"github.com/emersion/go-message/mail"
|
||||
)
|
||||
|
||||
// MultipartError wraps the error that was hit while reading the parts of a
// multipart message.
type MultipartError struct {
	e error
}

// Unwrap returns the wrapped error so that errors.Is and errors.As can inspect
// it.
func (m MultipartError) Unwrap() error {
	return m.e
}

// Error implements the error interface. The text of the wrapped error is
// prefixed with "multipart error: ".
func (m MultipartError) Error() string {
	const prefix = "multipart error: "
	return prefix + m.e.Error()
}
|
||||
|
||||
// IsMultipartError returns a boolean indicating whether the error is known to
|
||||
// report that the multipart message is malformed and could not be parsed.
|
||||
func IsMultipartError(err error) bool {
|
||||
return errors.As(err, new(MultipartError))
|
||||
}
|
||||
|
||||
// dateRe matches a timestamp that resembles the RFC 1123Z layout: a weekday
// name with an optional comma, a one or two digit day, a month name, a four
// digit year, HH:MM with optional :SS and a signed four digit zone offset.
// parseReceivedHeader uses it to pull the timestamp out of a Received header
// before handing it to time.Parse.
//
// NOTE(review): the sign character class `[\+|\-]` also admits a literal '|'.
var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
	`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
	`([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([\+|\-][0-9]{4})`)
|
||||
|
||||
func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
|
||||
if len(index) == 0 {
|
||||
// non multipart, simply return everything
|
||||
return bufReader(e)
|
||||
}
|
||||
if mpr := e.MultipartReader(); mpr != nil {
|
||||
idx := 0
|
||||
for {
|
||||
idx++
|
||||
part, err := mpr.NextPart()
|
||||
switch {
|
||||
case message.IsUnknownCharset(err):
|
||||
log.Warnf("FetchEntityPartReader: %v", err)
|
||||
case message.IsUnknownEncoding(err):
|
||||
log.Warnf("FetchEntityPartReader: %v", err)
|
||||
case err != nil:
|
||||
log.Warnf("FetchEntityPartReader: %v", err)
|
||||
return bufReader(e)
|
||||
}
|
||||
if idx == index[0] {
|
||||
rest := index[1:]
|
||||
if len(rest) < 1 {
|
||||
return bufReader(part)
|
||||
}
|
||||
return FetchEntityPartReader(part, index[1:])
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("FetchEntityPartReader: unexpected code reached")
|
||||
}
|
||||
|
||||
// TODO: the UI doesn't seem to like readers which aren't buffers
|
||||
func bufReader(e *message.Entity) (io.Reader, error) {
|
||||
var buf bytes.Buffer
|
||||
if _, err := io.Copy(&buf, e.Body); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &buf, nil
|
||||
}
|
||||
|
||||
// splitMIME splits a MIME type such as "text/plain" into its major and minor
// parts. When m does not contain exactly one "/", the minor part is empty and
// the major part is everything before the first "/" (or all of m if there is
// none).
func splitMIME(m string) (string, string) {
	slash := strings.IndexByte(m, '/')
	switch {
	case slash < 0:
		// no separator at all
		return m, ""
	case strings.IndexByte(m[slash+1:], '/') >= 0:
		// more than one separator: only the leading segment is kept
		return m[:slash], ""
	default:
		return m[:slash], m[slash+1:]
	}
}
|
||||
|
||||
func fixContentType(h message.Header) (string, map[string]string) {
|
||||
ct, rest := h.Get("Content-Type"), ""
|
||||
if i := strings.Index(ct, ";"); i > 0 {
|
||||
ct, rest = ct[:i], ct[i:]
|
||||
}
|
||||
|
||||
// check if there are quotes around the content type
|
||||
if strings.Contains(ct, "\"") {
|
||||
header := strings.ReplaceAll(ct, "\"", "")
|
||||
if rest != "" {
|
||||
header += rest
|
||||
}
|
||||
h.Set("Content-Type", header)
|
||||
if contenttype, params, err := h.ContentType(); err == nil {
|
||||
return contenttype, params
|
||||
}
|
||||
}
|
||||
|
||||
// if all else fails, return text/plain
|
||||
return "text/plain", nil
|
||||
}
|
||||
|
||||
// ParseEntityStructure will parse the message and create a multipart structure
|
||||
// for multipart messages. Parsing is done on a best-efforts basis:
|
||||
//
|
||||
// If the content-type cannot be parsed, ParseEntityStructure will try to fix
|
||||
// it; otherwise, it returns a text/plain mime type as a fallback. No error will
|
||||
// be returned.
|
||||
//
|
||||
// If a charset or encoding error is encountered for a message part of a
|
||||
// multipart message, the error is logged and ignored. In those cases, we still
|
||||
// get a valid message body but the content is just not decoded or converted. No
|
||||
// error will be returned.
|
||||
//
|
||||
// If reading a multipart message fails, ParseEntityStructure will return a
|
||||
// multipart error. This error indicates that this message is malformed and
|
||||
// there is nothing more we can do. The caller is then advised to use a single
|
||||
// text/plain body structure using CreateTextPlainPart().
|
||||
func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
|
||||
var body models.BodyStructure
|
||||
contentType, ctParams, err := e.Header.ContentType()
|
||||
if err != nil {
|
||||
// try to fix the error; if all measures fail, then return a
|
||||
// text/plain content type to display at least plaintext
|
||||
contentType, ctParams = fixContentType(e.Header)
|
||||
}
|
||||
|
||||
mimeType, mimeSubType := splitMIME(contentType)
|
||||
body.MIMEType = mimeType
|
||||
body.MIMESubType = mimeSubType
|
||||
body.Params = ctParams
|
||||
body.Description = e.Header.Get("content-description")
|
||||
body.Encoding = e.Header.Get("content-transfer-encoding")
|
||||
if cd := e.Header.Get("content-disposition"); cd != "" {
|
||||
contentDisposition, cdParams, err := e.Header.ContentDisposition()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not parse content disposition: %w", err)
|
||||
}
|
||||
body.Disposition = contentDisposition
|
||||
body.DispositionParams = cdParams
|
||||
}
|
||||
body.Parts = []*models.BodyStructure{}
|
||||
if mpr := e.MultipartReader(); mpr != nil {
|
||||
for {
|
||||
part, err := mpr.NextPart()
|
||||
switch {
|
||||
case errors.Is(err, io.EOF):
|
||||
return &body, nil
|
||||
case message.IsUnknownCharset(err):
|
||||
log.Warnf("ParseEntityStructure: %v", err)
|
||||
case message.IsUnknownEncoding(err):
|
||||
log.Warnf("ParseEntityStructure: %v", err)
|
||||
case err != nil:
|
||||
return nil, MultipartError{err}
|
||||
}
|
||||
ps, err := ParseEntityStructure(part)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not parse child entity structure: %w", err)
|
||||
}
|
||||
body.Parts = append(body.Parts, ps)
|
||||
}
|
||||
}
|
||||
return &body, nil
|
||||
}
|
||||
|
||||
// CreateTextPlainBody creates a plain-vanilla text/plain body structure.
|
||||
func CreateTextPlainBody() *models.BodyStructure {
|
||||
body := &models.BodyStructure{}
|
||||
body.MIMEType = "text"
|
||||
body.MIMESubType = "plain"
|
||||
body.Params = map[string]string{"charset": "utf-8"}
|
||||
body.Parts = []*models.BodyStructure{}
|
||||
return body
|
||||
}
|
||||
|
||||
func parseEnvelope(h *mail.Header) *models.Envelope {
|
||||
subj, err := h.Subject()
|
||||
if err != nil {
|
||||
log.Errorf("could not decode subject: %v", err)
|
||||
subj = h.Get("Subject")
|
||||
}
|
||||
msgID, err := h.MessageID()
|
||||
if err != nil {
|
||||
log.Errorf("invalid Message-ID header: %v", err)
|
||||
// proper parsing failed, so fall back to whatever is there
|
||||
msgID = strings.Trim(h.Get("message-id"), "<>")
|
||||
}
|
||||
var irt string
|
||||
irtList := parse.MsgIDList(h, "in-reply-to")
|
||||
if len(irtList) > 0 {
|
||||
irt = irtList[0]
|
||||
}
|
||||
date, err := parseDate(h)
|
||||
if err != nil {
|
||||
// if only the date parsing failed we still get the rest of the
|
||||
// envelop structure in a valid state.
|
||||
// Date parsing errors are fairly common and it's better to be
|
||||
// slightly off than to not be able to read the mails at all
|
||||
// hence we continue here
|
||||
log.Errorf("invalid Date header: %v", err)
|
||||
}
|
||||
return &models.Envelope{
|
||||
Date: date,
|
||||
Subject: subj,
|
||||
MessageId: msgID,
|
||||
From: parseAddressList(h, "from"),
|
||||
ReplyTo: parseAddressList(h, "reply-to"),
|
||||
Sender: parseAddressList(h, "sender"),
|
||||
To: parseAddressList(h, "to"),
|
||||
Cc: parseAddressList(h, "cc"),
|
||||
Bcc: parseAddressList(h, "bcc"),
|
||||
InReplyTo: irt,
|
||||
}
|
||||
}
|
||||
|
||||
// isDateOK reports whether the zone information of t can be trusted.
//
// If a date is formatted like "... -0500 (EST)", the parser takes the EST part
// and ignores the numeric offset. It may then fail to resolve what EST means
// unless the proper locale is loaded, which leaves a named zone with a zero
// offset. Such values are reported as not OK so callers can disregard them.
// https://stackoverflow.com/questions/49084316/why-doesnt-gos-time-parse-parse-the-timezone-identifier
func isDateOK(t time.Time) bool {
	zone, offset := t.Zone()
	if offset != 0 {
		// a non-zero offset was resolved, so the zone is fine
		return true
	}
	switch zone {
	case "UTC", "GMT", "":
		// a zero offset is legitimate for these
		return true
	}
	// named zone without an offset: do not trust this date
	return false
}
|
||||
|
||||
// parseDate tries to parse the date from the Date header with non std formats
|
||||
// if this fails it tries to parse the received header as well
|
||||
func parseDate(h *mail.Header) (time.Time, error) {
|
||||
// here we store the best parsed time we have so far
|
||||
// if we find no "correct" time, we'll use that
|
||||
bestDate := time.Time{}
|
||||
|
||||
// trying the easy way
|
||||
t, err := h.Date()
|
||||
if err == nil {
|
||||
if isDateOK(t) {
|
||||
return t, nil
|
||||
}
|
||||
bestDate = t
|
||||
}
|
||||
text := h.Get("date")
|
||||
|
||||
// sometimes, no error occurs but the date is empty.
|
||||
// In this case, guess time from received header field
|
||||
if text == "" {
|
||||
t, err := parseReceivedHeader(h)
|
||||
if err == nil {
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
layouts := []string{
|
||||
// X-Mailer: EarthLink Zoo Mail 1.0
|
||||
"Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)",
|
||||
}
|
||||
for _, layout := range layouts {
|
||||
if t, err := time.Parse(layout, text); err == nil {
|
||||
if isDateOK(t) {
|
||||
return t, nil
|
||||
}
|
||||
bestDate = t
|
||||
}
|
||||
}
|
||||
|
||||
// still no success, try the received header
|
||||
t, err = parseReceivedHeader(h)
|
||||
if err == nil {
|
||||
if isDateOK(t) {
|
||||
return t, nil
|
||||
}
|
||||
bestDate = t
|
||||
}
|
||||
|
||||
// do we have at least something?
|
||||
if !bestDate.IsZero() {
|
||||
return bestDate, nil
|
||||
}
|
||||
|
||||
// sad...
|
||||
return time.Time{}, fmt.Errorf("unrecognized date format: %s", text)
|
||||
}
|
||||
|
||||
func parseReceivedHeader(h *mail.Header) (time.Time, error) {
|
||||
guess, err := h.Text("received")
|
||||
if err != nil {
|
||||
return time.Time{}, fmt.Errorf("received header not parseable: %w",
|
||||
err)
|
||||
}
|
||||
return time.Parse(time.RFC1123Z, dateRe.FindString(guess))
|
||||
}
|
||||
|
||||
func parseAddressList(h *mail.Header, key string) []*mail.Address {
|
||||
addrs, err := h.AddressList(key)
|
||||
if len(addrs) == 0 {
|
||||
// Only consider the error if the returned address list is empty
|
||||
// Sometimes, we get a list of addresses and unknown charset
|
||||
// errors which are not fatal.
|
||||
if val := h.Get(key); val != "" {
|
||||
if err != nil {
|
||||
log.Errorf("%s: %s: %v", key, val, err)
|
||||
}
|
||||
// Header value is not empty but parsing completely
|
||||
// failed. Return something so that the message can at
|
||||
// least be displayed.
|
||||
return []*mail.Address{{Name: val}}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
for _, addr := range addrs {
|
||||
// Handle invalid headers with quoted *AND* encoded names
|
||||
if strings.HasPrefix(addr.Name, "=?") && strings.HasSuffix(addr.Name, "?=") {
|
||||
d := mime.WordDecoder{CharsetReader: message.CharsetReader}
|
||||
addr.Name, _ = d.DecodeHeader(addr.Name)
|
||||
}
|
||||
}
|
||||
// If we got at least one address, ignore any returned error.
|
||||
return addrs
|
||||
}
|
||||
|
||||
// RawMessage is an interface that describes a raw message. MessageInfo and
// MessageHeaders use it to read a message and to fill in the fields of the
// resulting models.MessageInfo.
type RawMessage interface {
	// NewReader returns a reader from which the message is read. The
	// callers in this file close it once they are done.
	NewReader() (io.ReadCloser, error)
	// ModelFlags returns the value stored as the Flags of the message.
	ModelFlags() (models.Flags, error)
	// Labels returns the value stored as the Labels of the message.
	Labels() ([]string, error)
	// UID returns the value stored as the Uid of the message.
	UID() models.UID
}
|
||||
|
||||
// MessageInfo populates a models.MessageInfo struct for the message.
|
||||
// based on the reader returned by NewReader
|
||||
func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
|
||||
var parseErr error
|
||||
r, err := raw.NewReader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
msg, err := ReadMessage(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not read message: %w", err)
|
||||
}
|
||||
bs, err := ParseEntityStructure(msg)
|
||||
if IsMultipartError(err) {
|
||||
log.Warnf("multipart error: %v", err)
|
||||
bs = CreateTextPlainBody()
|
||||
} else if err != nil {
|
||||
return nil, fmt.Errorf("could not get structure: %w", err)
|
||||
}
|
||||
h := &mail.Header{Header: msg.Header}
|
||||
env := parseEnvelope(h)
|
||||
recDate, _ := parseReceivedHeader(h)
|
||||
if recDate.IsZero() {
|
||||
// better than nothing, if incorrect
|
||||
recDate = env.Date
|
||||
}
|
||||
flags, err := raw.ModelFlags()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
labels, err := raw.Labels()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &models.MessageInfo{
|
||||
BodyStructure: bs,
|
||||
Envelope: env,
|
||||
Flags: flags,
|
||||
Labels: labels,
|
||||
InternalDate: recDate,
|
||||
RFC822Headers: h,
|
||||
Size: 0,
|
||||
Uid: raw.UID(),
|
||||
Error: parseErr,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// MessageHeaders populates a models.MessageInfo struct for the message.
|
||||
// based on the reader returned by NewReader. Minimal information is included.
|
||||
// There is no body structure or RFC822Headers set
|
||||
func MessageHeaders(raw RawMessage) (*models.MessageInfo, error) {
|
||||
var parseErr error
|
||||
r, err := raw.NewReader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
msg, err := ReadMessage(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not read message: %w", err)
|
||||
}
|
||||
h := &mail.Header{Header: msg.Header}
|
||||
env := parseEnvelope(h)
|
||||
recDate, _ := parseReceivedHeader(h)
|
||||
if recDate.IsZero() {
|
||||
// better than nothing, if incorrect
|
||||
recDate = env.Date
|
||||
}
|
||||
flags, err := raw.ModelFlags()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
labels, err := raw.Labels()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &models.MessageInfo{
|
||||
Envelope: env,
|
||||
Flags: flags,
|
||||
Labels: labels,
|
||||
InternalDate: recDate,
|
||||
Refs: parse.MsgIDList(h, "references"),
|
||||
Size: 0,
|
||||
Uid: raw.UID(),
|
||||
Error: parseErr,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// NewCRLFReader reads all of r and returns a reader over the same content in
// which every line is terminated by CRLF. A trailing "\n" or "\r\n" of each
// input line is replaced by "\r\n"; a final line without a line ending gets
// one appended. Empty input yields an empty reader.
//
// Lines may be of any length. Reading stops at the first error reported by r
// (io.EOF included); since no error can be returned, whatever was read up to
// that point is kept.
func NewCRLFReader(r io.Reader) io.Reader {
	var buf bytes.Buffer
	// bufio.Reader is used rather than bufio.Scanner because the scanner
	// gives up on lines longer than its token limit, which would silently
	// truncate the message.
	br := bufio.NewReader(r)
	for {
		line, err := br.ReadBytes('\n')
		if len(line) > 0 {
			line = bytes.TrimSuffix(line, []byte("\n"))
			line = bytes.TrimSuffix(line, []byte("\r"))
			buf.Write(line)
			buf.WriteString("\r\n")
		}
		if err != nil {
			break
		}
	}
	return &buf
}
|
||||
|
||||
// ReadMessage is a wrapper for the message.Read function to read a message
|
||||
// from r. The message's encoding and charset are automatically decoded to
|
||||
// UTF-8. If an unknown charset or unknown encoding is encountered, the error is
|
||||
// logged but a nil error is returned since the entity object can still be read.
|
||||
func ReadMessage(r io.Reader) (*message.Entity, error) {
|
||||
entity, err := message.Read(r)
|
||||
switch {
|
||||
case message.IsUnknownCharset(err):
|
||||
// message body is valid, just not converted, so continue
|
||||
log.Warnf("ReadMessage: %v", err)
|
||||
case message.IsUnknownEncoding(err):
|
||||
// message body is valid, just not decoded, so continue
|
||||
log.Warnf("ReadMessage: %v", err)
|
||||
case err != nil:
|
||||
return nil, fmt.Errorf("could not read message: %w", err)
|
||||
}
|
||||
return entity, nil
|
||||
}
|
||||
Reference in New Issue
Block a user