init: pristine aerc 0.20.0 source

This commit is contained in:
Mortdecai
2026-04-07 19:54:54 -04:00
commit 083402a548
502 changed files with 68722 additions and 0 deletions
+466
View File
@@ -0,0 +1,466 @@
package rfc822
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"mime"
"regexp"
"strings"
"time"
"git.sr.ht/~rjarry/aerc/lib/log"
"git.sr.ht/~rjarry/aerc/lib/parse"
"git.sr.ht/~rjarry/aerc/models"
"github.com/emersion/go-message"
_ "github.com/emersion/go-message/charset"
"github.com/emersion/go-message/mail"
)
// MultipartError wraps an error that was encountered while reading the parts
// of a multipart message.
type MultipartError struct {
	e error
}

// Unwrap exposes the wrapped error so that errors.Is and errors.As can look
// through a MultipartError.
func (m MultipartError) Unwrap() error {
	return m.e
}

// Error renders the wrapped error's message with a "multipart error: " prefix.
func (m MultipartError) Error() string {
	const prefix = "multipart error: "
	return prefix + m.e.Error()
}
// IsMultipartError reports whether err, or any error it wraps, is a
// MultipartError, i.e. whether the multipart message was malformed and could
// not be parsed.
func IsMultipartError(err error) bool {
	var target MultipartError
	return errors.As(err, &target)
}
// dateRe locates an RFC 1123Z-like timestamp ("Mon, 02 Jan 2006 15:04:05
// -0700") inside free-form text such as a Received header. The pattern is
// deliberately a little looser than the layout: the comma after the weekday
// and the seconds are optional and the day may be a single digit.
//
// The zone sign must be '+' or '-'. The capture groups are kept exactly as
// they were so that submatch indexes stay stable.
var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
	`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
	`([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([+-][0-9]{4})`)
// FetchEntityPartReader returns a buffered reader for the body of the part of
// e addressed by index. Each element of index is the 1-based position of a
// part within the multipart entity of the corresponding nesting level; an
// empty index selects the body of e itself.
//
// Unknown charset and unknown encoding errors on a part are logged and
// otherwise ignored. Any other error while advancing to the next part
// (including reaching the end of the parts) is logged and the body of e is
// returned instead. An error is returned when index is not empty but e is not
// a multipart entity.
func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
	if len(index) == 0 {
		// non multipart, simply return everything
		return bufReader(e)
	}

	mpr := e.MultipartReader()
	if mpr == nil {
		return nil, fmt.Errorf("FetchEntityPartReader: unexpected code reached")
	}

	wanted, remaining := index[0], index[1:]
	for position := 1; ; position++ {
		part, err := mpr.NextPart()
		if err != nil {
			log.Warnf("FetchEntityPartReader: %v", err)
			if !message.IsUnknownCharset(err) && !message.IsUnknownEncoding(err) {
				return bufReader(e)
			}
		}
		if position == wanted {
			// With an empty remaining index the recursive call
			// returns the buffered body of part.
			return FetchEntityPartReader(part, remaining)
		}
	}
}
// bufReader reads the whole body of e into memory and returns it as a buffer.
//
// TODO: the UI doesn't seem to like readers which aren't buffers
func bufReader(e *message.Entity) (io.Reader, error) {
	buf := new(bytes.Buffer)
	_, err := io.Copy(buf, e.Body)
	if err != nil {
		return nil, err
	}
	return buf, nil
}
// splitMIME splits a MIME type into its major and minor parts. The minor part
// is only returned when m contains exactly one "/"; otherwise it is empty and
// the major part is everything before the first "/".
func splitMIME(m string) (string, string) {
	switch strings.Count(m, "/") {
	case 0:
		return m, ""
	case 1:
		sep := strings.IndexByte(m, '/')
		return m[:sep], m[sep+1:]
	default:
		return m[:strings.IndexByte(m, '/')], ""
	}
}
// fixContentType tries to repair a Content-Type header whose media type is
// wrapped in double quotes (e.g. `"application/pdf"; name="x.pdf"`). The
// quotes are removed from the media type only, the header is rewritten with
// the parameters left untouched, and the result is parsed again.
//
// If the media type contains no quotes, or the rewritten header still cannot
// be parsed, "text/plain" with nil parameters is returned.
func fixContentType(h message.Header) (string, map[string]string) {
	raw := h.Get("Content-Type")

	// Separate the media type from the parameters, if there are any.
	mediaType, params := raw, ""
	if semi := strings.Index(raw, ";"); semi > 0 {
		mediaType = raw[:semi]
		params = raw[semi:]
	}

	if !strings.Contains(mediaType, "\"") {
		// nothing we know how to fix
		return "text/plain", nil
	}

	h.Set("Content-Type", strings.ReplaceAll(mediaType, "\"", "")+params)
	fixed, fixedParams, err := h.ContentType()
	if err != nil {
		// if all else fails, return text/plain
		return "text/plain", nil
	}
	return fixed, fixedParams
}
// ParseEntityStructure builds the body structure of e, descending recursively
// into the parts of multipart entities. Parsing is best effort:
//
//   - An unparseable content-type is handed to fixContentType, which falls
//     back to text/plain; this never yields an error.
//   - Unknown charset or encoding errors on a part are logged and ignored. The
//     part is still included, its content is simply not decoded or converted.
//   - Any other failure while reading the parts is returned as a
//     MultipartError. The message is malformed and callers are advised to
//     fall back to a single text/plain structure (see CreateTextPlainBody).
//
// A content-disposition header that is present but cannot be parsed is
// returned as an error.
func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
	contentType, ctParams, err := e.Header.ContentType()
	if err != nil {
		// try to fix the error; if all measures fail, this yields a
		// text/plain content type to display at least plaintext
		contentType, ctParams = fixContentType(e.Header)
	}
	major, minor := splitMIME(contentType)

	body := &models.BodyStructure{
		MIMEType:    major,
		MIMESubType: minor,
		Params:      ctParams,
		Description: e.Header.Get("content-description"),
		Encoding:    e.Header.Get("content-transfer-encoding"),
		Parts:       []*models.BodyStructure{},
	}

	if e.Header.Get("content-disposition") != "" {
		disposition, dispParams, err := e.Header.ContentDisposition()
		if err != nil {
			return nil, fmt.Errorf("could not parse content disposition: %w", err)
		}
		body.Disposition = disposition
		body.DispositionParams = dispParams
	}

	mpr := e.MultipartReader()
	if mpr == nil {
		// not a multipart entity, there are no children to visit
		return body, nil
	}

	for {
		part, err := mpr.NextPart()
		if errors.Is(err, io.EOF) {
			return body, nil
		}
		if message.IsUnknownCharset(err) || message.IsUnknownEncoding(err) {
			// the part is still usable, keep going
			log.Warnf("ParseEntityStructure: %v", err)
		} else if err != nil {
			return nil, MultipartError{err}
		}

		child, err := ParseEntityStructure(part)
		if err != nil {
			return nil, fmt.Errorf("could not parse child entity structure: %w", err)
		}
		body.Parts = append(body.Parts, child)
	}
}
// CreateTextPlainBody returns a body structure describing a single text/plain
// part with a utf-8 charset parameter and an empty (non-nil) list of sub
// parts.
func CreateTextPlainBody() *models.BodyStructure {
	return &models.BodyStructure{
		MIMEType:    "text",
		MIMESubType: "plain",
		Params:      map[string]string{"charset": "utf-8"},
		Parts:       []*models.BodyStructure{},
	}
}
// parseEnvelope builds an envelope from the message header h. No header
// problem is fatal: every parsing failure is logged and replaced by the best
// value that is still available.
func parseEnvelope(h *mail.Header) *models.Envelope {
	subject, err := h.Subject()
	if err != nil {
		log.Errorf("could not decode subject: %v", err)
		// use the undecoded value instead
		subject = h.Get("Subject")
	}

	messageID, err := h.MessageID()
	if err != nil {
		log.Errorf("invalid Message-ID header: %v", err)
		// proper parsing failed, so fall back to whatever is there
		messageID = strings.Trim(h.Get("message-id"), "<>")
	}

	// Only the first In-Reply-To message id is kept.
	inReplyTo := ""
	if ids := parse.MsgIDList(h, "in-reply-to"); len(ids) > 0 {
		inReplyTo = ids[0]
	}

	date, err := parseDate(h)
	if err != nil {
		// Date parsing errors are fairly common. The rest of the
		// envelope is still valid and it is better to be slightly off
		// than to not be able to read the mail at all, so only log.
		log.Errorf("invalid Date header: %v", err)
	}

	env := new(models.Envelope)
	env.Date = date
	env.Subject = subject
	env.MessageId = messageID
	env.From = parseAddressList(h, "from")
	env.ReplyTo = parseAddressList(h, "reply-to")
	env.Sender = parseAddressList(h, "sender")
	env.To = parseAddressList(h, "to")
	env.Cc = parseAddressList(h, "cc")
	env.Bcc = parseAddressList(h, "bcc")
	env.InReplyTo = inReplyTo
	return env
}
// isDateOK reports whether the zone information of t can be trusted.
//
// For a date formatted like "... -0500 (EST)" the parser takes the EST part
// and ignores the numeric offset. Unless the matching locale is loaded it
// cannot tell what EST means and the result carries a named zone with a zero
// offset. Such values are reported as not OK so that callers can ignore them.
// https://stackoverflow.com/questions/49084316/why-doesnt-gos-time-parse-parse-the-timezone-identifier
func isDateOK(t time.Time) bool {
	zone, secondsEast := t.Zone()
	if secondsEast == 0 {
		// a zero offset is only plausible for UTC, GMT or an unnamed
		// zone; any other name means the abbreviation was not resolved
		switch zone {
		case "UTC", "GMT", "":
			return true
		default:
			return false
		}
	}
	// non-zero offsets are fine
	return true
}
// parseDate determines the date of a message. It tries, in this order, the
// standard parser for the Date header, a list of non-standard layouts for the
// Date header and finally the timestamp found in the Received header.
//
// The first result that passes isDateOK is returned. If there is none, the
// most recently parsed candidate that did not pass the check is returned
// instead. An error is returned only when nothing could be parsed at all.
func parseDate(h *mail.Header) (time.Time, error) {
	// best candidate so far; only used if no "correct" time is found
	var fallback time.Time

	// trying the easy way
	if parsed, err := h.Date(); err == nil {
		if isDateOK(parsed) {
			return parsed, nil
		}
		fallback = parsed
	}

	raw := h.Get("date")
	if raw == "" {
		// sometimes, no error occurs but the date is empty.
		// In this case, guess time from received header field
		if received, err := parseReceivedHeader(h); err == nil {
			return received, nil
		}
	}

	extraLayouts := [...]string{
		// X-Mailer: EarthLink Zoo Mail 1.0
		"Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)",
	}
	for _, layout := range extraLayouts {
		parsed, err := time.Parse(layout, raw)
		if err != nil {
			continue
		}
		if isDateOK(parsed) {
			return parsed, nil
		}
		fallback = parsed
	}

	// still no success, try the received header
	if received, err := parseReceivedHeader(h); err == nil {
		if isDateOK(received) {
			return received, nil
		}
		fallback = received
	}

	// do we have at least something?
	if fallback.IsZero() {
		return time.Time{}, fmt.Errorf("unrecognized date format: %s", raw)
	}
	return fallback, nil
}
// parseReceivedHeader extracts the first timestamp matched by dateRe from the
// Received header of h and parses it with the RFC 1123Z layout. An error is
// returned if the header text cannot be obtained or if no parseable timestamp
// is found.
func parseReceivedHeader(h *mail.Header) (time.Time, error) {
	received, err := h.Text("received")
	if err != nil {
		return time.Time{}, fmt.Errorf("received header not parseable: %w", err)
	}
	// FindString yields "" when nothing matches, which time.Parse rejects.
	stamp := dateRe.FindString(received)
	return time.Parse(time.RFC1123Z, stamp)
}
// parseAddressList returns the addresses found in the header field key.
//
// If at least one address was parsed, any accompanying error is ignored
// (address lists are sometimes returned together with non-fatal unknown
// charset errors). If nothing was parsed but the field is not empty, a single
// address carrying the raw field value as its name is returned so that the
// message can at least be displayed. If the field is empty, nil is returned.
func parseAddressList(h *mail.Header, key string) []*mail.Address {
	addrs, err := h.AddressList(key)

	if len(addrs) > 0 {
		decoder := mime.WordDecoder{CharsetReader: message.CharsetReader}
		for _, addr := range addrs {
			// Handle invalid headers with quoted *AND* encoded names
			if !strings.HasPrefix(addr.Name, "=?") || !strings.HasSuffix(addr.Name, "?=") {
				continue
			}
			addr.Name, _ = decoder.DecodeHeader(addr.Name)
		}
		return addrs
	}

	raw := h.Get(key)
	if raw == "" {
		return nil
	}
	// Header value is not empty but parsing completely failed.
	if err != nil {
		log.Errorf("%s: %s: %v", key, raw, err)
	}
	return []*mail.Address{{Name: raw}}
}
// RawMessage is an interface that describes a raw message. It is the input of
// MessageInfo and MessageHeaders, which use it to obtain the message content
// and the metadata that is not read from the message itself.
type RawMessage interface {
	// NewReader returns a reader for the raw message. MessageInfo and
	// MessageHeaders close the reader once they are done with it.
	NewReader() (io.ReadCloser, error)
	// ModelFlags returns the flags of the message.
	ModelFlags() (models.Flags, error)
	// Labels returns the labels of the message.
	Labels() ([]string, error)
	// UID returns the UID of the message.
	UID() models.UID
}
// MessageInfo reads the message provided by raw.NewReader and returns a fully
// populated models.MessageInfo: body structure, envelope, flags, labels,
// internal date and the parsed RFC822 headers.
//
// A message whose multipart structure is malformed is not rejected; the
// problem is logged and a plain text/plain body structure is used instead.
// The internal date is taken from the Received header and falls back to the
// envelope date. Size is always reported as 0.
func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
	// Nothing in this function assigns parseErr, so the Error field of the
	// result is always nil.
	var parseErr error

	reader, err := raw.NewReader()
	if err != nil {
		return nil, err
	}
	defer reader.Close()

	msg, err := ReadMessage(reader)
	if err != nil {
		return nil, fmt.Errorf("could not read message: %w", err)
	}

	structure, err := ParseEntityStructure(msg)
	switch {
	case IsMultipartError(err):
		log.Warnf("multipart error: %v", err)
		structure = CreateTextPlainBody()
	case err != nil:
		return nil, fmt.Errorf("could not get structure: %w", err)
	}

	header := &mail.Header{Header: msg.Header}
	envelope := parseEnvelope(header)

	internalDate, _ := parseReceivedHeader(header)
	if internalDate.IsZero() {
		// better than nothing, if incorrect
		internalDate = envelope.Date
	}

	flags, err := raw.ModelFlags()
	if err != nil {
		return nil, err
	}
	labels, err := raw.Labels()
	if err != nil {
		return nil, err
	}

	info := &models.MessageInfo{
		BodyStructure: structure,
		Envelope:      envelope,
		Flags:         flags,
		Labels:        labels,
		InternalDate:  internalDate,
		RFC822Headers: header,
		Size:          0,
		Uid:           raw.UID(),
		Error:         parseErr,
	}
	return info, nil
}
// MessageHeaders reads the message provided by raw.NewReader and returns a
// models.MessageInfo holding minimal information only: envelope, flags,
// labels, internal date, references and UID. Neither the body structure nor
// the RFC822Headers are set.
//
// The internal date is taken from the Received header and falls back to the
// envelope date. Size is always reported as 0.
func MessageHeaders(raw RawMessage) (*models.MessageInfo, error) {
	// Nothing in this function assigns parseErr, so the Error field of the
	// result is always nil.
	var parseErr error

	reader, err := raw.NewReader()
	if err != nil {
		return nil, err
	}
	defer reader.Close()

	msg, err := ReadMessage(reader)
	if err != nil {
		return nil, fmt.Errorf("could not read message: %w", err)
	}

	header := &mail.Header{Header: msg.Header}
	envelope := parseEnvelope(header)

	internalDate, _ := parseReceivedHeader(header)
	if internalDate.IsZero() {
		// better than nothing, if incorrect
		internalDate = envelope.Date
	}

	flags, err := raw.ModelFlags()
	if err != nil {
		return nil, err
	}
	labels, err := raw.Labels()
	if err != nil {
		return nil, err
	}

	info := &models.MessageInfo{
		Envelope:     envelope,
		Flags:        flags,
		Labels:       labels,
		InternalDate: internalDate,
		Refs:         parse.MsgIDList(header, "references"),
		Size:         0,
		Uid:          raw.UID(),
		Error:        parseErr,
	}
	return info, nil
}
// NewCRLFReader returns a reader with CRLF line endings.
//
// The whole of r is read eagerly. Every line, whether it was terminated by
// "\n" or "\r\n", is emitted followed by "\r\n"; this includes a final line
// that had no terminator at all. Empty input yields an empty reader.
//
// Lines may be arbitrarily long. A line-based bufio.Scanner is not used here
// because it gives up on lines longer than bufio.MaxScanTokenSize (64 KiB),
// which would silently drop the remainder of the message.
//
// The signature has no way to report a read error. If reading r fails, the
// data obtained up to that point is returned.
func NewCRLFReader(r io.Reader) io.Reader {
	var buf bytes.Buffer
	br := bufio.NewReader(r)
	for {
		// ReadBytes returns the data read so far together with the
		// error, so a last line without terminator is handled below
		// before the loop is left.
		line, err := br.ReadBytes('\n')
		if len(line) > 0 {
			line = bytes.TrimSuffix(line, []byte("\n"))
			// drop a single carriage return to normalize CRLF input
			line = bytes.TrimSuffix(line, []byte("\r"))
			buf.Write(line)
			buf.WriteString("\r\n")
		}
		if err != nil {
			break
		}
	}
	return &buf
}
// ReadMessage wraps message.Read to read a message from r. The encoding and
// charset of the message are decoded to UTF-8 automatically. Unknown charset
// and unknown encoding errors are logged and swallowed, since the entity can
// still be read in those cases: the body is valid, it is just not converted
// or decoded. Any other error is returned.
func ReadMessage(r io.Reader) (*message.Entity, error) {
	entity, err := message.Read(r)
	if err == nil {
		return entity, nil
	}
	if message.IsUnknownCharset(err) || message.IsUnknownEncoding(err) {
		// message body is still usable, so continue
		log.Warnf("ReadMessage: %v", err)
		return entity, nil
	}
	return nil, fmt.Errorf("could not read message: %w", err)
}
+190
View File
@@ -0,0 +1,190 @@
package rfc822
import (
"io"
"os"
"path/filepath"
"testing"
"time"
"git.sr.ht/~rjarry/aerc/models"
"github.com/emersion/go-message/mail"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMessageInfoParser runs MessageInfo on every file in the directory of
// valid test messages and expects neither a returned error nor a parsing
// error recorded in the result.
func TestMessageInfoParser(t *testing.T) {
	const rootDir = "testdata/message/valid"

	entries, err := os.ReadDir(rootDir)
	die(err)

	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		t.Run(name, func(t *testing.T) {
			raw := newMockRawMessageFromPath(filepath.Join(rootDir, name))
			info, err := MessageInfo(raw)
			if err != nil {
				t.Fatal("Failed to create MessageInfo with:", err)
			}
			if info.Error != nil {
				t.Fatal("Expected no parsing error, but got:", info.Error)
			}
		})
	}
}
// TestMessageInfoMalformed runs MessageInfo on every file in the directory of
// malformed test messages and expects that no error is returned for any of
// them.
func TestMessageInfoMalformed(t *testing.T) {
	const rootDir = "testdata/message/malformed"

	entries, err := os.ReadDir(rootDir)
	die(err)

	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		t.Run(name, func(t *testing.T) {
			raw := newMockRawMessageFromPath(filepath.Join(rootDir, name))
			if _, err := MessageInfo(raw); err != nil {
				t.Fatal(err)
			}
		})
	}
}
// TestParseMessageDate checks which of the Date and Received headers
// parseDate ends up using. The two headers carry different times so that the
// source of the result can be told apart. Where the outcome depends on the
// locales that are loaded, more than one result is accepted.
func TestParseMessageDate(t *testing.T) {
	type testCase struct {
		date     string
		received string
		// acceptable results, expressed in UTC
		utc []time.Time
	}

	cases := []testCase{
		{
			date:     "Fri, 22 Dec 2023 11:19:01 +0000",
			received: "from aaa.bbb.com for <user@host.com>; Fri, 22 Dec 2023 06:19:02 -0500 (EST)",
			utc: []time.Time{
				// we expect the Date field to be parsed straight away
				time.Date(2023, time.December, 22, 11, 19, 1, 0, time.UTC),
			},
		},
		{
			date:     "Fri, 29 Dec 2023 14:06:37 +0100",
			received: "from somewhere.com for a@b.c; Fri, 30 Dec 2023 4:06:43 +1300",
			utc: []time.Time{
				// we expect the Date field to be parsed here
				time.Date(2023, time.December, 29, 13, 6, 37, 0, time.UTC),
			},
		},
		{
			date:     "Fri, 29 Dec 2023 00:51:00 EST",
			received: "by hostname.com; Fri, 29 Dec 2023 00:51:33 -0500 (EST)",
			utc: []time.Time{
				// in most cases the Received field will be parsed
				time.Date(2023, time.December, 29, 5, 51, 33, 0, time.UTC),
				// however, if the EST locale is loaded, the Date header can be parsed too
				time.Date(2023, time.December, 29, 5, 51, 0, 0, time.UTC),
			},
		},
	}

	for _, tc := range cases {
		var h mail.Header
		h.SetText("Date", tc.date)
		h.SetText("Received", tc.received)

		got, err := parseDate(&h)
		require.Nil(t, err)

		matched := false
		for _, want := range tc.utc {
			if want.Equal(got.UTC()) {
				matched = true
				break
			}
		}
		require.True(t, matched, "Can't properly parse date and time from the Date/Received headers")
	}
}
// TestParseAddressList checks that parseAddressList copes with the various
// ways a display name can be written: quoted, Q-encoded, B-encoded, quoted
// *and* B-encoded (which is invalid but seen in the wild), and a header
// value that holds no e-mail address at all.
func TestParseAddressList(t *testing.T) {
	header := mail.HeaderFromMap(map[string][]string{
		"From":     {`"=?utf-8?B?U21pZXRhbnNraSwgV29qY2llY2ggVGFkZXVzeiBpbiBUZWFtcw==?=" <noreply@email.teams.microsoft.com>`},
		"To":       {`=?UTF-8?q?Oc=C3=A9ane_de_Seazon?= <hello@seazon.fr>`},
		"Cc":       {`=?utf-8?b?0KjQsNCz0L7QsiDQk9C10L7RgNCz0LjQuSB2aWEgZGlzY3Vzcw==?= <ovs-discuss@openvswitch.org>`},
		"Bcc":      {`"Foo, Baz Bar" <~foo/baz@bar.org>`},
		"Reply-To": {`Someone`},
	})
	type vector struct {
		kind   string
		header string
		name   string
		email  string
	}

	vectors := []vector{
		{
			kind:   "quoted",
			header: "Bcc",
			name:   "Foo, Baz Bar",
			email:  "~foo/baz@bar.org",
		},
		{
			kind:   "Qencoded",
			header: "To",
			name:   "Océane de Seazon",
			email:  "hello@seazon.fr",
		},
		{
			kind:   "Bencoded",
			header: "Cc",
			name:   "Шагов Георгий via discuss",
			email:  "ovs-discuss@openvswitch.org",
		},
		{
			kind:   "quoted+Bencoded",
			header: "From",
			name:   "Smietanski, Wojciech Tadeusz in Teams",
			email:  "noreply@email.teams.microsoft.com",
		},
		{
			kind:   "no email",
			header: "Reply-To",
			name:   "Someone",
			email:  "",
		},
	}

	for _, vec := range vectors {
		t.Run(vec.kind, func(t *testing.T) {
			addrs := parseAddressList(&header, vec.header)
			// require (not assert): the subtest must stop here when
			// the list does not hold exactly one address, otherwise
			// indexing addrs[0] below panics on an empty result.
			require.Len(t, addrs, 1)
			assert.Equal(t, vec.name, addrs[0].Name)
			assert.Equal(t, vec.email, addrs[0].Address)
		})
	}
}
// mockRawMessage is a RawMessage implementation for tests that is backed by a
// file on disk.
type mockRawMessage struct {
	path string
}

// newMockRawMessageFromPath returns a mock message reading from the file at p.
func newMockRawMessageFromPath(p string) *mockRawMessage {
	return &mockRawMessage{path: p}
}

// NewReader opens the backing file.
func (mock *mockRawMessage) NewReader() (io.ReadCloser, error) {
	return os.Open(mock.path)
}

// ModelFlags always reports no flags.
func (mock *mockRawMessage) ModelFlags() (models.Flags, error) {
	return 0, nil
}

// Labels always reports no labels.
func (mock *mockRawMessage) Labels() ([]string, error) {
	return nil, nil
}

// UID always reports an empty UID.
func (mock *mockRawMessage) UID() models.UID {
	return ""
}
// die panics with err unless err is nil.
func die(err error) {
	if err == nil {
		return
	}
	panic(err)
}
+26
View File
@@ -0,0 +1,26 @@
Subject: Confirmation Needed gUdVJQBhsd
Content-Type: multipart/mixed; boundary="Nextpart_1Q2YJhd197991794467076Pgfa"
To: <BORK@example.com>
From: ""REGISTRAR"" <zdglopi-1Q2YJhd-noReply@example.com>
--Nextpart_1Q2YJhd197991794467076Pgfa
Content-Type: multipart/parallel; boundary="sg54sd54g54sdg54"
--sg54sd54g54sdg54
Content-Type: multipart/alternative; boundary="54qgf54q546f46qsf46qsf"
--54qgf54q546f46qsf46qsf
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: Hexa
--54qgf54q546f46qsf46qsf
Content-Type: text/html; charset=utf-8
<CeNteR><a hRef="https://example.com-ap-southeast-example.com.com/example.com#qs=r-acacaeehdiebadgdhgghcaegckhabababaggacihaccajfbacccgaehhbkacb"><b><h2>Congratulations Netflix Customer!</h2></b></a><br>
<HeaD>
<ObJECT>
--Nextpart_1Q2YJhd197991794467076Pgfa--
+45
View File
@@ -0,0 +1,45 @@
Subject: Your ECOLINES tickets
X-PHP-Originating-Script: 33:functions.inc.php
From: ECOLINES <ecolines@ecolines.lv>
Content-Type: multipart/mixed;
boundary="PHP-mixed-ba319678ca12656cfb8cd46e736ce09d"
Message-Id: <E1nvIQS-0004tm-Bc@legacy.ecolines.net>
Date: Sun, 29 May 2022 15:53:44 +0300
--PHP-mixed-ba319678ca12656cfb8cd46e736ce09d
Content-Type: multipart/alternative; boundary="PHP-alt-ba319678ca12656cfb8cd46e736ce09d"
--PHP-alt-ba319678ca12656cfb8cd46e736ce09d
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 7bit
Your tickets are attached to this message. Also You can print out Your tickets from our website www.ecolines.net<b
r />
--PHP-alt-ba319678ca12656cfb8cd46e736ce09d
Content-Type: text/html; charset="UTF-8"
Content-Transfer-Encoding: 7bit
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
--PHP-alt-ba319678ca12656cfb8cd46e736ce09d--
--PHP-mixed-ba319678ca12656cfb8cd46e736ce09d
Content-Type: "application/pdf"; name="17634428.pdf"
Content-Disposition: attachment; filename="17634428.pdf"
Content-Transfer-Encoding: base64
JVBERi0xLjQKMSAwIG9iago8PAovVGl0bGUgKP7/AFkAbwB1AHIAIAB0AGkAYwBrAGUAdCkKL0Ny
--PHP-mixed-ba319678ca12656cfb8cd46e736ce09d
Content-Type: "application/pdf"; name="invoice-6385490.pdf"
Content-Disposition: attachment; filename="invoice-6385490.pdf"
Content-Transfer-Encoding: base64
JVBERi0xLjQKMSAwIG9iago8PAovVGl0bGUgKP7/AEkAbgB2AG8AaQBjAGUpCi9DcmVhdG9yICj+
--PHP-mixed-ba319678ca12656cfb8cd46e736ce09d--