init: pristine aerc 0.20.0 source
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
package parse
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
|
||||
"git.sr.ht/~rjarry/aerc/lib/log"
|
||||
)
|
||||
|
||||
// AnsiReg matches ANSI CSI escape sequences: an ESC byte followed by "[", any
// number of parameter bytes (0x30-0x3F), any number of intermediate bytes
// (0x20-0x2F) and a single final byte (0x40-0x7E).
var AnsiReg = regexp.MustCompile("\x1B\\[[0-?]*[ -/]*[@-~]")
|
||||
|
||||
// StripAnsi strips ansi escape codes from the reader
|
||||
func StripAnsi(r io.Reader) io.Reader {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
scanner := bufio.NewScanner(r)
|
||||
scanner.Buffer(nil, 1024*1024*1024)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
line = AnsiReg.ReplaceAll(line, []byte(""))
|
||||
_, err := buf.Write(line)
|
||||
if err != nil {
|
||||
log.Warnf("failed write ", err)
|
||||
}
|
||||
_, err = buf.Write([]byte("\n"))
|
||||
if err != nil {
|
||||
log.Warnf("failed write ", err)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "failed to read line: %v\n", err)
|
||||
}
|
||||
return buf
|
||||
}
|
||||
@@ -0,0 +1,471 @@
|
||||
package parse
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.sr.ht/~rjarry/aerc/lib/log"
|
||||
)
|
||||
|
||||
// dateFmt is the time layout (YYYY-MM-DD) used to parse absolute dates.
const dateFmt = "2006-01-02"
|
||||
|
||||
// ParseDateRange parses a date range into a start and end date. Dates are
|
||||
// expected to be in the YYYY-MM-DD format.
|
||||
//
|
||||
// Start and end dates are connected by the range operator ".." where end date
|
||||
// is not included in the date range.
|
||||
//
|
||||
// ParseDateRange can also parse open-ended ranges, i.e. start.. or ..end are
|
||||
// allowed.
|
||||
//
|
||||
// Relative date terms (such as "1 week 1 day" or "1w 1d") can be used, too.
|
||||
func DateRange(s string) (start, end time.Time, err error) {
|
||||
s = cleanInput(s)
|
||||
s = ensureRangeOp(s)
|
||||
i := strings.Index(s, "..")
|
||||
switch {
|
||||
case i < 0:
|
||||
// single date
|
||||
start, err = translate(s)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to parse date: %w", err)
|
||||
return
|
||||
}
|
||||
end = start.AddDate(0, 0, 1)
|
||||
|
||||
case i == 0:
|
||||
// end date only
|
||||
if len(s) < 2 {
|
||||
err = fmt.Errorf("no date found")
|
||||
return
|
||||
}
|
||||
end, err = translate(s[2:])
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to parse date: %w", err)
|
||||
return
|
||||
}
|
||||
|
||||
case i > 0:
|
||||
// start date first
|
||||
start, err = translate(s[:i])
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to parse date: %w", err)
|
||||
return
|
||||
}
|
||||
if len(s[i:]) <= 2 {
|
||||
return
|
||||
}
|
||||
// and end dates if available
|
||||
end, err = translate(s[(i + 2):])
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to parse date: %w", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// dictFunc computes the point in time a dictionary term stands for. The
// boolean selects between the current period (true) and the previous one
// (false); terms that are not tied to a period ignore it.
type dictFunc = func(bool) time.Time
|
||||
|
||||
// dict is a dictionary to translate words to dates. Map key must be at least 3
|
||||
// characters for matching purposes.
|
||||
var dict map[string]dictFunc = map[string]dictFunc{
|
||||
"today": func(_ bool) time.Time {
|
||||
return time.Now()
|
||||
},
|
||||
"yesterday": func(_ bool) time.Time {
|
||||
return time.Now().AddDate(0, 0, -1)
|
||||
},
|
||||
"week": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -7
|
||||
}
|
||||
return time.Now().AddDate(0, 0,
|
||||
daydiff(time.Monday)+diff)
|
||||
},
|
||||
"month": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(0, diff, -t.Day()+1)
|
||||
},
|
||||
"year": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff, 0, -t.YearDay()+1)
|
||||
},
|
||||
"monday": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -7
|
||||
}
|
||||
return time.Now().AddDate(0, 0,
|
||||
daydiff(time.Monday)+diff)
|
||||
},
|
||||
"tuesday": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -7
|
||||
}
|
||||
return time.Now().AddDate(0, 0,
|
||||
daydiff(time.Tuesday)+diff)
|
||||
},
|
||||
"wednesday": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -7
|
||||
}
|
||||
return time.Now().AddDate(0, 0,
|
||||
daydiff(time.Wednesday)+diff)
|
||||
},
|
||||
"thursday": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -7
|
||||
}
|
||||
return time.Now().AddDate(0, 0,
|
||||
daydiff(time.Thursday)+diff)
|
||||
},
|
||||
"friday": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -7
|
||||
}
|
||||
return time.Now().AddDate(0, 0,
|
||||
daydiff(time.Friday)+diff)
|
||||
},
|
||||
"saturday": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -7
|
||||
}
|
||||
return time.Now().AddDate(0, 0,
|
||||
daydiff(time.Saturday)+diff)
|
||||
},
|
||||
"sunday": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -7
|
||||
}
|
||||
return time.Now().AddDate(0, 0,
|
||||
daydiff(time.Sunday)+diff)
|
||||
},
|
||||
"january": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.January), -t.Day()+1)
|
||||
},
|
||||
"february": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.February), -t.Day()+1)
|
||||
},
|
||||
"march": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.March), -t.Day()+1)
|
||||
},
|
||||
"april": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.April), -t.Day()+1)
|
||||
},
|
||||
"may": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.May), -t.Day()+1)
|
||||
},
|
||||
"june": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.June), -t.Day()+1)
|
||||
},
|
||||
"july": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.July), -t.Day()+1)
|
||||
},
|
||||
"august": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.August), -t.Day()+1)
|
||||
},
|
||||
"september": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.September), -t.Day()+1)
|
||||
},
|
||||
"october": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.October), -t.Day()+1)
|
||||
},
|
||||
"november": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.November), -t.Day()+1)
|
||||
},
|
||||
"december": func(this bool) time.Time {
|
||||
diff := 0
|
||||
if !this {
|
||||
diff = -1
|
||||
}
|
||||
t := time.Now()
|
||||
return t.AddDate(diff,
|
||||
monthdiff(time.December), -t.Day()+1)
|
||||
},
|
||||
}
|
||||
|
||||
// daydiff returns the number of days (between -6 and 0) to add to the current
// date in order to reach the most recent weekday d, today included.
func daydiff(d time.Weekday) int {
	delta := int(d) - int(time.Now().Weekday())
	if delta <= 0 {
		return delta
	}
	// d lies later in the current week: use last week's occurrence instead
	return delta - 7
}
|
||||
|
||||
// monthdiff returns the number of months (between -11 and 0) to add to the
// current date in order to reach the most recent month d, the current month
// included.
func monthdiff(d time.Month) int {
	delta := int(d) - int(time.Now().Month())
	if delta <= 0 {
		return delta
	}
	// d lies later in the current year: use last year's occurrence instead
	return delta - 12
}
|
||||
|
||||
// translate translates regular time words into date strings
|
||||
func translate(s string) (time.Time, error) {
|
||||
if s == "" {
|
||||
return time.Now(), fmt.Errorf("empty string")
|
||||
}
|
||||
log.Tracef("input: %s", s)
|
||||
s0 := s
|
||||
|
||||
// if next characters is integer, then parse a relative date
|
||||
if '0' <= s[0] && s[0] <= '9' && hasUnit(s) {
|
||||
relDate, err := RelativeDate(s)
|
||||
if err != nil {
|
||||
log.Errorf("could not parse relative date from '%s': %v",
|
||||
s0, err)
|
||||
} else {
|
||||
log.Tracef("relative date: translated to %v from %s",
|
||||
relDate, s0)
|
||||
return bod(relDate.Apply(time.Now())), nil
|
||||
}
|
||||
}
|
||||
|
||||
// consult dictionary for terms translation
|
||||
s, this, hasPrefix := handlePrefix(s)
|
||||
for term, dateFn := range dict {
|
||||
if term == "month" && !hasPrefix {
|
||||
continue
|
||||
}
|
||||
if strings.Contains(term, s) {
|
||||
log.Tracef("dictionary: translated to %s from %s",
|
||||
term, s0)
|
||||
return bod(dateFn(this)), nil
|
||||
}
|
||||
}
|
||||
|
||||
// this is a regular date, parse it in the normal format
|
||||
log.Infof("parse: translates %s to regular format", s0)
|
||||
return time.Parse(dateFmt, s)
|
||||
}
|
||||
|
||||
// bod ("beginning of day") returns midnight of the day t falls on, in t's own
// location.
func bod(t time.Time) time.Time {
	return time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location())
}
|
||||
|
||||
// handlePrefix strips a leading "this" and/or "last" from s.
//
// It returns the remaining string, whether the current period is meant (false
// only if "last" was found) and whether any of the two prefixes was present.
func handlePrefix(s string) (string, bool, bool) {
	const (
		thisWord = "this"
		lastWord = "last"
	)
	current, prefixed := true, false
	if strings.HasPrefix(s, thisWord) {
		s = s[len(thisWord):]
		prefixed = true
	}
	// checked after "this" has been removed, so "thislast..." counts as last
	if strings.HasPrefix(s, lastWord) {
		s = s[len(lastWord):]
		prefixed, current = true, false
	}
	return s, current, prefixed
}
|
||||
|
||||
// cleanInput normalizes a date term: it is converted to lower case and all
// spaces and underscores are removed.
func cleanInput(s string) string {
	separators := strings.NewReplacer(" ", "", "_", "")
	return separators.Replace(strings.ToLower(s))
}
|
||||
|
||||
// RelDate is a distance into the past expressed in calendar units, e.g.
// yesterday would be represented as RelDate{0,0,1}.
type RelDate struct {
	Year, Month, Day uint
}

// Apply returns t moved back in time by the years, months and days of d.
func (d RelDate) Apply(t time.Time) time.Time {
	years, months, days := int(d.Year), int(d.Month), int(d.Day)
	return t.AddDate(-years, -months, -days)
}
|
||||
|
||||
// ParseRelativeDate parses a string of relative terms into a DateAdd.
|
||||
//
|
||||
// Syntax: N (year|month|week|day) ..
|
||||
//
|
||||
// The following are valid inputs:
|
||||
// 5weeks1day
|
||||
// 5w1d
|
||||
//
|
||||
// Adapted from the Go stdlib in src/time/format.go
|
||||
func RelativeDate(s string) (RelDate, error) {
|
||||
s0 := s
|
||||
s = cleanInput(s)
|
||||
var da RelDate
|
||||
for s != "" {
|
||||
var n uint
|
||||
|
||||
var err error
|
||||
|
||||
// expect an integer
|
||||
if !('0' <= s[0] && s[0] <= '9') {
|
||||
return da, fmt.Errorf("not a valid relative term: %s",
|
||||
s0)
|
||||
}
|
||||
|
||||
// consume integer
|
||||
n, s, err = leadingInt(s)
|
||||
if err != nil {
|
||||
return da, fmt.Errorf("cannot read integer in %s",
|
||||
s0)
|
||||
}
|
||||
|
||||
// consume the units
|
||||
i := 0
|
||||
for ; i < len(s); i++ {
|
||||
c := s[i]
|
||||
if '0' <= c && c <= '9' {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i == 0 {
|
||||
return da, fmt.Errorf("missing unit in %s", s0)
|
||||
}
|
||||
|
||||
u := s[:i]
|
||||
s = s[i:]
|
||||
switch u[0] {
|
||||
case 'y':
|
||||
da.Year += n
|
||||
case 'm':
|
||||
da.Month += n
|
||||
case 'w':
|
||||
da.Day += 7 * n
|
||||
case 'd':
|
||||
da.Day += n
|
||||
default:
|
||||
return da, fmt.Errorf("unknown unit %s in %s", u, s0)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return da, nil
|
||||
}
|
||||
|
||||
// hasUnit reports whether s contains at least one of the unit letters used by
// relative dates: y, m, w or d.
func hasUnit(s string) (has bool) {
	return strings.ContainsAny(s, "ymwd")
}
|
||||
|
||||
// leadingInt parses and returns the leading integer in s.
//
// x is the value of the decimal digits s starts with (0 if there are none)
// and rem is the part of s following them. If the digits do not fit into a
// uint, an error is returned along with 0 and the unmodified s instead of a
// silently wrapped value.
//
// Adapted from the Go stdlib in src/time/format.go
func leadingInt(s string) (x uint, rem string, err error) {
	const maxUint = ^uint(0)
	i := 0
	for ; i < len(s); i++ {
		c := s[i]
		if c < '0' || c > '9' {
			break
		}
		digit := uint(c - '0')
		// x*10 + digit would exceed maxUint
		if x > (maxUint-digit)/10 {
			return 0, s, fmt.Errorf("integer overflow in %q", s)
		}
		x = x*10 + digit
	}
	return x, s[i:], nil
}
|
||||
|
||||
// ensureRangeOp turns the period terms "this/last year/month/week" into a
// range expression when s does not contain a range operator yet.
//
// A "this" period becomes an open-ended range starting at the period
// ("thisweek" gives "thisweek.."). A "last" period becomes a range that ends
// where the current period begins ("lastweek" gives "lastweek..thisweek").
// Any other input is returned unchanged.
func ensureRangeOp(s string) string {
	const rangeOp = ".."
	if strings.Contains(s, rangeOp) {
		return s
	}

	units := [...]string{"year", "month", "week"}
	// "this" terms take precedence over "last" terms
	for _, unit := range units {
		if strings.Contains(s, "this"+unit) {
			return s + rangeOp
		}
	}
	for _, unit := range units {
		if strings.Contains(s, "last"+unit) {
			return s + rangeOp + "this" + unit
		}
	}
	return s
}
|
||||
@@ -0,0 +1,97 @@
|
||||
package parse_test
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.sr.ht/~rjarry/aerc/lib/parse"
|
||||
)
|
||||
|
||||
func TestParseDateRange(t *testing.T) {
|
||||
dateFmt := "2006-01-02"
|
||||
date := func(s string) time.Time { d, _ := time.Parse(dateFmt, s); return d }
|
||||
tests := []struct {
|
||||
s string
|
||||
start time.Time
|
||||
end time.Time
|
||||
}{
|
||||
{
|
||||
s: "2022-11-01",
|
||||
start: date("2022-11-01"),
|
||||
end: date("2022-11-02"),
|
||||
},
|
||||
{
|
||||
s: "2022-11-01..",
|
||||
start: date("2022-11-01"),
|
||||
},
|
||||
{
|
||||
s: "..2022-11-05",
|
||||
end: date("2022-11-05"),
|
||||
},
|
||||
{
|
||||
s: "2022-11-01..2022-11-05",
|
||||
start: date("2022-11-01"),
|
||||
end: date("2022-11-05"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
start, end, err := parse.DateRange(test.s)
|
||||
if err != nil {
|
||||
t.Errorf("ParseDateRange return error for %s: %v",
|
||||
test.s, err)
|
||||
}
|
||||
|
||||
if !start.Equal(test.start) {
|
||||
t.Errorf("wrong start date; expected %v, got %v",
|
||||
test.start, start)
|
||||
}
|
||||
|
||||
if !end.Equal(test.end) {
|
||||
t.Errorf("wrong end date; expected %v, got %v",
|
||||
test.end, end)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRelativeDate(t *testing.T) {
|
||||
tests := []struct {
|
||||
s string
|
||||
want parse.RelDate
|
||||
}{
|
||||
{
|
||||
s: "5 weeks 1 day",
|
||||
want: parse.RelDate{Year: 0, Month: 0, Day: 5*7 + 1},
|
||||
},
|
||||
{
|
||||
s: "5_weeks 1_day",
|
||||
want: parse.RelDate{Year: 0, Month: 0, Day: 5*7 + 1},
|
||||
},
|
||||
{
|
||||
s: "5weeks1day",
|
||||
want: parse.RelDate{Year: 0, Month: 0, Day: 5*7 + 1},
|
||||
},
|
||||
{
|
||||
s: "5w1d",
|
||||
want: parse.RelDate{Year: 0, Month: 0, Day: 5*7 + 1},
|
||||
},
|
||||
{
|
||||
s: "5y4m3w1d",
|
||||
want: parse.RelDate{Year: 5, Month: 4, Day: 3*7 + 1},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
da, err := parse.RelativeDate(test.s)
|
||||
if err != nil {
|
||||
t.Errorf("ParseRelativeDate return error for %s: %v",
|
||||
test.s, err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(da, test.want) {
|
||||
t.Errorf("results don't match. expected %v, got %v",
|
||||
test.want, da)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
package parse
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"git.sr.ht/~rjarry/aerc/lib/log"
|
||||
"github.com/emersion/go-message/mail"
|
||||
)
|
||||
|
||||
// MsgIDList parses a list of message identifiers. It returns message
|
||||
// identifiers without angle brackets. If the header field is missing,
|
||||
// it returns nil.
|
||||
//
|
||||
// This can be used on In-Reply-To and References header fields.
|
||||
// If the field does not conform to RFC 5322, fall back
|
||||
// to greedily parsing a subsequence of the original field.
|
||||
func MsgIDList(h *mail.Header, key string) []string {
|
||||
l, err := h.MsgIDList(key)
|
||||
if err == nil {
|
||||
return l
|
||||
}
|
||||
log.Errorf("%s: %s", err, h.Get(key))
|
||||
|
||||
// Expensive, fix your peer's MUA instead!
|
||||
var list []string
|
||||
header := &mail.Header{Header: h.Header.Copy()}
|
||||
value := header.Get(key)
|
||||
for err != nil && len(value) > 0 {
|
||||
// Skip parsed IDs
|
||||
if len(l) > 0 {
|
||||
last := "<" + l[len(l)-1] + ">"
|
||||
value = value[strings.Index(value, last)+len(last):]
|
||||
list = append(list, l...)
|
||||
}
|
||||
|
||||
// Skip a character until some IDs can be parsed
|
||||
value = value[1:]
|
||||
header.Set(key, value)
|
||||
l, err = header.MsgIDList(key)
|
||||
}
|
||||
return append(list, l...)
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
package parse_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.sr.ht/~rjarry/aerc/lib/parse"
|
||||
"github.com/emersion/go-message/mail"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestMsgIDList(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "valid",
|
||||
input: "<1q@az> (cmt)\r\n <2w@sx> (khld)",
|
||||
expected: []string{"1q@az", "2w@sx"},
|
||||
},
|
||||
{
|
||||
name: "comma",
|
||||
input: "<3e@dc>, <4r@fv>,\t<5t@gb>",
|
||||
expected: []string{"3e@dc", "4r@fv", "5t@gb"},
|
||||
},
|
||||
{
|
||||
name: "other non-CFWS separators",
|
||||
input: "<6y@>, <hn@7u>\n <> <jm@8i>",
|
||||
expected: []string{"hn@7u", "jm@8i"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
var h mail.Header
|
||||
h.Set("References", test.input)
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
actual := parse.MsgIDList(&h, "References")
|
||||
assert.Equal(t, test.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
package parse
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"regexp"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// urlRe only recognizes how a link begins: either a URL scheme followed by
// "://" or the local part of an email address (optionally preceded by
// "mailto:") up to and including the "@". Where the link ends is worked out
// by hand in HttpLinks.
var urlRe = regexp.MustCompile(
	`([a-z]{2,8})://` + // group 1: scheme of a URL
		`|(mailto:)?[[:alnum:]_+.~/-]*[[:alnum:]]@`, // group 2: explicit mailto: of an email
)

// urichars is the set of bytes that may be part of a link.
var urichars = []byte(
	"abcdefghijklmnopqrstuvwxyz" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"0123456789" +
		"-_.,~:;/?#@!$&%*+=\"'<>()[]",
)

// HttpLinks reads r completely and collects the URLs and email addresses found
// in it. It returns a new reader over the same content together with the
// sorted list of distinct links. Email addresses written without scheme are
// reported with a "mailto:" prefix.
//
// If r cannot be read, r itself and no links are returned.
func HttpLinks(r io.Reader) (io.Reader, []string) {
	content, err := io.ReadAll(r)
	if err != nil {
		return r, nil
	}

	// valid URL characters that typically do not end a URL
	trailing := []byte(`.,:;?!"'%`)

	found := make(map[string]bool)
	rest := content
	for {
		loc := urlRe.FindSubmatchIndex(rest)
		if loc == nil {
			break
		}
		rest = rest[loc[0]:]
		prefixLen := loc[1] - loc[0]

		// "inline" email without a mailto: prefix: neither the scheme nor
		// the mailto: group took part in the match. Those get some extra
		// checks below.
		inlineEmail := len(loc) > 4 && loc[2] == -1 && loc[4] == -1

		// A regular expression cannot tell where the link stops: brackets
		// are legal inside of it, but a closing bracket without its
		// opening counterpart belongs to the surrounding text (markdown,
		// HTML, ...). Hence the bracket bookkeeping.
		var paren, bracket, ltgt int
		end, done := prefixLen, false
		for !done && end < len(rest) && bytes.IndexByte(urichars, rest[end]) != -1 {
			switch rest[end] {
			case '[':
				bracket++
			case '(':
				paren++
			case '<':
				ltgt++
			case ']':
				bracket--
				done = bracket < 0
			case ')':
				paren--
				done = paren < 0
			case '>':
				ltgt--
				done = ltgt < 0
			case '&':
				done = inlineEmail
			}
			if !done {
				end++
			}

			// an inline email stops in front of any opening bracket
			if inlineEmail && (paren > 0 || ltgt > 0 || bracket > 0) {
				end--
				done = true
			}
		}

		for end > 0 && bytes.IndexByte(trailing, rest[end-1]) != -1 {
			end--
		}

		// a bare scheme or local part without anything behind is no link
		if end != prefixLen {
			link := string(rest[:end])
			if inlineEmail {
				link = "mailto:" + link
			}
			found[link] = true
		}
		rest = rest[end:]
	}

	results := make([]string, 0, len(found))
	for link := range found {
		results = append(results, link)
	}
	sort.Strings(results)

	return bytes.NewReader(content), results
}
|
||||
@@ -0,0 +1,162 @@
|
||||
package parse_test
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.sr.ht/~rjarry/aerc/lib/parse"
|
||||
)
|
||||
|
||||
func TestHyperlinks(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
links []string
|
||||
}{
|
||||
{
|
||||
name: "http-link",
|
||||
text: "http://aerc-mail.org",
|
||||
links: []string{"http://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "https-link",
|
||||
text: "https://aerc-mail.org",
|
||||
links: []string{"https://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "https-link-in-text",
|
||||
text: "text https://aerc-mail.org more text",
|
||||
links: []string{"https://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "https-link-in-parenthesis",
|
||||
text: "text (https://aerc-mail.org) more text",
|
||||
links: []string{"https://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "https-link-in-quotes",
|
||||
text: "text \"https://aerc-mail.org\" more text",
|
||||
links: []string{"https://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "https-link-in-angle-brackets",
|
||||
text: "text <https://aerc-mail.org> more text",
|
||||
links: []string{"https://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "https-link-in-html",
|
||||
text: "<a href=\"https://aerc-mail.org\">",
|
||||
links: []string{"https://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "https-link-twice",
|
||||
text: "text https://aerc-mail.org more text https://aerc-mail.org more text",
|
||||
links: []string{"https://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "https-link-markdown",
|
||||
text: "text [https://aerc-mail.org](https://aerc-mail.org) more text",
|
||||
links: []string{"https://aerc-mail.org"},
|
||||
},
|
||||
{
|
||||
name: "multiple-links",
|
||||
text: "text https://aerc-mail.org more text http://git.sr.ht/~rjarry/aerc more text",
|
||||
links: []string{"https://aerc-mail.org", "http://git.sr.ht/~rjarry/aerc"},
|
||||
},
|
||||
{
|
||||
name: "rfc",
|
||||
text: "text http://www.ietf.org/rfc/rfc2396.txt more text",
|
||||
links: []string{"http://www.ietf.org/rfc/rfc2396.txt"},
|
||||
},
|
||||
{
|
||||
name: "http-with-query-and-fragment",
|
||||
text: "text <http://example.com:8042/over/there?name=ferret#nose> more text",
|
||||
links: []string{"http://example.com:8042/over/there?name=ferret#nose"},
|
||||
},
|
||||
{
|
||||
name: "http-with-at",
|
||||
text: "text http://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm more text",
|
||||
links: []string{"http://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm"},
|
||||
},
|
||||
{
|
||||
name: "https-with-fragment",
|
||||
text: "text https://www.ics.uci.edu/pub/ietf/uri/#Related more text",
|
||||
links: []string{"https://www.ics.uci.edu/pub/ietf/uri/#Related"},
|
||||
},
|
||||
{
|
||||
name: "https-with-query",
|
||||
text: "text https://www.example.com/index.php?id_sezione=360&sid=3a5ebc944f41daa6f849f730f1 more text",
|
||||
links: []string{"https://www.example.com/index.php?id_sezione=360&sid=3a5ebc944f41daa6f849f730f1"},
|
||||
},
|
||||
{
|
||||
name: "https-onedrive",
|
||||
text: "I have a link like this in an email (I deleted a few characters here-and-there for privacy) https://1drv.ms/w/s!Ap-KLfhNxS4fRt6tIvw?e=dW8WLO",
|
||||
links: []string{"https://1drv.ms/w/s!Ap-KLfhNxS4fRt6tIvw?e=dW8WLO"},
|
||||
},
|
||||
{
|
||||
name: "email",
|
||||
text: "You can reach me via the somewhat strange, but nonetheless valid, email foo@baz.com",
|
||||
links: []string{"mailto:foo@baz.com"},
|
||||
},
|
||||
{
|
||||
name: "mailto",
|
||||
text: "You can reach me via the somewhat strange, but nonetheless valid, email mailto:bar@fooz.fr. Thank you",
|
||||
links: []string{"mailto:bar@fooz.fr"},
|
||||
},
|
||||
{
|
||||
name: "mailto-ipv6",
|
||||
text: "You can reach me via the somewhat strange, but nonetheless valid, email mailto:~mpldr/list@[2001:db8::7]",
|
||||
links: []string{"mailto:~mpldr/list@[2001:db8::7]"},
|
||||
},
|
||||
{
|
||||
name: "mailto-ipv6-query",
|
||||
text: "You can reach me via the somewhat strange, but nonetheless valid, email mailto:~mpldr/list@[2001:db8::7]?subject=whazzup%3F",
|
||||
links: []string{"mailto:~mpldr/list@[2001:db8::7]?subject=whazzup%3F"},
|
||||
},
|
||||
{
|
||||
name: "simple email in <a href>",
|
||||
text: `<a href="mailto:a@abc.com" rel="noopener noreferrer">`,
|
||||
links: []string{"mailto:a@abc.com"},
|
||||
},
|
||||
{
|
||||
name: "simple email in <a> body",
|
||||
text: `<a href="#" rel="noopener noreferrer">a@abc.com</a><br/><p>more text</p>`,
|
||||
links: []string{"mailto:a@abc.com"},
|
||||
},
|
||||
{
|
||||
name: "emails in <a> href and body",
|
||||
text: `<a href="mailto:a@abc.com" rel="noopener noreferrer">b@abc.com</a><br/><p>more text</p>`,
|
||||
links: []string{"mailto:a@abc.com", "mailto:b@abc.com"},
|
||||
},
|
||||
{
|
||||
name: "email in <...>",
|
||||
text: `<div>01.02.2023, 10:11, "Firstname Lastname" <a@abc.com>:</div>`,
|
||||
links: []string{"mailto:a@abc.com"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
// make sure reader is exact copy of input reader
|
||||
reader, parsedLinks := parse.HttpLinks(strings.NewReader(test.text))
|
||||
if _, err := io.ReadAll(reader); err != nil {
|
||||
t.Skipf("could not read text: %v", err)
|
||||
}
|
||||
|
||||
// check correct parsed links
|
||||
if len(parsedLinks) != len(test.links) {
|
||||
t.Errorf("different number of links: got %d but expected %d", len(parsedLinks), len(test.links))
|
||||
}
|
||||
linkMap := make(map[string]struct{})
|
||||
for _, got := range parsedLinks {
|
||||
linkMap[got] = struct{}{}
|
||||
}
|
||||
for _, expected := range test.links {
|
||||
if _, ok := linkMap[expected]; !ok {
|
||||
t.Errorf("link[%d] not parsed: %s", i, expected)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
package parse
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sync"
|
||||
|
||||
"git.sr.ht/~rjarry/aerc/lib/log"
|
||||
)
|
||||
|
||||
// reCache holds the result of compiling each expression passed to MatchCache,
// keyed by the expression string.
var reCache sync.Map
|
||||
|
||||
// Check if a string matches the specified regular expression.
|
||||
// The regexp is compiled only once and stored in a cache for future use.
|
||||
func MatchCache(s, expr string) bool {
|
||||
var re interface{}
|
||||
var found bool
|
||||
|
||||
if re, found = reCache.Load(expr); !found {
|
||||
var err error
|
||||
re, err = regexp.Compile(expr)
|
||||
if err != nil {
|
||||
log.Errorf("`%s` invalid regexp: %s", expr, err)
|
||||
}
|
||||
reCache.Store(expr, re)
|
||||
}
|
||||
if re, ok := re.(*regexp.Regexp); ok && re != nil {
|
||||
return re.MatchString(s)
|
||||
}
|
||||
return false
|
||||
}
|
||||
Reference in New Issue
Block a user