~netlandish/links

ad6d7d733bbf3f2f12f3774a5fb22d9914586b24 — Peter Sanchez 14 days ago 864c750
Add UTF-8 sanitizer to address edge-case import errors.

Fixes: https://todo.code.netlandish.com/~netlandish/links/96
Signed-off-by: Peter Sanchez <peter@netlandish.com>
2 files changed, 24 insertions(+), 2 deletions(-)

M core/import.go
M helpers.go
M core/import.go => core/import.go +3 -2
@@ 114,6 114,7 @@ func (p pinBoardObj) GetTags() []string {
func trimTags(tags []string) []string {
	var ret []string
	for _, t := range tags {
		t = links.SanitizeUTF8(t)
		if len(t) > 50 {
			t = t[:50]
		}


@@ 300,9 301,9 @@ func processOrgLinks(obj importObj, baseURLMap map[string]int,
		title = title[:146] + "..."
	}
	return &models.OrgLink{
		Title:       title,
		Title:       links.SanitizeUTF8(title),
		URL:         obj.GetURL(),
		Description: obj.GetDescription(),
		Description: links.SanitizeUTF8(obj.GetDescription()),
		BaseURLID:   sql.NullInt64{Valid: true, Int64: int64(baseID)},
		OrgID:       org.ID,
		UserID:      int(user.ID),

M helpers.go => helpers.go +21 -0
@@ 23,6 23,7 @@ import (
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	"git.sr.ht/~emersion/gqlclient"
	"github.com/99designs/gqlgen/graphql"


@@ 1159,3 1160,23 @@ func IPForContext(ctx context.Context) string {
	}
	return ip
}

// SanitizeUTF8 returns input with every byte that is not part of a valid
// UTF-8 sequence replaced by a single ASCII space. Valid runes, including a
// correctly encoded U+FFFD, are kept as they are, and input that is already
// valid UTF-8 is returned unchanged.
func SanitizeUTF8(input string) string {
	if utf8.ValidString(input) {
		return input
	}

	var out strings.Builder
	for rest := input; len(rest) > 0; {
		r, n := utf8.DecodeRuneInString(rest)
		rest = rest[n:]
		// A RuneError of width 1 marks an undecodable byte; a wider
		// RuneError is a genuine U+FFFD from the input and must be kept.
		if r != utf8.RuneError || n != 1 {
			out.WriteRune(r)
			continue
		}
		out.WriteString(" ")
	}
	return out.String()
}

Do not follow this link