package parser
import (
"bufio"
"bytes"
"encoding/xml"
"io"
"regexp"
"strings"
"golang.org/x/net/html/charset"
)
// firstNonEmpty returns the first argument that still has content after
// surrounding whitespace is trimmed. The trimmed form is what gets returned.
// If no argument qualifies (or none are given), it returns "".
func firstNonEmpty(vals ...string) string {
	for i := range vals {
		if candidate := strings.TrimSpace(vals[i]); candidate != "" {
			return candidate
		}
	}
	return ""
}
// linkRe matches an http:// or https:// URL: the scheme followed by a run of
// non-whitespace characters. The whole URL is captured as group 1.
var linkRe = regexp.MustCompile(`(https?:\/\/\S+)`)

// plain2html converts plain text into a minimal HTML fragment:
//   - every URL matched by linkRe is wrapped in an anchor that points at it;
//   - every newline is replaced with a <br> tag.
//
// NOTE(review): the input is inserted as-is, without HTML escaping;
// presumably the result is sanitized by the caller — confirm.
func plain2html(text string) string {
	// $1 expands to the captured URL, used both as the href and the label.
	text = linkRe.ReplaceAllString(text, `<a href="$1">$1</a>`)
	text = strings.ReplaceAll(text, "\n", "<br>")
	return text
}
// xmlDecoder builds an xml.Decoder over r with strict mode switched off.
//
// The decoder's CharsetReader converts input labelled with charset cs via
// charset.NewReaderLabel. On success the converted stream is additionally
// wrapped with NewSafeXMLReader; on failure the reader and error from
// charset.NewReaderLabel are passed through unchanged.
func xmlDecoder(r io.Reader) *xml.Decoder {
	dec := xml.NewDecoder(r)
	dec.Strict = false
	dec.CharsetReader = func(cs string, input io.Reader) (io.Reader, error) {
		converted, err := charset.NewReaderLabel(cs, input)
		if err != nil {
			return converted, err
		}
		return NewSafeXMLReader(converted), nil
	}
	return dec
}
type safexmlreader struct {
reader *bufio.Reader
buffer *bytes.Buffer
}
func NewSafeXMLReader(r io.Reader) io.Reader {
return &safexmlreader{
reader: bufio.NewReader(r),
buffer: bytes.NewBuffer(make([]byte, 0, 4096)),
}
}
// Read implements io.Reader. It decodes runes from the wrapped reader, drops
// those for which isInCharacterRange reports false, and copies the surviving
// bytes into p.
//
// Runes are staged in xr.buffer until at least len(p) bytes are pending or
// the source reports io.EOF. Whatever does not fit into p stays buffered for
// the next call. When len(p) > 0, io.EOF is returned once the source is
// exhausted and nothing is left in the staging buffer. Any other read error
// is returned immediately with a zero count; bytes already staged remain
// buffered.
func (xr *safexmlreader) Read(p []byte) (int, error) {
	// Fill up to len(p), not cap(p): a Read may deliver at most len(p) bytes,
	// so staging more only forces extra reads from the source before this
	// call can return.
	for xr.buffer.Len() < len(p) {
		r, _, err := xr.reader.ReadRune()
		if err == io.EOF {
			if xr.buffer.Len() == 0 {
				return 0, io.EOF
			}
			// Source is drained but staged bytes remain: flush them first.
			break
		}
		if err != nil {
			return 0, err
		}
		if isInCharacterRange(r) {
			xr.buffer.WriteRune(r)
		}
	}
	return xr.buffer.Read(p)
}
// NOTE: copied from "encoding/xml" package
//
// isInCharacterRange reports whether r falls inside the XML Character Range,
// as defined by the Char production of
// https://www.xml.com/axml/testaxml.htm, Section 2.2 Characters.
//
// Accepted values are tab (0x09), line feed (0x0A), carriage return (0x0D),
// and the ranges 0x20-0xD7FF, 0xE000-0xFFFD and 0x10000-0x10FFFF.
func isInCharacterRange(r rune) (inrange bool) {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return true
	case 0x20 <= r && r <= 0xD7FF:
		return true
	case 0xE000 <= r && r <= 0xFFFD:
		return true
	case 0x10000 <= r && r <= 0x10FFFF:
		return true
	}
	return false
}
// NOTE: copied from "encoding/xml" package
//
// procInst extracts the value of a `param="..."` or `param='...'` pair from
// s. It returns "" when param is absent, when nothing follows the equals
// sign, when the value is not quoted, or when the closing quote is missing.
func procInst(param, s string) string {
	// TODO: this parsing is somewhat lame and not exact.
	// It works for all actual cases, though.
	key := param + "="
	start := strings.Index(s, key)
	if start < 0 {
		return ""
	}

	rest := s[start+len(key):]
	if len(rest) == 0 {
		return ""
	}

	// The value must open with a single or double quote and be closed by
	// the same character.
	quote := rest[0]
	if quote != '\'' && quote != '"' {
		return ""
	}
	end := strings.IndexByte(rest[1:], quote)
	if end < 0 {
		return ""
	}
	// end is relative to rest[1:], so shift by one to index into rest.
	return rest[1 : end+1]
}