From 9d59974e5a1fc24b08963e9c5a0fcaa4f3a5e7f6 Mon Sep 17 00:00:00 2001 From: brent s Date: Sun, 4 Jul 2021 04:06:05 -0400 Subject: [PATCH] nowhere near usable but setting aside for rainy day --- conf.go | 2 +- const.go | 12 ++++- func.go | 1 + func_nsxml.go | 128 +++++++++++++++++++++++++++++++++++++++++++++++ schema.go | 134 ++++---------------------------------------------- types.go | 28 +++++++++++ 6 files changed, 178 insertions(+), 127 deletions(-) create mode 100644 func.go create mode 100644 func_nsxml.go create mode 100644 types.go diff --git a/conf.go b/conf.go index 40ee056..d419ec4 100644 --- a/conf.go +++ b/conf.go @@ -1,4 +1,4 @@ -package conf +package main import ( "encoding/xml" diff --git a/const.go b/const.go index 742d6b8..1965b2e 100644 --- a/const.go +++ b/const.go @@ -1,6 +1,14 @@ -package conf +package main + +import ( + "regexp" +) const ( - XSIVal = "http://www.w3.org/2001/XMLSchema-instance" + XSIVal = "http://www.w3.org/2001/XMLSchema-instance" DefSchemaNS = "xsi" ) + +var ( + uriRe = regexp.MustCompile(`^(?P<type>file|https?)://(?P<path>.+)$`) +) diff --git a/func.go b/func.go new file mode 100644 index 0000000..06ab7d0 --- /dev/null +++ b/func.go @@ -0,0 +1 @@ +package main diff --git a/func_nsxml.go b/func_nsxml.go new file mode 100644 index 0000000..b8f9d8d --- /dev/null +++ b/func_nsxml.go @@ -0,0 +1,128 @@ +package main + +import ( + "io/ioutil" + "net/http" + "strings" + + lxml "github.com/lestrrat-go/libxml2" + lxmlp "github.com/lestrrat-go/libxml2/parser" + lxmlt "github.com/lestrrat-go/libxml2/types" + "github.com/lestrrat-go/libxml2/xsd" + rpaths "r00t2.io/sysutils/paths" +) + +func NewNsXml(raw *[]byte) (x *NsXml, err error) { + + var xml NsXml + + xml.Raw = raw + + x = &xml + + return +} + +func (x *NsXml) Validate(defaults bool) (bool, error) { + // We need the XSD before we can validate. 
+ if err := x.getXSD(); err != nil { + return false, err + } + + return false, nil +} + +func (x *NsXml) getXSD() error { + if err := x.mapNS(); err != nil { + return err + } + r := uriRe.FindStringSubmatch(strings.ToLower(x.XSD.XSDPath)) + reResults := make(map[string]string) + for i, name := range uriRe.SubexpNames() { + if i != 0 && name != "" { + reResults[name] = r[i] + } + } + var xsdRaw []byte + switch reResults["type"] { + case "file": + p := reResults["path"] + exists, err := rpaths.RealPathExists(&p) + if (err != nil) || (!exists) { + return err + } + xsdRaw, err = ioutil.ReadFile(p) + if err != nil { + return err + } + case "http", "https": + resp, err := http.Get(x.XSD.XSDPath) + if err != nil { + return err + } + defer resp.Body.Close() + xsdRaw, err = ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + default: + return errors.New("invalid URI type for schemaLocation") + } + x.XSD, err = xsd.Parse(xsdRaw) + if err != nil { + return err + } +} + +// mapNS (tries to) extract the default namespace from the document via the SchemaLocation property of XML, sets DefaultNS, +// and then gets the XSDPath specified therein. +func (x *NsXml) mapNS() error { + if x.XSD.Pointer() != 0 { + // Already set. + return nil + } + if err := x.parse(); err != nil { + return err + } + + x.DefaultNS = x.Root.NamespaceURI() + /* x.DefaultNS = *x.XML.XMLName.Space + sl := *x.XML.SchemaLocation + */ + ns := strings.Fields(sl) + if ns != nil { + if len(ns) > 2 { + return errors.New("too many values for a valid schemaLocation") + } else if len(ns) == 0 { + return errors.New("no specified value for schemaLocation") + } else if len(ns) == 1 { + // LAZY. This is improper XML, but is commonly used regardless. 
+ x.XSDPath = ns[0] + } else { + if ns[0] == x.DefaultNS { + x.XSDPath = ns[1] + } + } + } + return nil +} + +// parse parses the x.XML into its x.LXML +func (x *NsXml) parse() error { + if x.LXML != nil { + return nil // Already parsed + } + if x.XML == nil { + return errors.New("XML property is empty") + } + x.LXML, err = lxml.Parse(*x.XML, lxmlp.XMLParseBigLines, lxmlp.XMLParseXInclude) + if err != nil { + return err + } + de, err := x.LXML.DocumentElement() + if err != nil { + return err + } + x.Root = de.(lxmlt.Element) + return nil +} diff --git a/schema.go b/schema.go index 386cf0f..6811749 100644 --- a/schema.go +++ b/schema.go @@ -1,131 +1,17 @@ -package conf +package main // The usage of this depends on the successful resolution of https://github.com/lestrrat-go/libxml2/issues/67. import ( - `errors` - `io/ioutil` - `net/http` - `regexp` - `strings` + "errors" + "io/ioutil" + "net/http" + "strings" - lxml `github.com/lestrrat-go/libxml2` - lxmlp `github.com/lestrrat-go/libxml2/parser` - lxmlt `github.com/lestrrat-go/libxml2/types` - `github.com/lestrrat-go/libxml2/xsd` + lxml "github.com/lestrrat-go/libxml2" + lxmlp "github.com/lestrrat-go/libxml2/parser" + lxmlt "github.com/lestrrat-go/libxml2/types" + "github.com/lestrrat-go/libxml2/xsd" - rpaths `r00t2.io/sysutils/paths` + rpaths "r00t2.io/sysutils/paths" ) - -var uriRe = regexp.MustCompile(`^(?P<type>file|https?)://(?P<path>.+)$`) - -type NSXML struct { - XML *[]byte // The raw XML bytes. - LXML lxmlt.Document // The lxml.Parse()'d XML. - Root lxmlt.Element // The Document's root element - XSD xsd.Schema // Its schema. -} - -func (x *NSXML) Validate(defaults bool) (bool, error) { - // We need the XSD before we can validate. 
- if err := x.getXSD(); err != nil { - return false, err - } - - return false, nil -} - -func (x *NSXML) getXSD() error { - if err := x.mapNS(); err != nil { - return err - } - r := uriRe.FindStringSubmatch(strings.ToLower(x.XSDPath)) - reResults := make(map[string]string) - for i, name := range uriRe.SubexpNames() { - if i != 0 && name != "" { - reResults[name] = r[i] - } - } - var xsdRaw []byte - switch reResults["type"] { - case "file": - p := reResults["path"] - exists, err := rpaths.RealPathExists(&p) - if (err != nil) || (!exists) { - return err - } - xsdRaw, err = ioutil.ReadFile(p) - if err != nil { - return err - } - case "http", "https": - resp, err := http.Get(x.XSDPath) - if err != nil { - return err - } - defer resp.Body.Close() - xsdRaw, err = ioutil.ReadAll(resp.Body) - if err != nil { - return err - } - default: - return errors.New("invalid URI type for schemaLocation") - } - x.XSD, err = xsd.Parse(xsdRaw) - if err != nil { - return err - } -} - -// mapNS (tries to) extract the default namespace from the document via the SchemaLocation property of XML, sets DefaultNS, -// and then gets the XSDPath specified therein. -func (x *NSXML) mapNS() error { - if x.XSD.Pointer() != 0 { - // Already set. - return nil - } - if err := x.parse(); err != nil { - return err - } - - x.DefaultNS = x.Root.NamespaceURI() - /* x.DefaultNS = *x.XML.XMLName.Space - sl := *x.XML.SchemaLocation - */ - ns := strings.Fields(sl) - if ns != nil { - if len(ns) > 2 { - return errors.New("too many values for a valid schemaLocation") - } else if len(ns) == 0 { - return errors.New("no specified value for schemaLocation") - } else if len(ns) == 1 { - // LAZY. This is improper XML, but is commonly used regardless. 
- x.XSDPath = ns[0] - } else { - if ns[0] == x.DefaultNS { - x.XSDPath = ns[1] - } - } - } - return nil -} - -// parse parses the x.XML into its x.LXML -func (x *NSXML) parse() error { - if x.LXML != nil { - return nil // Already parsed - } - if x.XML == nil { - return errors.New("XML property is empty") - } - x.LXML, err = lxml.Parse(*x.XML, lxmlp.XMLParseBigLines, lxmlp.XMLParseXInclude) - if err != nil { - return err - } - de, err := x.LXML.DocumentElement() - if err != nil { - return err - } - x.Root = de.(lxmlt.Element) - return nil -} diff --git a/types.go b/types.go new file mode 100644 index 0000000..8937d5a --- /dev/null +++ b/types.go @@ -0,0 +1,28 @@ +package main + +import ( + lxmlt "github.com/lestrrat-go/libxml2/types" + "github.com/lestrrat-go/libxml2/xsd" +) + +// NsXml is a namespaced XML document. +type NsXml struct { + // Raw is the raw bytes of the document. + Raw *[]byte + // XML is the libxml2-parsed document. + XML lxmlt.Document + // Root is LXML's root element. + Root lxmlt.Element + // XSD is LXML's schema (NsXsd). + XSD *NsXsd +} + +// NsXsd is a namespaced XSD (XML Schema Definition). +type NsXsd struct { + // Raw is the raw bytes of the document. + Raw *[]byte + // XSD is the libxml2-parsed schema. + XSD xsd.Schema + // XSDPath is the path of the XSD in a fetchable form (the URI of the document, essentially). + XSDPath string +}