-- Module:Wikidata/Chemin/parser

local tool = require("Module:Utilitaire") local path = require "Module:Wikidata/Chemin/Path" local parser = require "Module:FParser"

-- Table returned by this module; the grammar-rule parsers defined below are stored as its fields.
local pparser = {}

--[[

grammar :

letter  ::= "A" | "B" | "C" | "D" | "E" | "F" | "G"

                         | "H" | "I" | "J" | "K" | "L" | "M" | "N"
                         | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
                         | "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
                         | "c" | "d" | "e" | "f" | "g" | "h" | "i"
                         | "j" | "k" | "l" | "m" | "n" | "o" | "p"
                         | "q" | "r" | "s" | "t" | "u" | "v" | "w"
                         | "x" | "y" | "z" ;

digit  ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
space  ::= " " ;

Pid    ::= "P" , digit, { digit } ;
Pname  ::= letter, { letter | digit | space | "'" } ;

PathFirstLevel  ::= PathFirstAlternative

-- Rules specific to the highest level of a path: they allow a path to start from a statement
-- instead of an item. They are variants of PathAlternative and PathSequence.

PathFirstAlternative  ::= PathFirstSequence ( '|' PathFirstSequence )*
PathFirstSequence     ::= ( '>' PathQualifier | PathEltOrInverse ) ( '/' PathEltOrInverse | '^' PathElt )*

Path              ::= PathAlternative
PathAlternative   ::= PathSequence ( '|' PathSequence )*
PathSequence      ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt           ::= PathPrimary PathMod?
PathEltOrInverse  ::= PathElt | '^' PathElt
PathMod           ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary       ::= ( Prop | 'a' | '(' Path ')'
                           | ( Prop | '!' PathNegatedPropertySet ) '>' PathQualifier
                           | '!' PathNegatedPropertySet )

PathQualifier  ::= ( Prop | '!' PathNegatedPropertySet | PathPropertySet )

Prop  ::= IRIref | Pid | Pname

rules 95 and 96 in https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rPathNegatedPropertySet

PathNegatedPropertySet  ::= PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
PathOneInPropertySet    ::= iri | 'a' | '^' ( iri | 'a' )

PathPropertySet  ::= '(' Path ( '|' Path )+ ')'

For information, SPARQL property path grammar :

https://www.w3.org/TR/sparql11-property-paths/#path-syntax

TriplesSameSubjectPath    ::= VarOrTerm PropertyListNotEmptyPath | TriplesNode PropertyListPath
PropertyListPath          ::= PropertyListNotEmpty?
PropertyListNotEmptyPath  ::= ( VerbPath | VerbSimple ) ObjectList ( ';' ( ( VerbPath | VerbSimple ) ObjectList )? )*
VerbPath                  ::= Path
VerbSimple                ::= Var
Path                      ::= PathAlternative
PathAlternative           ::= PathSequence ( '|' PathSequence )*
PathSequence              ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt                   ::= PathPrimary PathMod?
PathEltOrInverse          ::= PathElt | '^' PathElt
PathMod                   ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary               ::= ( IRIref | 'a' | '(' Path ')' )

--]]

-- Local shortcuts for the FParser combinators and lexer primitives used below.
local lexer = parser.lexer

-- combinators
local chain          = parser.chain
local alternative    = parser.alternative
local plus           = parser.plus
local idop           = parser.idop
local nary_op_parser = parser.nary_op_parser

-- lexer primitives
local lex_char      = lexer.lex_char
local parse_epsilon = lexer.lex_epsilon
local lex_integer   = lexer.lex_integer


-- grammar base lexer functions


--- Lexes a property id (pattern `P[0-9]+`) and tags the result with type "Pid".
-- @param state lexer state, forwarded untouched to lexer.lex_regex
-- @return the value returned by lexer.lex_regex with `type` set, or nothing when it did not match
local function lex_pid(state)
	local token = lexer.lex_regex(state, "P[0-9]+")
	if not token then
		return
	end
	token.type = "Pid"
	return token
end

--- Lexes a prefix made of lowercase letters and underscores (pattern `[a-z_]*`)
-- and tags the result with type "prefix".
-- @param state lexer state, forwarded untouched to lexer.lex_regex
-- @return the value returned by lexer.lex_regex with `type` set, or nothing when it did not match
local function lex_sparql_prefix(state)
	local token = lexer.lex_regex(state, "[a-z_]*")
	if not token then
		return
	end
	token.type = "prefix"
	return token
end

--- Lexes a property label: a letter followed by letters, spaces, apostrophes or dashes.
-- The result is tagged with type "Plabel".
-- @param state lexer state, forwarded untouched to lexer.lex_regex
-- @return the value returned by lexer.lex_regex with `type` set, or nothing when it did not match
local function lex_property_name(state)
	local token = lexer.lex_regex(state, "[a-zA-Z][a-z A-Z'-]*")
	if not token then
		return
	end
	token.type = "Plabel"
	return token
end



-- PathElt  ::= PathPrimary PathMod? -- PathMod  ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )

--- Parses a PathElt: a PathPrimary optionally followed by a modifier.
-- Accepted modifiers are '*', '+', '?', a postfix '^', '{min}' and '{min,max}'.
-- Without a modifier the node of the primary is returned as is.
-- @param state parser state handed over by the combinators
-- @return the resulting state with `node` set to the built node, or nothing on failure
function pparser.pathElt(state)
	local result_node
	local primary_node
	local lower_bound, upper_bound

	-- Returns a step that wraps the primary node into `node_class`.
	-- The bounds are read when the step runs, not when it is built.
	local function wrap_primary_in(node_class)
		return idop(function()
			result_node = node_class:create(primary_node, lower_bound, upper_bound)
		end)
	end

	local parsed = chain{
		pparser.pathPrimary,
		idop(function(st) primary_node = st.node end),
		alternative{
			chain{ lex_char("*"), wrap_primary_in(path.StarNode) },
			chain{ lex_char("+"), wrap_primary_in(path.PlusNode) },
			chain{ lex_char("?"), wrap_primary_in(path.MaybeNode) },
			chain{ lex_char("^"), wrap_primary_in(path.InverseNode) },
			chain{
				lex_char("{"),
				lex_integer,
				idop(function(st) lower_bound = tonumber(st.lexed) end),
				alternative{
					chain{
						lex_char(","),
						lex_integer,
						idop(function(st) upper_bound = tonumber(st.lexed) end)
					},
					chain{
						parse_epsilon,
						idop(function() upper_bound = nil end)
					}
				},
				wrap_primary_in(path.BetweenNode),
				lex_char("}"),
			},
			-- no modifier at all: keep the primary node
			chain{
				parse_epsilon,
				idop(function() result_node = primary_node end)
			}
		}
	}(state)

	if not parsed then
		return
	end
	parsed.node = result_node
	return parsed
end


-- PathEltOrInverse  ::= PathElt | '^' PathElt
--- Parses either a PathElt, or a '^' followed by a PathElt.
-- In the second case the node of the element is wrapped in an InverseNode.
-- @param state parser state handed over by the combinators
-- @return whatever the underlying `alternative` parser returns for this state
pparser.pathEltOrInverse = function(state)
	return alternative{
		pparser.pathElt,
		chain{
			lex_char("^"),
			pparser.pathElt,
			function(state)
				-- nodes are instantiated through :create, as everywhere else in this file
				state.node = path.InverseNode:create(state.node)
				return state
			end
		}
	}(state)
end


--[[

Tests :

plop=p.parse("P31",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes property=>

  P31

plop=p.parse("P31>P279", p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes property=>

  P279

node=>

  P31

plop=p.parse("P31{1,6}",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)


plop=p.parse("(P31|P17>P31)",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes nodes=>

  1=>
     property=>
        P31
  2=>
     property=>
        P31
     node=>
        P17

--]]


-- PathSequence  ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
-- The first operand is a PathEltOrInverse; each following operand is introduced
-- either by '/' or by '^' (in which case its node is wrapped in an InverseNode).
-- The accumulated operands are handed to path.SequenceNode:create.
pparser.pathSequence = nary_op_parser(
	pparser.pathEltOrInverse,
	alternative{
		chain{
			lex_char("/"),
			pparser.pathEltOrInverse,
		},
		chain{
			-- plain "^": the former "\^" is an invalid escape sequence from Lua 5.2 on
			-- and denotes the very same one-character string in Lua 5.1
			lex_char("^"),
			pparser.pathElt,
			function(state)
				state.node = path.InverseNode:create(state.node)
				return state
			end
		}
	},
	function(acc)
		return path.SequenceNode:create(acc)
	end
)


--[[ Tests:

plop=p.parse("P31/P31+",p.pathSequence) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes nodes=>

  1=>
     property=>
        P31
  2=>
     node=>
        property=>
           P31

--]]


-- PathAlternative  ::= PathSequence ( '|' PathSequence )*

-- Parser for alternatives: a PathSequence followed by any number of '|' PathSequence.
-- What nary_op_parser accumulated is handed to path.AlternativeNode:create.
do
	local pipe_then_sequence = chain{ lex_char("[|]"), pparser.pathSequence }

	local function build_alternative(parts)
		return path.AlternativeNode:create(parts)
	end

	pparser.pathAlternative = nary_op_parser(
		pparser.pathSequence,
		pipe_then_sequence,
		build_alternative
	)
end

--[[ plop=p.parse("P31|P17/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes nodes=>

  1=>
     property=>
        P31
  2=>
     nodes=>
        1=>
           property=>
              P17
        2=>
           node=>
              property=>
                 P279
                 

plop=p.parse("P31|P17>P31/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes nodes=>

  1=>
     property=>
        P31
  2=>
     nodes=>
        1=>
           property=>
              P31
           node=>
              P17
        2=>
           node=>
              property=>
                 P279

--]]


-- PathSequence  ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*


--- Builds the path node for P31/P279* (used for the 'a' keyword of the grammar).
-- @return a SequenceNode made of PropertyNode "P31" and StarNode(PropertyNode "P279")
local function instance()
	local p31 = path.PropertyNode:create("P31")
	local p279_star = path.StarNode:create(path.PropertyNode:create("P279"))
	return path.SequenceNode:create({ p31, p279_star })
end

-- PathPrimary  ::= ( Prop | '!' NegatedPropertySet ) ( '>' ( Prop | '!' NegatedPropertySet ) ) ? | 'a' | '(' Path ')'

--- Parses a PathPrimary. The alternatives are tried in this order:
--   1. 'a' followed by a space, which yields the node built by instance()
--   2. a property or '!' negated set, optionally followed by a qualifier
--      (with a qualifier, both nodes are combined in a QualifiedStatementNode)
--   3. a parenthesised path
--   4. '!' followed by a negated property set
-- @param state parser state handed over by the combinators
-- @return the resulting state with `node` set, or nothing on failure
pparser.pathPrimary = function(state)
	local current

	-- step storing the node of the previous parser into `current`
	local function keep_node()
		return idop(function(st) current = st.node end)
	end

	local instance_shortcut = chain{
		lex_char('a'),
		lex_char(' '),
		idop(function() current = instance() end)
	}

	local statement_with_optional_qualifier = chain{
		chain{
			alternative{
				pparser.prop,
				chain{ lex_char('!'), pparser.negatedPropertySet }
			},
			keep_node()
		},
		alternative{
			chain{
				pparser.pathQualifier,
				idop(function(st)
					current = path.QualifiedStatementNode:create(current, st.node)
				end)
			},
			parse_epsilon
		}
	}

	local parenthesised_path = chain{
		lexer.open_parenthesis,
		pparser.path,
		keep_node(),
		lexer.close_parenthesis
	}

	local negated_set = chain{
		lexer.lex_char('!'),
		pparser.negatedPropertySet,
		keep_node()
	}

	local parsed = alternative{
		instance_shortcut,
		statement_with_optional_qualifier,
		parenthesised_path,
		negated_set
	}(state)

	if not parsed then
		return
	end
	parsed.node = current
	return parsed
end

--[[ Tests :

p.parse("a ", p.pathPrimary) => yes p.parse("!P31", p.pathPrimary) => yes p.parse("!(P31|instance of)", p.pathPrimary) => yes

--]]

-- stupid function to be eliminated soon (hum)
--- Builds a parser that reads a property, then replaces the state node by
-- `wrapper` applied to a one-element list holding that property node.
-- @param wrapper function receiving the list and returning the final node
-- @return a chain parser
local function parsePropAndWrap(wrapper)
	return chain{
		pparser.prop,
		function(state)
			-- TODO: understand why the node must sit in a `node` field
			-- instead of being stored directly as the first element
			local wrapped = { { node = state.node } }
			state.node = wrapper(wrapped)
			return state
		end
	}
end

--- Builds a parser for a parenthesised, '|'-separated set of PathOneInPropertySet.
-- An empty pair of parentheses is accepted as well and yields `final_node_creator({})`.
-- @param final_node_creator function turning a list of nodes into the node of the set
-- @return a parser function taking the parser state
pparser.pathPropertySetParser = function(final_node_creator)

	-- Given to nary_op_parser as fourth argument: puts a lone node in a one-element list.
	-- NOTE(review): presumably called when a single operand was parsed -- confirm in Module:FParser
	local function create_from_single(node)
		return final_node_creator({ node })
	end

	-- allows empty set (to mimic any qualifier allowed, equiv of «*»)
	local function create_empty(state)
		state.node = final_node_creator({})
		return state
	end

	return function(state)
		-- pparser.pathOneInPropertySet is looked up here, at parse time
		local set_elements = nary_op_parser(
			pparser.pathOneInPropertySet,
			chain{ lexer.lex_char("|"), pparser.pathOneInPropertySet },
			final_node_creator,
			create_from_single
		)

		return chain{
			lexer.open_parenthesis,
			alternative{
				set_elements,
				-- parsePropAndWrap(final_node_creator), -- case for "!(P31)" like patterns, naryopparser or something needs to be fixed to better handle this
				-- here the solution for negation is to create a negated set with only one property.
				chain{ parse_epsilon, create_empty }
			},
			lexer.close_parenthesis
		}(state)
	end
end

--- Builds a parser accepting either a single property or a parenthesised property set.
-- In both cases the final node is produced by `creator`.
-- @param creator function turning a list of nodes into the final node
-- @return a parser function taking the parser state
pparser.propOrSetParser = function(creator)
	local function create_set(nodes)
		return creator(nodes)
	end

	return function(state)
		-- case for the pattern !P31 : when negated, the single property
		-- still needs to be wrapped in a negated set
		local single_property = parsePropAndWrap(creator)
		local property_set = pparser.pathPropertySetParser(create_set)

		return alternative{
			single_property,
			property_set,
		}(state)
	end
end

-- '>' ( Prop | '!' NegatedPropertySet | PropertySet )
-- The node produced by the inner parser is wrapped in a QualifierSnakNode.
do
	local function negated_set(nodes)
		return path.NegatedPropertySetNode:create(nodes)
	end

	local function property_set(nodes)
		return path.PropertySetNode:create(nodes)
	end

	local function wrap_in_qualifier_snak(state)
		state.node = path.QualifierSnakNode:create(state.node)
		return state
	end

	pparser.pathQualifier = chain{
		lex_char(">"),
		alternative{
			chain{ lex_char("!"), pparser.propOrSetParser(negated_set) },
			pparser.propOrSetParser(property_set)
		},
		wrap_in_qualifier_snak
	}
end
--[[
=p.parse(">!(P31|P31)",p.pathQualifier)
=p.parse(">(P31|P31)",p.pathQualifier)
=p.parse(">P31",p.pathQualifier)
=p.parse(">!P31",p.pathQualifier)
--]]

-- PathNegatedPropertySet  ::= PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'

--- Parses the content of a negated property set (the leading '!' is consumed by the caller).
-- Accepts the parenthesised form, and also a bare PathOneInPropertySet, which is
-- turned into a negated set holding that single element.
-- @param state parser state handed over by the combinators
-- @return whatever the underlying `alternative` parser returns for this state
do
	local function create_negated_set(nodes)
		return path.NegatedPropertySetNode:create(nodes)
	end

	local parenthesised_set = pparser.pathPropertySetParser(create_negated_set)

	pparser.negatedPropertySet = function(state)
		return alternative{
			parenthesised_set,
			chain{
				-- looked up here, at parse time
				pparser.pathOneInPropertySet,
				function(state)
					state.node = create_negated_set({ state.node })
					return state
				end
			}
		}(state)
	end
end


--[[ Tests :

p.parse("!P31",p.negatedPropertySet) p.parse("(P31|P32)",p.negatedPropertySet) => yes p.parse("P31",p.negatedPropertySet) => yes p.parse("^P31",p.negatedPropertySet) => yes p.parse("^(P31)",p.negatedPropertySet) => nope p.parse("(P31)",p.negatedPropertySet) => yes p.parse("(^P31)",p.negatedPropertySet) => yes p.parse("(^P31|a|plop)",p.negatedPropertySet) => yes

All good(?)

--]]

-- PathOneInPropertySet  ::= iri | 'a' | '^' ( iri | 'a' )

--- Parses one element of a property set: a property or 'a', optionally preceded by '^'.
-- With a leading '^' the element node is wrapped in an InverseNode; without it the
-- element node is used as is.
-- @param state parser state handed over by the combinators
-- @return the resulting state with `node` set, or the falsy result of the failed parse
pparser.pathOneInPropertySet = function(state)
	local node
	-- node of the element just parsed; kept local so that nothing leaks into the globals
	local elem

	local pElement = alternative{
		chain{
			lexer.lex_char('a'),
			idop(function(state) elem = instance() end)
		},
		chain{
			pparser.prop,
			idop(function(state) elem = state.node end)
		}
	}

	local res = alternative{
		-- '^' element : inverse of the element
		chain{
			lexer.lex_char("^"),
			pElement,
			idop(function(state) node = path.InverseNode:create(elem) end)
		},
		-- plain element
		chain{
			pElement,
			idop(function(state) node = elem end)
		}
	}(state)

	if res then
		res.node = node
	end
	return res
end


-- Prop ::= IRIref | Pid | Pname
--- Parses a property: either a Pid with an optional "prefix:" in front of it,
-- or a property label. The lexed text is turned into a PropertyNode.
-- @param state parser state handed over by the combinators
-- @return the resulting state with `node` set, or nothing on failure
pparser.prop = function(state)
	local optional_prefix = parser.questionmark(
		chain{ lex_sparql_prefix, lex_char(":") }
	)
	local prefixed_pid = chain{ optional_prefix, lex_pid }

	local parsed = alternative{
		prefixed_pid,
		lex_property_name
	}(state)

	if not parsed then
		return
	end
	parsed.node = path.PropertyNode:create(parsed.lexed)
	return parsed
end

--[[

Tests :

p.parse("a ", p.primary) => yes
p.parse("P31@", p.prop) => nope
p.parse("P31", p.prop) => nope
p.parse("P31>P279", p.prop) => nope

--]]


-- PathFirstSequence  ::= '>' PathQualifier ( '/' PathEltOrInverse | '^' PathElt )*
-- Only the '/' continuation is implemented here.
-- NOTE(review): pparser.pathQualifier is taken to be the live first operand; the
-- commented-out lines below used to wrap it -- confirm against the original layout.
do
	local function build_sequence(acc)
		return path.SequenceNode:create(acc)
	end

	pparser.pathFirstSequence = nary_op_parser(
		--	chain{
		pparser.pathQualifier,
		--		function(state)
		--			state.node = path.QualifierSnakNode:create(state.node)
		--			return state
		--		end
		--	},
		chain{
			lex_char("/"),
			pparser.pathEltOrInverse
		},
		build_sequence
	)
end

--- Path ::= PathAlternative
-- Thin wrapper that looks pparser.pathAlternative up at call time.
-- @param state parser state handed over by the combinators
-- @return the result of pparser.pathAlternative for this state
function pparser.path(state)
	return pparser.pathAlternative(state)
end

-- PathFirstAlternative  ::= PathFirstSequence ( '|' PathFirstSequence )* | Path
-- A regular Path is tried first, then the '|'-separated PathFirstSequence form.
do
	local function build_alternative(acc)
		return path.AlternativeNode:create(acc)
	end

	local first_sequences = nary_op_parser(
		pparser.pathFirstSequence,
		chain{ lex_char("|"), pparser.pathFirstSequence },
		build_alternative
	)

	pparser.pathFirstAlternative = alternative{
		pparser.path,
		first_sequences,
	}
end


-- plop = p.parse_path("P31/P31/P31>P31/P31")

--- Parses a property path string with the top-level rule (pathFirstAlternative).
-- Raises an error when the parser returns a falsy result.
-- @param property_path the path to parse, e.g. "P31/P31/P31>P31/P31"
-- @return the result of parser.parse
function pparser.parse_path(property_path)
	local parsed = parser.parse(property_path, pparser.pathFirstAlternative)
	local failure_message = "parsing returned a nil obj on path : «" .. property_path .. "»"
	assert(parsed, failure_message)
	return parsed
end

-- Re-exported so that single rules can be tried from the console,
-- e.g. p.parse("P31", p.pathElt)
pparser.parse = parser.parse

return pparser