Graphing around with Solr
Basic operations for types of node in a graph.
The null query selects all nodes as indicated by the red outlines of the nodes.
Nodes with outgoing edges.
This is the set of documents that contain nested documents.
The query uses the !parent query parser to find the set of documents that have a _nest_path_ value and returns the parents of those documents.
Nodes with no outgoing edges.
This is the set of documents with no nested documents
Show the code
search(reltest,
q="-_nest_path_:* AND !(
{!parent
which='*:* AND -_nest_path_:*'
}+_nest_path_:*)",
rows=100
)
Nodes with an incoming edge
That is, nodes that are the target of any edge.
This approach uses an inner join from the edge target in child documents to the target node.
Note that the returned records are a composite of the nested child (the edge) and the target node it is joined to; the edge's parent is carried only as the s field.
Show the code
# Left stream: the nested (edge) documents, i.e. those with a _nest_path_ value.
# fl requests target_s, plus relation_type_s as "p" and _nest_parent_ as "s".
# The stream is sorted on target_s, the left-hand key of the join condition below.
_left = ''' search(reltest,
q="_nest_path_:*",
fl="target_s,p:relation_type_s,s:_nest_parent_",
rows=1000,
sort="target_s asc"
)'''
# Right stream: the non-nested documents (the nodes) with all fields,
# sorted on id, the right-hand key of the join condition.
_right = ''' search(reltest,
q="-_nest_path_:*",
fl="id,*",
rows=1000,
sort="id asc"
)'''
# Join each edge to the node whose id equals the edge's target_s.
expr = f'''
innerJoin(
{_left},
{_right},
on="target_s=id"
)
'''
# Echo the assembled expression so it is shown alongside the result.
print(expr)
# Send the expression through the solr helper and render the response against
# the example graph documents (solr and example_graph are defined outside this section).
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res)
innerJoin(
search(reltest,
q="_nest_path_:*",
fl="target_s,p:relation_type_s,s:_nest_parent_",
rows=1000,
sort="target_s asc"
),
search(reltest,
q="-_nest_path_:*",
fl="id,*",
rows=1000,
sort="id asc"
),
on="target_s=id"
)
Nodes with no incoming edges
This is a bit complicated because we only know about the outgoing edges. The approach taken here is to find all the documents that are not nested (i.e. not the nested edge documents) and subtract from that set the documents that are the target of an edge, using the Solr complement stream decorator.
Show the code
# A is all non-nested documents (the nodes), sorted on id so it lines up
# with the "id" side of the complement condition below.
A = ''' search(reltest,
q="-_nest_path_:*",
fl="id,*",
rows=1000,
sort="id asc"
)'''
# B is all documents that are the target of a relation.
# The inner search selects documents with a _nest_parent_ value and returns
# their target_s; it is wrapped in fetch(...) with on="target_s=id".
# NOTE(review): q is unquoted in this expression, unlike the other searches in
# this section; the echoed output shows it unquoted as well - confirm intended.
B = '''fetch(reltest,
search(reltest,
q=_nest_parent_:*,
fl="target_s",
sort="target_s asc",
rows=1000
),
fl="id,target_s",
on="target_s=id"
)'''
# C is a plain-search alternative to B. It is defined here but is not used in
# the expression built below. Author's note on it:
# This doesn't work. I guess the tuples need to be of the same kind of document, not just matching the "on" keys?
C = '''
search(reltest,
q="_nest_parent_:*",
fl="id,target_s",
sort="target_s asc",
rows=1000
)'''
# complement A, B is the list of documents in A that are not in B, or A-B.
# Here: nodes whose id never appears as the target_s of an edge.
expr = f'''complement(
{A},
{B},
on="id=target_s"
)'''
# Echo the assembled expression, then send it through the solr helper
# (defined outside this section).
print(expr)
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res)complement(
search(reltest,
q="-_nest_path_:*",
fl="id,*",
rows=1000,
sort="id asc"
),
fetch(reltest,
search(reltest,
q=_nest_parent_:*,
fl="target_s",
sort="target_s asc",
rows=1000
),
fl="id,target_s",
on="target_s=id"
),
on="id=target_s"
)
Number of incoming edges
The approach here is to count the number of edges grouped by the target. This can be done by faceting with the facet stream source or drill combined with a rollup.
Show the code
# Can use facet for this
# The facet buckets on target_s and counts each bucket; select then renames
# target_s to id and count(*) to cnt.
# NOTE: this facet expression is shown for reference only - expr is reassigned
# to the drill variant below before anything is sent.
expr='''select(
facet(reltest,
q="target_s:*",
buckets="target_s",
rows=100,
count(*)
),
target_s as id,
count(*) as cnt
)'''
#Or drill, which may be a bit more efficient, but results are not sorted
# number of incoming edges from nodes with name starting with "sub"
# (this value of q is immediately replaced by the assignment that follows)
q = "{!child of='*:* -_nest_path_:*'}name_t:sub*"
# any descendants
# This second assignment is the q actually interpolated into the drill expression.
q = "target_s:*"
# NOTE(review): q is interpolated without surrounding quotes (q={q}), unlike the
# quoted q in the facet variant - confirm this also works for the {!child ...} query.
expr=f'''select(drill(reltest,
q={q},
fl="target_s",
sort="target_s asc",
rollup(input(), over="target_s", count(*))
),
target_s as id,
count(*) as cnt
)'''
# Echo the drill expression, then send it through the solr helper
# (defined outside this section).
print(expr)
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res, add_label="cnt")select(drill(reltest,
q=target_s:*,
fl="target_s",
sort="target_s asc",
rollup(input(), over="target_s", count(*))
),
target_s as id,
count(*) as cnt
)
Number of outgoing edges
Show the code
select(
rollup(
search(reltest,
q="target_s:*",
fl="*",
rows=100
),
over="_nest_parent_",
count(*)
),
_nest_parent_ as id,
count(*) as cnt
)
Nodes with an incoming edge and no outgoing edge
This finds progenitors of possibly overlapping graphs, nodes a and b in this case, assuming isolated single nodes are not considered to be a graph.
One approach is to use complement to subtract the nodes with outgoing edges from the nodes with incoming edges.
Show the code
# incoming: one tuple per edge target. The facet buckets on target_s and counts
# each bucket; select renames target_s to id and count(*) to cnt.
incoming ='''select(
facet(reltest,
q="target_s:*",
buckets="target_s",
rows=100,
count(*)
),
target_s as id,
count(*) as cnt
)'''
# outgoing: one tuple per edge source. Edge documents (those with a target_s)
# are rolled up over _nest_parent_ and counted; select renames _nest_parent_
# to id and count(*) to cnt.
outgoing = '''select(
rollup(
search(
reltest,
q="target_s:*",
fl="*",
rows=100
),
over="_nest_parent_",
count(*)
),
_nest_parent_ as id,
count(*) as cnt
)'''
# Both streams are sorted on id before being compared on id.
# The complement keeps the incoming tuples whose id is not in outgoing,
# i.e. nodes with an incoming edge and no outgoing edge.
expr = f'''complement(
sort(
{incoming},
by="id asc"
),
sort(
{outgoing},
by="id asc"
),
on="id"
)
'''
# Echo the assembled expression, then send it through the solr helper
# (defined outside this section).
print(expr)
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res)complement(
sort(
select(
facet(reltest,
q="target_s:*",
buckets="target_s",
rows=100,
count(*)
),
target_s as id,
count(*) as cnt
),
by="id asc"
),
sort(
select(
rollup(
search(
reltest,
q="target_s:*",
fl="*",
rows=100
),
over="_nest_parent_",
count(*)
),
_nest_parent_ as id,
count(*) as cnt
),
by="id asc"
),
on="id"
)
Another approach is to use innerJoin or intersect to emit only docs with incoming edges but no outgoing edges. The intersect operation will only emit fields from the first stream, whereas innerJoin will emit a union of fields. Both are illustrated below.
Show the code
# A: non-nested documents that are not the parent of any document with a
# target_s, i.e. nodes with no outgoing edges. Returns id and name_t, sorted on id.
# NOTE(review): the search has a trailing comma after the sort parameter; the
# recorded output shows the expression still runs - confirm it is harmless.
A = '''search(
reltest,
q="-_nest_path_:* AND !({!parent which='*:* -_nest_path_:*'}+target_s:*)",
fl="id,name_t",
sort="id asc",
)'''
# B: one bucket per edge target (nodes with an incoming edge), with a count,
# sorted on target_s to line up with the id ordering of A.
B ='''sort(
facet(
reltest,
q="target_s:*",
buckets="target_s",
rows=100,
count(*)
),
by="target_s asc"
)'''
# intersect emits only the fields of the first stream (A) for tuples whose id
# matches a target_s in B.
expr = f'''intersect(
{A},
{B},
on="id=target_s"
)'''
# Echo the assembled expression, then send it through the solr helper
# (defined outside this section).
print(expr)
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res, show_docs=True, show_graph=False)intersect(
search(
reltest,
q="-_nest_path_:* AND !({!parent which='*:* -_nest_path_:*'}+target_s:*)",
fl="id,name_t",
sort="id asc",
),
sort(
facet(
reltest,
q="target_s:*",
buckets="target_s",
rows=100,
count(*)
),
by="target_s asc"
),
on="id=target_s"
)
{
"result-set": {
"docs": [
{
"id": "a",
"name_t": "parent a"
},
{
"id": "b",
"name_t": "parent b"
},
{
"EOF": true,
"RESPONSE_TIME": 3
}
]
}
}
Show the code
# A: non-nested documents that are not the parent of any document with a
# target_s, i.e. nodes with no outgoing edges. Returns id and name_t, sorted on id.
# NOTE(review): the search has a trailing comma after the sort parameter; the
# recorded output shows the expression still runs - confirm it is harmless.
A = '''search(
reltest,
q="-_nest_path_:* AND !({!parent which='*:* -_nest_path_:*'}+target_s:*)",
fl="id,name_t",
sort="id asc",
)'''
# B: one bucket per edge target (nodes with an incoming edge), with a count,
# sorted on target_s to line up with the id ordering of A.
B ='''sort(
facet(
reltest,
q="target_s:*",
buckets="target_s",
rows=100,
count(*)
),
by="target_s asc"
)'''
# innerJoin emits the union of fields from both streams for matching tuples,
# so each result also carries target_s and count(*) from B.
expr = f'''innerJoin(
{A},
{B},
on="id=target_s"
)'''
# Echo the assembled expression, then send it through the solr helper
# (defined outside this section).
print(expr)
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res, show_docs=True, show_graph=False)innerJoin(
search(
reltest,
q="-_nest_path_:* AND !({!parent which='*:* -_nest_path_:*'}+target_s:*)",
fl="id,name_t",
sort="id asc",
),
sort(
facet(
reltest,
q="target_s:*",
buckets="target_s",
rows=100,
count(*)
),
by="target_s asc"
),
on="id=target_s"
)
{
"result-set": {
"docs": [
{
"count(*)": 2,
"id": "a",
"target_s": "a",
"name_t": "parent a"
},
{
"count(*)": 2,
"id": "b",
"target_s": "b",
"name_t": "parent b"
},
{
"EOF": true,
"RESPONSE_TIME": 2
}
]
}
}