Graphing around with Solr

Basic operations for types of node in a graph.

Published

August 30, 2022

The null query selects all nodes as indicated by the red outlines of the nodes.

Show the code
import example_graph
import graphutzing

# Connection helper from the project-local graphutzing module; the same
# `solr` object is reused by every later cell.
solr = graphutzing.SolrConnection()

# Called with no arguments: this is the "null query" of the accompanying text,
# which selects all nodes.
res = solr.squery()
# Draw the example graph with the documents in `res` marked
# (presumably the red outlines mentioned in the text — confirm in graphutzing).
solr.render(example_graph.docs, res)

Nodes with outgoing edges.

This is the set of documents that contain nested documents.

The query uses the {!parent} query parser to find the set of documents that have a _nest_path_ value, and returns the parents of those documents.

Show the code
# Streaming expression for "nodes with outgoing edges".
# The {!parent} parser's `which` clause matches documents without a
# _nest_path_ value (the non-nested documents); the clause after the closing
# brace matches documents that do have a _nest_path_ value.
ex1 = '''
search(reltest,
    q="{!parent 
        which='*:* -_nest_path_:*'
       }(+_nest_path_:*)",
    fl="*",
    rows=100
)
'''
# Echo the expression so it appears in the rendered output below the cell.
print(ex1)
# Send the expression to Solr and draw the graph with the result marked.
res = solr.sendExpr(ex1)
solr.render(example_graph.docs, res)

search(reltest,
    q="{!parent 
        which='*:* -_nest_path_:*'
       }(+_nest_path_:*)",
    fl="*",
    rows=100
)

Nodes with no outgoing edges.

This is the set of documents with no nested documents.

Show the code
# Streaming expression for "nodes with no outgoing edges".
# The query keeps documents without a _nest_path_ value and then excludes
# (the `!( ... )` clause) the result of the {!parent} query over documents
# that have a _nest_path_ value.
ex1 = '''
search(reltest,
    q="-_nest_path_:* AND !(
        {!parent 
            which='*:* AND -_nest_path_:*'
        }+_nest_path_:*)",
    rows=100
)
'''
# Echo the expression so it appears in the rendered output below the cell.
print(ex1)
# Send the expression to Solr and draw the graph with the result marked.
res = solr.sendExpr(ex1)
solr.render(example_graph.docs, res)

search(reltest,
    q="-_nest_path_:* AND !(
        {!parent 
            which='*:* AND -_nest_path_:*'
        }+_nest_path_:*)",
    rows=100
)

Nodes with an incoming edge

That is, nodes that are the target of any edge.

This approach uses an inner join from the edge target in child documents to the target node.

Note that the returned records are a composite of the nested child and its parent.

Show the code
# Left stream: documents with a _nest_path_ value, sorted by target_s.
# relation_type_s is returned as `p` and _nest_parent_ as `s`.
_left = '''    search(reltest,
        q="_nest_path_:*",
        fl="target_s,p:relation_type_s,s:_nest_parent_",
        rows=1000,
        sort="target_s asc"
    )'''
# Right stream: documents without a _nest_path_ value, sorted by id.
_right = '''    search(reltest,
        q="-_nest_path_:*",
        fl="id,*",
        rows=1000,
        sort="id asc"
    )'''

# Join the two streams where the left target_s equals the right id.
# Each stream is sorted ascending on its own join key (target_s / id).
expr = f'''
innerJoin(
    {_left},
    {_right},
    on="target_s=id"
)
'''
# Echo the assembled expression so it appears in the rendered output.
print(expr)
# Send the expression to Solr and draw the graph with the result marked.
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res)

innerJoin(
        search(reltest,
        q="_nest_path_:*",
        fl="target_s,p:relation_type_s,s:_nest_parent_",
        rows=1000,
        sort="target_s asc"
    ),
        search(reltest,
        q="-_nest_path_:*",
        fl="id,*",
        rows=1000,
        sort="id asc"
    ),
    on="target_s=id"
)

Nodes with no incoming edges

This is a bit more complicated because only the outgoing edges are recorded. The approach taken here is to find all the documents that are not nested (i.e. not the nested edge documents) and subtract from that set the documents that are the target of an edge, using the Solr complement stream decorator.

Show the code
# A is all non-nested documents (no _nest_path_ value), sorted by id.
A = '''    search(reltest,
        q="-_nest_path_:*",
        fl="id,*",
        rows=1000,
        sort="id asc"
    )'''
# B is all documents that are the target of a relation: the inner search
# lists target_s values of documents that have a _nest_parent_ value, and
# fetch() looks up the document whose id equals each target_s.
# NOTE(review): the inner `q` value is not quoted, unlike the other
# expressions in this file — confirm this is intentional.
B = '''fetch(reltest, 
        search(reltest,
            q=_nest_parent_:*,
            fl="target_s",
            sort="target_s asc",
            rows=1000
        ),
        fl="id,target_s",
        on="target_s=id"
    )'''

# This doesn't work. I guess the tuples need to be of the same kind of
# document, not just matching the "on" keys?
# NOTE: C is not referenced by the expression below; it is kept only as a
# record of the attempt.
C = '''
search(reltest,
    q="_nest_parent_:*",
    fl="id,target_s",
    sort="target_s asc",
    rows=1000
)'''

# complement(A, B) is the list of documents in A that are not in B, i.e. A - B.
# A is matched on its id field against B's target_s field.
expr = f'''complement(
    {A},
    {B},
    on="id=target_s"
)'''

# Echo the assembled expression so it appears in the rendered output.
print(expr)
# Send the expression to Solr and draw the graph with the result marked.
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res)
complement(
        search(reltest,
        q="-_nest_path_:*",
        fl="id,*",
        rows=1000,
        sort="id asc"
    ),
    fetch(reltest, 
        search(reltest,
            q=_nest_parent_:*,
            fl="target_s",
            sort="target_s asc",
            rows=1000
        ),
        fl="id,target_s",
        on="target_s=id"
    ),
    on="id=target_s"
)

Number of incoming edges

The approach here is to count the number of edges grouped by the target. This can be done by faceting with the facet stream source or drill combined with a rollup.

Show the code
# Can use facet for this: bucket on target_s and count the entries per bucket,
# then rename target_s to id and count(*) to cnt.
# NOTE: this `expr` is overwritten by the drill version below before anything
# is sent, so only the drill expression is actually executed.
expr='''select(
    facet(reltest,
        q="target_s:*",
        buckets="target_s",
        rows=100,
        count(*)
    ),
    target_s as id,
    count(*) as cnt
)'''

# Or drill, which may be a bit more efficient, but results are not sorted.
# Alternative query: number of incoming edges from nodes with name starting
# with "sub". NOTE: this value is replaced by the next assignment to `q`.
q = "{!child of='*:* -_nest_path_:*'}name_t:sub*"
# Query actually used: any document with a target_s value (any descendants).
q = "target_s:*"
# NOTE(review): `q` is interpolated without surrounding quotes; the
# alternative above contains spaces and quotes and may need `q="..."` — confirm.
expr=f'''select(drill(reltest,
        q={q},
        fl="target_s",
        sort="target_s asc",
        rollup(input(), over="target_s", count(*))
    ),
    target_s as id,
    count(*) as cnt
)'''
# Echo the assembled expression so it appears in the rendered output.
print(expr)
res = solr.sendExpr(expr)
# add_label="cnt" names the count field produced by the select() above
# (presumably shown as a node label — confirm in graphutzing).
solr.render(example_graph.docs, res, add_label="cnt")
select(drill(reltest,
        q=target_s:*,
        fl="target_s",
        sort="target_s asc",
        rollup(input(), over="target_s", count(*))
    ),
    target_s as id,
    count(*) as cnt
)

Number of outgoing edges

Show the code
# Count edges grouped by the document they are nested under: search for
# documents with a target_s value, roll up over _nest_parent_ with count(*),
# then rename _nest_parent_ to id and count(*) to cnt.
# NOTE(review): Solr's rollup relies on the sort order of its input matching
# the `over` field, and the inner search specifies no sort — confirm the
# grouping stays correct beyond this small example.
expr='''select(
    rollup(
        search(reltest,
            q="target_s:*",
            fl="*",
            rows=100
        ),
        over="_nest_parent_",
        count(*)
    ),
    _nest_parent_ as id,
    count(*) as cnt
)'''
# Echo the expression so it appears in the rendered output.
print(expr)
res = solr.sendExpr(expr)
# add_label="cnt" names the count field produced by the select() above
# (presumably shown as a node label — confirm in graphutzing).
solr.render(example_graph.docs, res, add_label="cnt")
select(
    rollup(
        search(reltest,
            q="target_s:*",
            fl="*",
            rows=100
        ),
        over="_nest_parent_",
        count(*)
    ),
    _nest_parent_ as id,
    count(*) as cnt
)

Nodes with an incoming edge and no outgoing edge

This finds progenitors of possibly overlapping graphs, nodes a and b in this case, assuming isolated single nodes are not considered to be a graph.

One approach is to take the complement of the nodes with incoming edges and nodes with outgoing edges.

Show the code
# Incoming-edge counts: facet on target_s, with target_s renamed to id.
incoming ='''select(
            facet(reltest,
                q="target_s:*",
                buckets="target_s",
                rows=100,
                count(*)
            ),
            target_s as id,
            count(*) as cnt
        )'''
# Outgoing-edge counts: roll up documents having a target_s value over
# _nest_parent_, with _nest_parent_ renamed to id.
outgoing = '''select(
            rollup(
                search(
                    reltest,
                    q="target_s:*",
                    fl="*",
                    rows=100
                ),
                over="_nest_parent_",
                count(*)
            ),
            _nest_parent_ as id,
            count(*) as cnt
        )'''

# Emit the ids present in `incoming` but absent from `outgoing`.
# Both inputs are wrapped in sort(..., by="id asc") so they reach complement()
# ordered on the field named in `on`.
expr = f'''complement(
    sort(
        {incoming}, 
        by="id asc"
    ),
    sort(
        {outgoing}, 
        by="id asc"
    ),
    on="id"
)
'''
# Echo the assembled expression so it appears in the rendered output.
print(expr)
# Send the expression to Solr and draw the graph with the result marked.
res = solr.sendExpr(expr)
solr.render(example_graph.docs, res)
complement(
    sort(
        select(
            facet(reltest,
                q="target_s:*",
                buckets="target_s",
                rows=100,
                count(*)
            ),
            target_s as id,
            count(*) as cnt
        ), 
        by="id asc"
    ),
    sort(
        select(
            rollup(
                search(
                    reltest,
                    q="target_s:*",
                    fl="*",
                    rows=100
                ),
                over="_nest_parent_",
                count(*)
            ),
            _nest_parent_ as id,
            count(*) as cnt
        ), 
        by="id asc"
    ),
    on="id"
)

Another approach is to use an innerJoin or an intersect to emit only documents that have incoming edges but no outgoing edges. The intersect operation emits only the fields from the first stream, whereas innerJoin emits the union of fields from both streams. Both are illustrated below.

Show the code
# A: documents without a _nest_path_ value, excluding the result of the
# {!parent} query over documents that have a target_s value (i.e. no
# outgoing edges). Returns id and name_t, sorted by id.
A = '''search(
        reltest,
        q="-_nest_path_:* AND !({!parent which='*:* -_nest_path_:*'}+target_s:*)",
        fl="id,name_t",
        sort="id asc",
    )'''
# B: facet buckets of target_s (nodes that are the target of an edge),
# sorted by target_s so the order lines up with A's id order.
B ='''sort(
        facet(
            reltest,
            q="target_s:*",
            buckets="target_s",
            rows=100,
            count(*)
        ),
        by="target_s asc"
    )'''
# intersect keeps the tuples of A whose id matches a target_s in B; only A's
# fields (id, name_t) appear in the output shown below.
expr = f'''intersect(
    {A},
    {B},
    on="id=target_s"
)'''
# Echo the assembled expression so it appears in the rendered output.
print(expr)
res = solr.sendExpr(expr)
# show_docs=True / show_graph=False: the rendered output below is the JSON
# result set rather than a graph (presumably controlled by these flags — confirm).
solr.render(example_graph.docs, res, show_docs=True, show_graph=False)
intersect(
    search(
        reltest,
        q="-_nest_path_:* AND !({!parent which='*:* -_nest_path_:*'}+target_s:*)",
        fl="id,name_t",
        sort="id asc",
    ),
    sort(
        facet(
            reltest,
            q="target_s:*",
            buckets="target_s",
            rows=100,
            count(*)
        ),
        by="target_s asc"
    ),
    on="id=target_s"
)
{
  "result-set": {
    "docs": [
      {
        "id": "a",
        "name_t": "parent a"
      },
      {
        "id": "b",
        "name_t": "parent b"
      },
      {
        "EOF": true,
        "RESPONSE_TIME": 3
      }
    ]
  }
}
Show the code
# A: documents without a _nest_path_ value, excluding the result of the
# {!parent} query over documents that have a target_s value (i.e. no
# outgoing edges). Returns id and name_t, sorted by id.
A = '''search(
        reltest,
        q="-_nest_path_:* AND !({!parent which='*:* -_nest_path_:*'}+target_s:*)",
        fl="id,name_t",
        sort="id asc",
    )'''
# B: facet buckets of target_s (nodes that are the target of an edge),
# sorted by target_s so the order lines up with A's id order.
B ='''sort(
        facet(
            reltest,
            q="target_s:*",
            buckets="target_s",
            rows=100,
            count(*)
        ),
        by="target_s asc"
    )'''
# innerJoin matches A's id against B's target_s; the output shown below
# carries fields from both streams (id, name_t, target_s, count(*)).
expr = f'''innerJoin(
    {A},
    {B},
    on="id=target_s"
)'''
# Echo the assembled expression so it appears in the rendered output.
print(expr)
res = solr.sendExpr(expr)
# show_docs=True / show_graph=False: the rendered output below is the JSON
# result set rather than a graph (presumably controlled by these flags — confirm).
solr.render(example_graph.docs, res, show_docs=True, show_graph=False)
innerJoin(
    search(
        reltest,
        q="-_nest_path_:* AND !({!parent which='*:* -_nest_path_:*'}+target_s:*)",
        fl="id,name_t",
        sort="id asc",
    ),
    sort(
        facet(
            reltest,
            q="target_s:*",
            buckets="target_s",
            rows=100,
            count(*)
        ),
        by="target_s asc"
    ),
    on="id=target_s"
)
{
  "result-set": {
    "docs": [
      {
        "count(*)": 2,
        "id": "a",
        "target_s": "a",
        "name_t": "parent a"
      },
      {
        "count(*)": 2,
        "id": "b",
        "target_s": "b",
        "name_t": "parent b"
      },
      {
        "EOF": true,
        "RESPONSE_TIME": 2
      }
    ]
  }
}