Utility functions

append_triples(graph, triples)

Add all items in iterable triples to graph (modify in place).

Source code in rdf/utils.py
def append_triples(graph, triples):
    """Insert every item of the iterable `triples` into `graph`.

    The graph is changed in place; nothing is returned.
    """
    for item in triples:
        graph.add(item)

get_conjunctive_graph()

Returns the conjunctive graph of our SPARQL store.

Source code in rdf/utils.py
def get_conjunctive_graph():
    """Return a ConjunctiveGraph backed by the store in `settings.RDFLIB_STORE`."""
    store = settings.RDFLIB_STORE
    return ConjunctiveGraph(store)

graph_from_triples(triples, ctor=Graph)

Return a new Graph containing all items in iterable triples.

Source code in rdf/utils.py
def graph_from_triples(triples, ctor=Graph):
    """Create a graph with `ctor` and fill it with every item of iterable `triples`.

    `ctor` is called without arguments and defaults to `Graph`; the
    freshly built graph is returned.
    """
    new_graph = ctor()
    append_triples(new_graph, triples)
    return new_graph

patched_inject_prefixes(self, query, extra_bindings)

Monkeypatch for SPARQLStore prefix injection. Parses the incoming query for prefixes and ignores these when injecting additional namespaces. A better implementation is possibly available, e.g. using rdflib's query parser to extract prefixes.

Source code in rdf/utils.py
def patched_inject_prefixes(self, query, extra_bindings):
    """Monkeypatch for SPARQLStore prefix injection.

    Parses the incoming query for prefixes (using `PREFIX_PATTERN`) and
    leaves these out when injecting the store's namespace bindings.
    Prefixes in `extra_bindings` always win: their declaration is removed
    from the query text (if present) and re-injected with the given value.
    A better implementation is possibly available, e.g. using rdflib's
    query parser to extract prefixes.

    Returns the query, preceded by one `PREFIX` line per injected binding
    and an empty separator line; the query alone if nothing is injected.
    """
    query_prefixes = re.findall(PREFIX_PATTERN, query)

    # Prefixes declared in the query are deducted from the store's
    # nsBindings. A dict keyed on prefix (rather than a set of pairs)
    # guarantees one declaration per prefix, so that bindings provided
    # through initNs really take precedence over the store's.
    bindings = {
        prefix: namespace
        for prefix, namespace in self.nsBindings.items()
        if prefix not in query_prefixes
    }
    bindings.update(extra_bindings)

    # Remove the declarations of the extra bindings from the original query.
    for k in extra_bindings:
        if k in query_prefixes:
            # Escape the prefix so it is matched literally, and stop at the
            # first '>' so that only this one declaration is removed even if
            # more '<...>' spans follow on the same line.
            replace_pattern = re.compile(
                fr'PREFIX\s+{re.escape(k)}:\s*<[^>\n]*>', re.IGNORECASE)
            query = replace_pattern.sub('', query)

    if not bindings:
        return query
    return "\n".join(
        [
            "\n".join(["PREFIX %s: <%s>" % (k, v)
                       for k, v in bindings.items()]),
            "",  # separate ns_bindings from query with an empty line
            query,
        ]
    )

patched_sparqlconnector_update(self, query, default_graph=None, named_graph=None)

Monkeypatch for SPARQLConnector's update method. Changes the Content-Type header to include the utf-8 charset.

Source code in rdf/utils.py
def patched_sparqlconnector_update(self, query,
                                   default_graph: Optional[str] = None,
                                   named_graph: Optional[str] = None):
    """Monkeypatch for SPARQLConnector's update method.

    Posts `query` (utf-8 encoded) to `self.update_endpoint` with a
    Content-Type header that includes the utf-8 charset.

    `default_graph` and `named_graph`, when given, are sent as the
    `using-graph-uri` and `using-named-graph-uri` query string arguments.
    Extra `params` and `headers` configured in `self.kwargs` are included;
    the values set here override them on conflict.

    Raises SPARQLConnectorException if no update endpoint is set.
    """
    if not self.update_endpoint:
        raise SPARQLConnectorException("Query endpoint not set!")

    # Work on copies of the configured dicts. Updating the dicts stored in
    # self.kwargs directly would make the graph URIs of this call leak into
    # every later request made through the same connector.
    params = dict(self.kwargs.get("params", {}))  # other QSAs
    headers = dict(self.kwargs.get("headers", {}))

    if default_graph is not None:
        params["using-graph-uri"] = default_graph

    if named_graph is not None:
        params["using-named-graph-uri"] = named_graph

    # Single intended difference from the original method: the
    # Content-Type header carries an explicit charset.
    headers.update({
        "Accept": _response_mime_types[self.returnFormat],
        "Content-Type": "application/sparql-update; charset=utf-8",
    })

    request = Request(
        self.update_endpoint + "?" + urlencode(params),
        data=query.encode(),
        headers=headers,
    )
    # The response body is not used; the context manager only makes sure
    # the connection is closed again.
    with urlopen(request):
        pass

prune_recursively(graph, subject, graphs_applied_to=[], privileged_predicates=[])

Recursively remove subject and all related resources from graph. Specify which graphs qualify, i.e. from which triples will be deleted, in graphs_applied_to. Optionally, skip deletion of (i.e. keep) items related via specific (privileged) predicates.

Source code in rdf/utils.py
def prune_recursively(graph, subject, graphs_applied_to=None, privileged_predicates=None):
    """
    Remove `subject` and all resources reachable from it from `graph`.

    Starting at `subject`, every quad with the current resource as its
    subject is removed. The traversal continues into an object when it is
    a URIRef, the predicate is not in `privileged_predicates` and the
    quad's context is in `graphs_applied_to`. Both lists default to empty,
    in which case only the quads of `subject` itself are removed.

    Each resource is processed at most once, so cyclic references
    (A -> B -> A) terminate, and an explicit stack is used so that long
    chains cannot exhaust the interpreter's recursion limit.

    NOTE(review): `graphs_applied_to` only decides which links are
    followed; the quads of a visited resource are removed whatever their
    context. Confirm that this is intended.
    """
    if graphs_applied_to is None:
        graphs_applied_to = []
    if privileged_predicates is None:
        privileged_predicates = []

    visited = {subject}
    pending = [subject]
    while pending:
        current = pending.pop()
        # Snapshot the quads first: the graph is modified further down.
        related_by_subject = list(graph.quads((current, None, None)))

        for _, p, o, c in related_by_subject:
            # `o not in visited` also covers self-references (o == current).
            if (isinstance(o, URIRef) and o not in visited
                    and p not in privileged_predicates
                    and c in graphs_applied_to):
                visited.add(o)
                pending.append(o)

        prune_triples(graph, related_by_subject)

prune_triples(graph, triples)

Remove all items in iterable triples from graph (modify in place).

Source code in rdf/utils.py
def prune_triples(graph, triples):
    """Delete every item of the iterable `triples` from `graph`.

    The graph is changed in place; nothing is returned.
    """
    for item in triples:
        graph.remove(item)

prune_triples_cascade(graph, triples, graphs_applied_to=[], privileged_predicates=[])

Recursively remove subjects in triples and all related resources from graph. Specify which graphs qualify, i.e. from which triples will be deleted, in graphs_applied_to. Optionally, skip items related via specific (privileged) predicates.

Source code in rdf/utils.py
def prune_triples_cascade(graph, triples, graphs_applied_to=None, privileged_predicates=None):
    """
    Recursively remove subjects in `triples` and all related resources from `graph`.
    Specify which graphs qualify, i.e. from which triples will be deleted, in `graphs_applied_to`.
    Optionally, skip items related via specific (privileged) predicates.

    Both `graphs_applied_to` and `privileged_predicates` default to empty.
    """
    if graphs_applied_to is None:
        graphs_applied_to = []
    if privileged_predicates is None:
        privileged_predicates = []

    # Collect the distinct subjects (in first-seen order) before touching
    # the graph: `triples` may be a lazy view of `graph` itself, and a
    # subject shared by several triples only needs to be pruned once.
    subjects = list(dict.fromkeys(triple[0] for triple in triples))

    for subject in subjects:
        prune_recursively(
            graph, subject, graphs_applied_to, privileged_predicates
        )

recode_latin1_triples(g, latin1_triples, commit=False)

Find latin1-encoded strings and recode them to utf-8. If commit, also replace them in the triplestore.

Source code in rdf/utils.py
def recode_latin1_triples(g: Graph, latin1_triples: Graph, commit=False) -> None:
    """Recode the objects of `latin1_triples` from latin1 to utf-8.

    Every object is passed through `latin1_to_utf8`; objects that come
    back unchanged are skipped. Without `commit` this is a dry run that
    prints each original/recoded pair for manual inspection. With `commit`
    the recoded triple is added to `g` and the original one removed.
    The number of replaced triples is printed at the end.
    """
    updated = 0
    for subj, pred, obj in latin1_triples:
        fixed = latin1_to_utf8(obj)
        if obj != fixed:
            if commit:
                # NOTE(review): Literal(fixed) is built from the value only;
                # presumably a language tag or datatype on the original
                # object is not carried over - confirm.
                g.add((subj, pred, Literal(fixed)))
                g.remove((subj, pred, obj))
                updated += 1
            else:
                # dry run: show before and after for a manual sanity check
                print(obj)
                print(fixed)
                print('---')
    print(f'updated {updated} triples')

sample_graph(graph, subjects, request)

Return a random sample from a graph, optionally filtering with a list containing [predicate, object].

Source code in rdf/utils.py
def sample_graph(graph, subjects, request):
    """Return a new Graph with all triples of a random selection of `subjects`.

    The sample size is read from the `n_results` GET parameter of
    `request`. When there are more subjects than that, `n_results` of them
    are picked at random; otherwise all subjects are used. For every
    selected subject, all of its triples in `graph` are copied over.
    """
    limit = int(request.GET.get('n_results'))
    chosen = random.sample(list(subjects), limit) if len(subjects) > limit else subjects
    sample = Graph()
    for subject in chosen:
        for triple in graph.triples((subject, None, None)):
            sample.add(triple)
    return sample

traverse_backward(full_graph, fringe, plys)

Traverse full_graph by subject plys times, starting from fringe.

Returns a graph with all triples accumulated during the traversal, excluding fringe. This result always contains complete resources, i.e., all triples of each subject in the graph are included.

Source code in rdf/utils.py
def traverse_backward(full_graph, fringe, plys):
    """
    Walk `full_graph` against the direction of its triples, `plys` steps deep.

    The walk starts at the subjects of `fringe`. In each step, the
    resources that have a current subject as their object ("parents") are
    looked up in `full_graph`, and all triples of those parents are
    collected. The subjects collected in one step that have not been
    expanded before form the starting point of the next step. The walk
    ends early when no such subjects are left.

    Returns a Graph with everything collected along the way. Parents are
    always added with all of their triples, so the result holds complete
    resources. `fringe` itself is not copied into the result.
    """
    accumulated = Graph()
    frontier = set(fringe.subjects())
    expanded = set()
    while plys > 0 and frontier:
        layer = Graph()
        layer_subjects = set()
        for node in frontier:
            parents = set(full_graph.subjects(None, node))
            # parents already copied into this layer are not copied again
            for parent in parents - layer_subjects:
                append_triples(layer, full_graph.triples((parent, None, None)))
            layer_subjects |= parents
        accumulated |= layer
        expanded |= frontier
        frontier = set(layer.subjects()) - expanded
        plys -= 1
    return accumulated

traverse_forward(full_graph, fringe, plys)

Traverse full_graph by object plys times, starting from fringe.

Returns a graph with all triples accumulated during the traversal, excluding fringe.

Source code in rdf/utils.py
def traverse_forward(full_graph, fringe, plys):
    """
    Walk `full_graph` along the direction of its triples, `plys` steps deep.

    The walk starts at the objects of `fringe`. In each step, all triples
    in `full_graph` that have one of the current, not previously expanded,
    objects as their subject are collected; literals are never expanded.
    The triples collected in one step supply the objects for the next
    step. The walk ends early when no unexpanded objects are left.

    Returns a Graph with everything collected along the way. `fringe`
    itself is not copied into the result.
    """
    accumulated = Graph()
    expanded = set()
    layer = fringe
    while plys > 0:
        targets = set(layer.objects()) - expanded
        if not targets:
            break
        layer = Graph()
        for target in targets:
            if isinstance(target, Literal):
                continue
            append_triples(layer, full_graph.triples((target, None, None)))
        accumulated |= layer
        expanded |= targets
        plys -= 1
    return accumulated