Randomization

This page describes functions contained in the `randomization` module used to generate of randomized controls of connectomes.

`ER_model(adj, threads=8, seed=(None, None))`

Creates an Erdos Renyi digraph.

Parameters:

Name	Type	Description	Default
`adj`	`sparse matrix or 2d-array`	Adjacency matrix	required
`threads`	`int`	Number of parallel threads to be used to generate model	`8`
`seed`	`pair of ints`	Random seed to be used, if none is provided a seed is randomly selected	`(None, None)`

Returns:

Type	Description
`coo matrix`	Matrix of the generated control

Raises:

Type	Description
`AssertionError`	If adj is not square

Source code in src/connalysis/randomization/randomization.py

def ER_model(adj, threads=8, seed=(None,None)):
    """Creates an Erdos Renyi digraph.

    Parameters
    ----------
    adj : sparse matrix or 2d-array
        Adjacency matrix
    threads : int
        Number of parallel threads to be used to generate model
    seed : pair of ints
        Random seed to be used, if none is provided a seed is randomly selected

    Returns
    -------
    coo matrix
        Matrix of the generated control


    Raises
    ------
    AssertionError
        If adj is not square

    """
    assert adj.shape[0] == adj.shape[1], "The matrix is not square"
    n = adj.shape[0]
    p = adj.astype(bool).sum()/((n)*(n-1))
    if isinstance(seed, int):
      seed=(seed,seed)
    return run_ER(n=n, p=p, threads=threads, seed=seed)

`ER_shuffle(adj, neuron_properties=[], shuffle_type='sparse')`

Creates an Erdős Renyi digraph with exactly the same number of edges and weights as adj by shuffling the non-diagonla entries of adj.

Parameters:

Name	Type	Description	Default
`adj`	`sparse matrix or 2d array`	Base digraph for which the ER control is built.	required
`shuffle_type`	`string`	If `dense` if first converts adj to a dense array and then shuffle. If `sparse` it keeps sparsity during the shuffle.	`'sparse'`
`seed`	`int`	Random seed to be used, if none is provided a seed is randomly selected	required

Returns:

Type	Description
`coo matrix`	Matrix of the generated control

Raises:

Type	Description
`AssertionError`	If adj is not not sparse and `shuffle_type` is set to sparse. A sparse shuffle can only be done to a sparse matrix.

Source code in src/connalysis/randomization/randomization.py

@_seed_random_state
def ER_shuffle(adj, neuron_properties=[], shuffle_type="sparse"):
    """Creates an Erdős Renyi digraph with exactly the same number of edges and weights
    as adj by shuffling the non-diagonla entries of adj.

    Parameters
    ----------
    adj : sparse matrix or 2d array
        Base digraph for which the ER control is built.
    shuffle_type : string
        If ``dense`` if first converts adj to a dense array and then shuffle.
        If ``sparse`` it keeps sparsity during the shuffle.
    seed : int
        Random seed to be used, if none is provided a seed is randomly selected

    Returns
    -------
    coo matrix
        Matrix of the generated control

    Raises
    ------
    AssertionError
        If adj is not not sparse and ``shuffle_type`` is set to sparse.  A sparse shuffle can only be done to a sparse matrix.

    """
    N = adj.shape[0]
    if shuffle_type == "dense":
        if sp.issparse(adj):
            adj = adj.toarray()
        above_diagonal = adj[np.triu_indices(N, k=1)]
        below_diagonal = adj[np.tril_indices(N, k=-1)]
        off_diagonal = np.concatenate([above_diagonal, below_diagonal])

        # Shuffle away from the diagonal
        np.random.shuffle(off_diagonal)
        adj[np.triu_indices(N, k=1)] = off_diagonal[0:N * (N - 1) // 2]
        adj[np.tril_indices(N, k=-1)] = off_diagonal[N * (N - 1) // 2:]
        # Return matrix
        return sp.coo_matrix(adj)
    elif shuffle_type == "sparse":
        # Keep matrix sparse
        assert sp.issparse(adj), "Matrix must be sparse for sparse shuffle"
        data = adj.data
        K = adj.nnz  # number of non zero entries
        T = N * (N - 1)  # number of entries away from the diagonal

        # Select K entries in np.arange(T) representing the indices of the array of
        # pairs of tuples of entries away from the diagonal
        # [(0,1), (0,2), (0,3), ...,(0,n),
        #  (1,0) (1,2) (1,3) ... (1,n),
        # ....
        #  (n,0) (n,1) (n,2) ... (n,n-1)]
        rnd_index = np.random.choice(T, size=K, replace=False)

        # Get corresponding row and column indices
        rows = rnd_index // (N - 1)  # row indices
        alpha = rnd_index % (N - 1)  # colum indices candidates shifted by 1
        cols = alpha + 1 - (alpha < rows).astype(int)  # column indices

        # Return matrix
        return sp.coo_matrix((data, (rows, cols)), shape=(N, N))

`_seed_random_state(shuffler, seeder=np.random.seed)`

Decorate a connectivity shuffler to seed it's random-state before execution.

It is expected that the generator can be seeded calling seeder(seed).

Source code in src/connalysis/randomization/randomization.py

def _seed_random_state(shuffler, seeder=np.random.seed):
    """Decorate a connectivity shuffler to seed it's random-state before execution.

    It is expected that the generator can be seeded calling `seeder(seed)`.
    """
    def seed_and_run_method(adj, neuron_properties=[], seed=None, **kwargs):
        """Reinitialize numpy random state using the value of seed among `kwargs`.
        doing nothing if no `seed` provided --- expecting an external initialization.
        """
        if seed is None:
            LOG.warning("No seed among keyword arguments")
        else:
            seeder(seed)

        return shuffler(adj, neuron_properties, **kwargs)

    return seed_and_run_method

`add_connections(adj, nc, seed=0, sparse_mode=True, max_iter=30)`

Function add connections at random

Parameters:

Name	Type	Description	Default
`adj`	`matrix`	Adjacency matrix of a directed network	required
`nc`	`Number of connections to be added`		required
`seed`	`int`	Random seed to be used to selecte edges that will become reciprocal	`0`
`sparse_mode`		If sparse_mode is `True` the matrix is generated iteratively restricting to a sparse format. If `False` adj is converted to dense and edges are added in a single step	`True`

Returns:

Type	Description
`bool matrix`	Digraph with nc more edges than adj

Source code in src/connalysis/randomization/randomization.py

def add_connections(adj,nc, seed=0,sparse_mode=True, max_iter=30):
    """Function add connections at random

    Parameters
    ----------
    adj : matrix
        Adjacency matrix of a directed network
    nc : Number of connections to be added
    seed : int
        Random seed to be used to selecte edges that will become reciprocal
    sparse_mode: bool
        If sparse_mode is ``True`` the matrix is generated iteratively restricting to a sparse format.
        If ``False`` adj is converted to dense and edges are added in a single step

    Returns
    -------
    bool matrix
        Digraph with nc more edges than adj
    """
    adj=adj.astype(bool)
    # Add bidirectional connections
    if sparse_mode:
        # TODO: Search for more efficient way to do this
        N=adj.shape[0]; E=adj.sum(); k=0 # Number nodes, target edges and iteration counter
        while adj.sum()< E +nc: #target number of edges
            if k>max_iter:
                print("More than max_it iterations tried, increase number of iterations or try sparse_mode =False")
                break
            den=(E+nc-adj.sum())/(N*N) #density of matrix added
            generator = np.random.default_rng(seed)
            A=sp.random(*adj.shape, density=den, format='csr', dtype = 'bool', random_state = generator)
            A.setdiag(0)
            adj=adj+A; k+=1
        adj.eliminate_zeros()
    else:
        if sp.issparse(adj): adj=adj.toarray()
        ul_ind = np.where(np.eye(*adj.shape) == 0) # non-diagonal indices
        zero_ind=np.where(adj[ul_ind]==0)
        generator = np.random.default_rng(seed)
        selection=zero_ind[0][generator.choice(zero_ind[0].shape[0], replace=False, size=nc)]
        adj[(ul_ind[0][selection],ul_ind[1][selection])]=1
    return adj

`add_rc_connections(adj, n_rc, seed=0)`

Function to turn a fixed amount of unidirectional connections of adj into reciprocal connections.

Parameters:

Name	Type	Description	Default
`adj`	`sparse matrix`	Adjacency matrix of a directed network	required
`n_rc`	`Number of reciprocal connections to be added`		required
`seed`	`int`	Random seed to be used to selecte edges that will become reciprocal	`0`

Returns:

Type	Description
`matrix`	Digraph with n_rc more edges than adj, all of which form reciprocal connections

Source code in src/connalysis/randomization/randomization.py

def add_rc_connections(adj,n_rc, seed=0):
    """Function to turn a fixed amount of unidirectional connections of adj into reciprocal connections.

    Parameters
    ----------
    adj : sparse matrix
        Adjacency matrix of a directed network
    n_rc : Number of reciprocal connections to be added
    seed : int
        Random seed to be used to selecte edges that will become reciprocal

    Returns
    -------
    matrix
        Digraph with n_rc more edges than adj, all of which form reciprocal connections
    """
    # TODO: Move this function from utils and change dependencies
    from .rand_utils import add_bidirectional_connections
    # Add bidirectional connections
    generator=np.random.default_rng(seed)
    return add_bidirectional_connections(adj, n_rc, generator).astype(bool)

`add_rc_connections_skeleta(adj, factors, dimensions=None, skeleta=None, threads=8, seed=0, return_skeleta=False)`

Function to add reciprocal connections at random to adj on the skeleta of maximal simplices of adj

Parameters:

Name	Type	Description	Default
`adj`	`sparse matrix`	Adjacency matrix of a directed network	required
`factors`		Factor by which to multiply the reciprocal connections on the `k`-skeleta of adj. If factors is an int the same factor is used on all dimensions. Otherwise, factors can be a dictionary with keys dimensions and values the factor by which to multiply the number of reciprocal connections on that dimensions.	required
`dimensions`		The dimensions at which to increase the number of reciprocal connections. If `None` then all dimensions will be used	`None`
`skeleta`		Dictionary with keys f'dimension_{dim}' for dim in dimensions and values binary sparse sub-matrices of adj on which reciprocal connections will be added.	`None`
`threads`		Number of threads on which to parallelize the skeleta computation if not pre-computed	`8`
`seed`	`int`	Random seed to be used to selecte edges that will become reciprocal	`0`

Returns:

Type	Description
`(csc_matrix, dict)`	Digraph with add reciprocal connections If return_skeleta=True it also returns the skeleta of maximal simplices of adj in the dimensions selected

Source code in src/connalysis/randomization/randomization.py

def add_rc_connections_skeleta(adj,factors,dimensions=None, skeleta=None, threads=8, seed=0, return_skeleta=False):
    """Function to add reciprocal connections at random to adj on the skeleta of maximal simplices of adj

    Parameters
    ----------
    adj : sparse matrix
        Adjacency matrix of a directed network
    factors: int or dict
        Factor by which to multiply the reciprocal connections on the ``k``-skeleta of adj.  If factors is an int
        the same factor is used on all dimensions.  Otherwise, factors can be a dictionary with keys dimensions
        and values the factor by which to multiply the number of reciprocal connections on that dimensions.
    dimensions: array
        The dimensions at which to increase the number of reciprocal connections.  If ``None`` then all dimensions
        will be used
    skeleta: dict
        Dictionary with keys f'dimension_{dim}' for dim in dimensions and values binary sparse sub-matrices of adj
        on which reciprocal connections will be added.
    threads: int
        Number of threads on which to parallelize the skeleta computation if not pre-computed
    seed : int
        Random seed to be used to selecte edges that will become reciprocal

    Returns
    -------
    csc_matrix, dict
        Digraph with add reciprocal connections
        If return_skeleta=True it also returns the skeleta of maximal simplices of adj in the dimensions selected

    """
    adj=adj.tocsr()
    from connalysis.network.topology import rc_submatrix
    from .rand_utils import add_bidirectional_connections
    # Compute skeleton graphs if not precomputed
    if skeleta is None:
        from connalysis.network.topology import get_k_skeleta_graph
        max_simplices=True # Add option for all simplices?
        skeleta=get_k_skeleta_graph(adj, max_simplices=max_simplices, dimensions=dimensions, threads=threads)
    # Restrict to dimensions that contain simplices
    if dimensions is None:
        dimensions = np.array([int(key[10:]) for key in skeleta.keys()])
    else:
        dimensions = np.intersect1d(np.array([int(key[10:]) for key in skeleta.keys()]), dimensions)
    # Generate mapping between factors and dimensions or check the one provided
    if isinstance(factors,int):
        factors={dim:factors for dim in dimensions}
    else:
        assert isinstance(factors, dict), "factors must be int or dictionary"
        assert np.isin(dimensions, np.array(list(factors.keys()))).all(), "all dimensions must be a key in factors"

    # Add bidirectional connections
    generator=np.random.default_rng(seed)
    rc_add={dim:((factors[dim]-1)*(rc_submatrix(skeleta[f'dimension_{dim}']).sum()))//2 for dim in dimensions}
    print("Number of reciprocal connections added per dimension"); print(rc_add) # Remove or add verbose option
    M=adj.copy()
    for dim in dimensions:
        M+=add_bidirectional_connections(skeleta[f'dimension_{dim}'], rc_add[dim], generator).astype(bool)
    if return_skeleta:
        return M, skeleta
    else:
        return M

`adjusted_ER(adj, seed=None)`

Function to generate an Erdos Renyi model with adjusted bidirectional connections.

Parameters:

Name	Type	Description	Default
`adj`	`csc_matrix`	Adjacency matrix of a directed network.	required
`seed`	`int`	Random seed to be used	`None`

Returns:

Type	Description
`csc_matrix`	Erdos Renyi shuffled control with additional reciprocal connections added at random to match the number of reciprocal connections of the original matrix.

See Also

underlying_model : Function which returns a digraph with the same underlying undirected graph and same number of reciprocal connections

bishuffled_model : Function which returns a digraph with shuffled reciprocal connections

Source code in src/connalysis/randomization/randomization.py

def adjusted_ER(adj, seed=None):
    """Function to generate an Erdos  Renyi model with adjusted bidirectional connections.

    Parameters
    ----------
    adj :  csc_matrix
        Adjacency matrix of a directed network.
    seed : int
        Random seed to be used

    Returns
    -------
    csc_matrix
        Erdos Renyi shuffled control with additional reciprocal connections added at random
        to match the number of reciprocal connections of the original matrix.

    See Also
    --------
    [underlying_model](randomization.md#src.connalysis.randomization.randomization.underlying_model) :
    Function which returns a digraph with the same  underlying undirected graph
    and same number of reciprocal connections

    [bishuffled_model](randomization.md#src.connalysis.randomization.randomization.bishuffled_model) :
    Function which returns a digraph with shuffled reciprocal connections
    """
    from connalysis.network.topology import rc_submatrix
    from .rand_utils import adjust_bidirectional_connections
    generator = np.random.default_rng(seed)
    ER_matrix = ER_shuffle(adj, seed=seed).tocsc()
    bedges_to_add = int(rc_submatrix(adj).count_nonzero() -rc_submatrix(ER_matrix).count_nonzero())//2
    if bedges_to_add >= 0:
        return adjust_bidirectional_connections(ER_matrix, bedges_to_add, generator)
    else:
        LOG.info("Erdos-Renyi control has more reciprocal connections than original, so they are not adjusted.")
        return ER_matrix

`bishuffled_model(adj, seed=None)`

Function to generate a digraph with shuffled reciprocal connections

Parameters:

Name	Type	Description	Default
`adj`	`csc_matrix`	Adjacency matrix of a directed network.	required
`seed`	`int`	Random seed to be used	`None`

Returns:

Type	Description
`csc_matrix`	Digraph with shuffled reciprocal connections

See Also

adjusted_ER : Function to generate an Erdos Renyi model with adjusted bidirectional connections

underlying_model : Function which returns a digraph with the same underlying undirected graph and same number of reciprocal connections

Source code in src/connalysis/randomization/randomization.py

def bishuffled_model(adj, seed = None):
    """Function to generate a digraph with shuffled reciprocal connections

    Parameters
    ----------
    adj : csc_matrix
        Adjacency matrix of a directed network.
    seed : int
        Random seed to be used

    Returns
    -------
    csc_matrix
        Digraph with shuffled reciprocal connections

    See Also
    --------
    [adjusted_ER](randomization.md#src.connalysis.randomization.randomization.adjusted_ER) :
    Function to generate an Erdos  Renyi model with adjusted bidirectional connections

    [underlying_model](randomization.md#src.connalysis.randomization.randomization.underlying_model) :
    Function which returns a digraph with the same  underlying undirected graph
    and same number of reciprocal connections
    """
    from connalysis.network.topology import rc_submatrix
    from .rand_utils import  add_bidirectional_connections, half_matrix
    generator = np.random.default_rng(seed)
    ut_bedges = sp.triu(rc_submatrix(adj))
    target_bedges = ut_bedges.count_nonzero()
    bedges1, bedges2 = half_matrix(ut_bedges, generator)
    return add_bidirectional_connections(adj - bedges1 - bedges2.T, target_bedges, generator)

`configuration_model(M, seed=None)`

Function to generate the configuration control model, obtained by shuffling the row and column of coo format independently, to create new coo matrix, then removing any multiple edges and loops.

Parameters:

Name	Type	Description	Default
`adj`	`coo - matrix`	Adjacency matrix of a directed network.	required
`seed`	`int`	Random seed to be used	`None`

Returns:

Type	Description
`csr matrix`	Configuration model control of adj

See Also

run_SBM : Function which runs the stochastic block model

run_DD2 : Function which runs the 2nd distance dependent model

Source code in src/connalysis/randomization/randomization.py

def configuration_model(M, seed = None):
    """Function to generate the configuration control model, obtained by
    shuffling the row and column of coo format independently, to create
    new coo matrix, then removing any multiple edges and loops.

    Parameters
    ----------
    adj : coo-matrix
        Adjacency matrix of a directed network.
    seed : int
        Random seed to be used

    Returns
    -------
    csr matrix
        Configuration model control of adj

    See Also
    --------
    [run_SBM](randomization.md#src.connalysis.randomization.randomization.run_SBM) :
    Function which runs the stochastic block model

    [run_DD2](randomization.md#src.connalysis.randomization.randomization.run_DD2) :
    Function which runs the 2nd distance dependent model
    """
    adj=M.copy().tocoo()
    generator = np.random.default_rng(seed)
    R = adj.row
    C = adj.col
    generator.shuffle(R)
    generator.shuffle(C)
    CM_matrix = sp.coo_matrix(([1]*len(R),(R,C)),shape=adj.shape).tocsr()
    CM_matrix.setdiag(0)
    CM_matrix.eliminate_zeros()
    return CM_matrix

`random_geometric_model(pts, pts_x=None, n_neighbors=None, dist_neighbors=None, n_pick=None, p_pick=None, scale_axes=None, directionality_fac=0.0, directionality_axis=None, distance_func=None, custom_w_out=None, custom_w_in=None, no_diag=True, mirror=False)`

Generates a directed random geometric graph with optional modifications.

Nodes are given by embedded points in R^m with positions given by pts. Directed edges are created probabilistically through two successive filtering stages:

Candidate selection: For each node, select candidate neighbors either by:

a. Choosing the n_neighbors nearest neighbors in R^m, or b. Choosing all nodes within distance dist_neighbors.

Sub-selection: From the candidates, draw the final neighbors either by:

a. Sampling n_pick nodes uniformly at random, or b. Retaining each candidate independently with probability p_pick.

The sub-selection step is optional, and can moreover be biased to give asymmetric connectivity via directionality_fac and directionality_axis, or via a custom distance_func.

Parameters:

Name	Type	Description	Default
`pts`	`array`	Shape: n x m, where n is the number of nodes and m the number of dimensions. The locations of the vertices that the random geometric graph is to be constructed on.	required
`pts_x`	`array`	Optional input for a second set of vertex locations. If provided, edges from `pts` to `pts_x` will be placed according to the random geometric rules. That is, a random geometric graph on the concatenation of `pts` and `pts_x` is constructed, but only the entries in the top right part of the adjacency matrix are kept and the rest set to 0.	`None`
`n_neighbors`	`int`	One way of specifying the potential set of partners for each node. If specified, the `n_neighbors` nearest neighbors of a node are potential partners for outgoing connections to be placed.	`None`
`dist_neighbors`	`float`	The other way to specify the potential set of partners. If specified, all vertices within that distance of a node are potential partners for outgoing connections to be placed. Either `n_neighbors` or `dist_neighbors` must be used. If both or neither are used, an Exception is raised.	`None`
`n_pick`	`int`	One way of specifying the actual number of vertices that are connected from each node. If specified, `n_pick` vertices from the candidates (see above) are randomly picked and outgoing connections placed to all of them. If the number of candidates for a vertex is smaller than `n_pick`, then simply all candidates will be picked and no warning or exception raised.	`None`
`p_pick`	`float`	The other way to specify the actual number of vertices that are connected from each node. If specified, each candidate (see above) will be picked with probability `p_pick`; if picked a connection will be placed between them. Either `n_pick` or `p_pick` must be used. If both or neither are used, and Exception is raised.	`None`
`scale_axes`	`array`	Shape: (m, ), where m is the number of dimensions. One way of biasing which pairs are connected. If used, distances along each dimension are scaled by the corresponding factor in `scale_axes` before candidates for connection are picked, i.e., before `n_neighbors` or `dist_neighbors` is evaluated.	`None`
`directionality_axis`	`array`	Shape: (m, ), where m is the number of dimensions. Another way of biasing which pairs are connected. This introduces a directionality bias, i.e., connections are more likely if their direction aligns with a specified vector and less likely if their direction is in the opposite direction of the vector. This is calculated as the dot product of the direction vector of the potential connection with `directionality_axis`. This is evaluated after candidates have been picked and only affects the selection of actual connections from the candidates (see `n_neighbors` / `dist_neighbors` and `n_pick` / `p_pick`) above.	`None`
`directionality_fac`	`float`	Must be between -1 and 1. This specifies how much the dot product calculated using `directionality_axis` (see above) is weighed to calculate a bias. Formally: w = (delta o directionality_axis) * directionality_fac + 1, where o denotes the vector dot product, delta is the vector from the source to the target vertex of a potential connection. Note that these are relative weights that are scaled to fulfill the constraints on the number of connections to pick given by `n_pick` or `p_pick`. Note that this does NOT affect the selection of candidates, parameterized by `n_neighbors` or `dist_neighbors`, but only the selection from the candidates. Directionality bias cannot be combined with distance bias (see below).	`0.0`
`distance_func`	`function`	A function that is to be evaluated on pairwise distances of candidate pairs. Another way of biasing which pairs are connected. The function takes a distance as input and returns a relative weight. Note that these are relative weights that are scaled to fulfill the constraints on the number of connections to pick given by `n_pick` or `p_pick`. Note that this does NOT affect the selection of candidates, parameterized by `n_neighbors` or `dist_neighbors`, but only the selection from the candidates. Cannot be combined with a directionality bias (see above).	`None`
`custom_w_out`	`array`	Shape: (n, ). For details, see below.	`None`
`custom_w_in`	`array`	Shape: (n, ). custom_w_out and custom_w_in, if used, provide per-node biases for the selection of connection from candidates. Simply, for all potential outgoing connections from vertex i, entry custom_w_out[i] is used as a relative weight that is multiplied with any other potential weight. Similarly for custom_w_in[i] for potential incoming connections. Note that this does NOT affect the selection of candidates, parameterized by `n_neighbors` or `dist_neighbors`, but only the selection from the candidates.	`None`
`no_diag`	`bool`	If set to False, connections from a vertex to itself are allowed to be placed.	`True`
`symmetrize`	`bool`	If set to True, the output matrix is made symmetrical with the following strategy: If a connection from vertex i to j exists, a connection is also placed from j to i if it does not already exist. This is ignored without warning if `pts_x` is specified.	required

Returns:

Type	Description
`csc_matrix`	The adjacency matrix of an instance of a random geometric graph model.

Raises:

Type	Description
`ValueError`	If both n_neighbors and dist_neighbors is used
`ValueError`	If neither n_neighbors nor dist_neighbors is used
`ValueError`	If both n_pick and p_pick are used
`ValueError`	If neither n_pick not p_pick are used
`ValueError`	If p_pick is not between [0, 1]
`ValueError`	If directionality_fac is not between [-1, 1]
`ValueError`	If both directionality_axis and distance_func are used.

Source code in src/connalysis/randomization/randomization.py

def random_geometric_model(pts, pts_x=None, n_neighbors=None, dist_neighbors=None, n_pick=None,
                            p_pick=None, scale_axes=None,
                            directionality_fac=0.0, directionality_axis=None,
                            distance_func=None,
                            custom_w_out=None, custom_w_in=None,
                            no_diag=True, mirror=False):
    """
    Generates a directed random geometric graph with optional modifications.

    Nodes are given by embedded points in R^m with positions given by `pts`. Directed edges are
    created probabilistically through two successive filtering stages:

    1. **Candidate selection**: For each node, select candidate neighbors either by:

       a. Choosing the `n_neighbors` nearest neighbors in R^m, or
       b. Choosing all nodes within distance `dist_neighbors`.

    2. **Sub-selection**: From the candidates, draw the final neighbors either by:

       a. Sampling `n_pick` nodes uniformly at random, or
       b. Retaining each candidate independently with probability `p_pick`.

    The sub-selection step is optional, and can moreover be biased to give
    asymmetric connectivity via `directionality_fac` and `directionality_axis`,
    or via a custom `distance_func`.

    Parameters
    ----------
    pts : numpy.array
        Shape: n x m, where n is the number of nodes and m the number of dimensions. The locations of the
        vertices that the random geometric graph is to be constructed on.
    pts_x : numpy.array
        Optional input for a second set of vertex locations. If provided, edges from `pts` to `pts_x` will
        be placed according to the random geometric rules. That is, a random geometric graph on the 
        concatenation of `pts` and `pts_x` is constructed, but only the entries in the top right part of the
        adjacency matrix are kept and the rest set to 0.
    n_neighbors : int
        One way of specifying the potential set of partners for each node. If specified, the `n_neighbors` 
        nearest neighbors of a node are potential partners for outgoing connections to be placed.
    dist_neighbors : float
        The other way to specify the potential set of partners. If specified, all vertices within that distance
        of a node are potential partners for outgoing connections to be placed. 
        Either `n_neighbors` or `dist_neighbors` must be used. If both or neither are used, an Exception is raised.
    n_pick : int
        One way of specifying the actual number of vertices that are connected from each node. If specified, 
        `n_pick` vertices from the candidates (see above) are randomly picked and outgoing connections placed
        to all of them. If the number of candidates for a vertex is smaller than `n_pick`, then simply all 
        candidates will be picked and no warning or exception raised.
    p_pick : float
        The other way to specify the actual number of vertices that are connected from each node. If specified,
        each candidate (see above) will be picked with probability `p_pick`; if picked a connection will be
        placed between them.
        Either `n_pick` or `p_pick` must be used. If both or neither are used, and Exception is raised.
    scale_axes : numpy.array
        Shape: (m, ), where m is the number of dimensions. One way of biasing which pairs are connected. 
        If used, distances along each dimension are scaled by the corresponding factor in `scale_axes` before
        candidates for connection are picked, i.e., before `n_neighbors` or `dist_neighbors` is evaluated.
    directionality_axis : numpy.array
        Shape: (m, ), where m is the number of dimensions. Another way of biasing which pairs are connected.
        This introduces a directionality bias, i.e., connections are more likely if their direction aligns with
        a specified vector and less likely if their direction is in the opposite direction of the vector. This
        is calculated as the dot product of the direction vector of the potential connection with 
        `directionality_axis`. 
        This is evaluated _after_ candidates have been picked and only affects the selection of actual connections
        from the candidates (see `n_neighbors` / `dist_neighbors` and `n_pick` / `p_pick`) above.
    directionality_fac : float
        Must be between -1 and 1. This specifies how much the dot product calculated using `directionality_axis`
        (see above) is weighed to calculate a bias. Formally:
        w = (delta o directionality_axis) * directionality_fac + 1,
        where o denotes the vector dot product, delta is the vector from the source to the target vertex of a
        potential connection. 
        Note that these are relative weights that are scaled to fulfill the constraints
        on the number of connections to pick given by `n_pick` or `p_pick`.
        Note that this does NOT affect the selection of candidates, parameterized by `n_neighbors` or 
        `dist_neighbors`, but only the selection from the candidates.
        Directionality bias cannot be combined with distance bias (see below).
    distance_func : function
        A function that is to be evaluated on pairwise distances of candidate pairs. Another way of biasing 
        which pairs are connected. The function takes a distance as input and returns a relative weight. 
        Note that these are relative weights that are scaled to fulfill the constraints on the number of
        connections to pick given by `n_pick` or `p_pick`. 
        Note that this does NOT affect the selection of candidates, parameterized by `n_neighbors` or 
        `dist_neighbors`, but only the selection from the candidates.
        Cannot be combined with a directionality bias (see above).
    custom_w_out : numpy.array
        Shape: (n, ). For details, see below.
    custom_w_in : numpy.array
        Shape: (n, ). custom_w_out and custom_w_in, if used, provide per-node biases for the selection of
        connection from candidates. Simply, for all potential outgoing connections from vertex i, entry
        custom_w_out[i] is used as a relative weight that is multiplied with any other potential weight.
        Similarly for custom_w_in[i] for potential incoming connections.
        Note that this does NOT affect the selection of candidates, parameterized by `n_neighbors` or 
        `dist_neighbors`, but only the selection from the candidates.
    no_diag : bool
        If set to False, connections from a vertex to itself are allowed to be placed.
    symmetrize : bool
        If set to True, the output matrix is made symmetrical with the following strategy: If a connection
        from vertex i to j exists, a connection is also placed from j to i if it does not already exist.
        This is ignored without warning if `pts_x` is specified.

    Returns
    ----------
    sparse.csc_matrix
        The adjacency matrix of an instance of a random geometric graph model.

    Raises
    ----------
    ValueError
        If both n_neighbors and dist_neighbors is used
    ValueError
        If neither n_neighbors nor dist_neighbors is used
    ValueError
        If both n_pick and p_pick are used
    ValueError
        If neither n_pick not p_pick are used
    ValueError
        If p_pick is not between [0, 1]
    ValueError
        If directionality_fac is not between [-1, 1]
    ValueError
        If both directionality_axis and distance_func are used.
    """
    # Check inputs
    if (n_neighbors is not None) and (dist_neighbors is not None):
        raise ValueError("Cannot specify both n_neighbors and dist_neighbors!")
    if (n_neighbors is None) and (dist_neighbors is None):
        raise ValueError("Must specify either n_neighbors or dist_neighbors!")
    if (n_pick is not None) and (p_pick is not None):
        raise ValueError("Cannot specify both n_pick and p_pick!")
    if (n_pick is None) and (p_pick is None):
        raise ValueError("Must specify either n_pick or p_pick!")
    if p_pick is not None:
        if (p_pick < 0) or (p_pick > 1.0):
            raise ValueError(f"p_pick must be between 0 and 1! Found: {p_pick}")
    if (directionality_fac < -1) or (directionality_fac > 1):
        raise ValueError("directionality_fac must be between -1 and 1!")
    if (directionality_axis is not None) and (distance_func is not None):
        raise ValueError("Cannot specify both directionality bias and custom distance function!")

    # Evaluation of optional inputs
    mirror = bool(mirror)
    if pts_x is None:
        pts_x = pts
    else:
        mirror = False
    if scale_axes is not None:
        pts = pts / np.array(scale_axes).reshape((1, -1))
        pts_x = pts_x / np.array(scale_axes).reshape((1, -1))
    shape = (len(pts_x), len(pts))

    # Generate candidates for connections based on number or max. distance
    kd = KDTree(pts)
    if n_neighbors is not None: # Case 1: number of neighbor candidates given
        if no_diag:
            _, idx = kd.query(pts_x, np.arange(2, 2 + n_neighbors))  # len(idx_x) x n_neighbors
        else:
            _, idx = kd.query(pts_x, np.arange(1, 1 + n_neighbors))  # len(idx_x) x n_neighbors
    elif dist_neighbors is not None: # Case 2: max. distance for neighbor candidates given
        idx = list(kd.query_ball_point(pts_x, dist_neighbors))
        if no_diag:
            idx = [np.setdiff1d(_idx, _i) for _i, _idx in enumerate(idx)]  # This is slow. Filter after the fact?

    # Pick from candidates. Step 1: Calculate bias weights (if any)
    # Step 1a: Weights based on distance or direction.
    w = direction_or_distance_dependent_w(pts_x, pts, idx, distance_func=distance_func,
                                           directionality_axis=directionality_axis,
                                           directionality_fac=directionality_fac)
    # Step 1b: Weights based on per-node biases (in- or out-degree)
    if custom_w_in is not None and custom_w_out is not None:
        cust_w = evaluate_per_node_weights(custom_w_out, custom_w_in, idx)
        cust_w = cust_w / cust_w.mean()
        w = w * cust_w

    # Step 2: Actually pick connections based on constraints (n_pick, p_pick) and weights.
    if p_pick is not None:
        wmean = w.mean()
        picker_func = lambda _x: _x.loc[np.random.rand(len(_x)) < (p_pick * _x / wmean)]
    elif n_pick is not None:
        picker_func = lambda _x: _x.iloc[np.random.choice(len(_x), np.minimum(n_pick, len(_x)),
                                                             replace=False,
                                                             p=_x / _x.sum())]
    w = w.groupby("neuron").apply(picker_func)

    # Assemble output into sparse matrix.
    if len(w) > 0: # If any connection exists.
        w = w.droplevel(0)
        return connection_df_to_csc_matrix(w, mirror=mirror, shape=shape)
    # Fallback: Return empty matrix.
    return sp.csc_matrix((len(pts), len(pts)), dtype=bool)

`run_DD2(n, a, b, xyz, threads=8, seed=(None, None))`

Creates a random digraph using the 2nd-order probability model.

Parameters:

Name	Type	Description	Default
`n`	`int`	Number of vertices	required
`a`	`float`	Coefficient of probability function	required
`b`	`float`	Absolute value of power of exponent in probability function	required
`xyz`	`(n,3)-numpy array of floats`	Co-ordinates of vertices in \(\mathbb{R}^3\)	required
`threads`	`int`	Number of parallel threads to be used	`8`
`seed`	`pair of ints`	Random seed to be used, if none is provided a seed is randomly selected	`(None, None)`

Returns:

Type	Description
`coo matrix`	Matrix of the generated control

See Also

conn_prob_2nd_order_model : The modelling function from which the parameters a and bcan be obtained.

Source code in src/connalysis/randomization/randomization.py

def run_DD2(n,a,b,xyz,threads=8, seed=(None,None)):
    r"""Creates a random digraph using the 2nd-order probability model.

    Parameters
    ----------
    n : int
        Number of vertices
    a : float
        Coefficient of probability function
    b : float
        Absolute value of power of exponent in probability function
    xyz : (n,3)-numpy array of floats
        Co-ordinates of vertices in $\mathbb{R}^3$
    threads : int
        Number of parallel threads to be used
    seed : pair of ints
        Random seed to be used, if none is provided a seed is randomly selected

    Returns
    -------
    coo matrix
        Matrix of the generated control

    See Also
    --------
    [conn_prob_2nd_order_model](modelling.md#src.connalysis.modelling.modelling.conn_prob_2nd_order_model) :
    The modelling function from which the parameters ``a`` and ``b``can be obtained.


    """
    if seed[0]==None or seed[1]==None:
        adj = gm.DD2(n,a,b,xyz,threads)
    else:
        adj = gm.DD2(n,a,b,xyz,threads,seed[0],seed[1])
    return _dict_to_coo(adj,n)

`run_DD2_block(n, probs, blocks, xyz, threads, seed=(None, None))`

Creates a random digraph using a combination of the stochastic block model and the 2nd order distance dependent model. Such that the probability of an edge is given by the distance dependent equation, but the parameters of that equation vary depending on the block of the source of the edge and block of the target.

Parameters:

Name	Type	Description	Default
`n`	`int`	Number of vertices	required
`probs`	`numpy array of floats`	shape=(m,m,2) where m is the number of blocks. For source vertex i and target vertex j probs[i][j][0] is the coefficient of the distance dependent equation (value a) and probs[i][j][0] is the absolute value of power of exponent in the distance dependent equation (value b)	required
`blocks`	`numpy array of ints`	shape=(n,). The i'th entry is which block vertex i belongs to.	required
`xyz`	`(n,3)-numpy array of floats`	Co-ordinates of vertices in \(\mathbb{R}^3\)	required
`threads`	`int`	Number of parallel threads to be used	required
`seed`	`pair of ints`	Random seed to be used, if none is provided a seed is randomly selected	`(None, None)`

Returns:

Type	Description
`coo matrix`	Matrix of the generated control

Raises:

Type	Description
`TypeError`	If blocks contains non-integers

See Also

run_DD2 : Function which runs the 2nd distance dependent model

run_SBM : Function which runs the stochastic block model

run_DD2_block_pre : Similar function that only accounts for the block of the source vertex

Source code in src/connalysis/randomization/randomization.py

def run_DD2_block(n, probs, blocks, xyz, threads, seed=(None,None)):
    r"""Creates a random digraph using a combination of the stochastic block model
       and the 2nd order distance dependent model. Such that the probability of an edge
       is given by the distance dependent equation, but the parameters of that equation
       vary depending on the block of the source of the edge and block of the target.

    Parameters
    ----------
    n : int
        Number of vertices
    probs : numpy array of floats
        shape=(m,m,2) where m is the number of blocks. For source vertex i and target vertex j
        probs[i][j][0] is the coefficient of the distance dependent equation (value a) and
        probs[i][j][0] is the absolute value of power of exponent in the distance dependent equation (value b)
    blocks : numpy array of ints
        shape=(n,). The i'th entry is which block vertex i belongs to.
    xyz : (n,3)-numpy array of floats
        Co-ordinates of vertices in $\mathbb{R}^3$
    threads : int
        Number of parallel threads to be used
    seed : pair of ints
        Random seed to be used, if none is provided a seed is randomly selected

    Returns
    -------
    coo matrix
        Matrix of the generated control


    Raises
    ------
    TypeError
        If blocks contains non-integers

    See Also
    --------
    [run_DD2](randomization.md#src.connalysis.randomization.randomization.run_DD2) :
    Function which runs the 2nd distance dependent model

    [run_SBM](randomization.md#src.connalysis.randomization.randomization.run_SBM) :
    Function which runs the stochastic block model

    [run_DD2_block_pre](randomization.md#src.connalysis.randomization.randomization.run_DD2_block_pre) :
    Similar function that only accounts for the block of the source vertex

    """
    if seed[0]==None or seed[1]==None:
        adj = gm.DD2_block(n, probs, blocks, xyz, threads)
    else:
        adj = gm.DD2_block(n, probs, blocks, xyz, threads, seed[0], seed[1])
    return _dict_to_coo(adj,n)

`run_DD2_block_pre(n, probs, blocks, xyz, threads=8, seed=(None, None))`

Creates a random digraph using a combination of the stochastic block model and the 2nd order distance dependent model. Such that the probability of an edge is given by the distance dependent equation, but the parameters of that equation vary depending on the block of the source of the edge.

Parameters:

Name	Type	Description	Default
`n`	`int`	Number of vertices	required
`probs`	`numpy array of floats`	shape=(m,2) where m is the number of blocks. probs[i][0] is the coefficient of the distance dependent equation (value a) for source vertex i and probs[i][0] is the absolute value of power of exponent in the distance dependent equation (value b)	required
`blocks`	`numpy array of ints`	shape=(n,). The i'th entry is which block vertex i belongs to.	required
`xyz`	`(n,3)-numpy array of floats`	Co-ordinates of vertices in \(\mathbb{R}^3\)	required
`threads`	`int`	Number of parallel threads to be used	`8`
`seed`	`pair of ints`	Random seed to be used, if none is provided a seed is randomly selected	`(None, None)`

Returns:

Type	Description
`coo matrix`	Matrix of the generated control

Raises:

Type	Description
`TypeError`	If blocks contains non-integers

See Also

run_SBM: Function which runs the stochastic block model

run_DD2 : Function which runs the 2nd distance dependent model

run_DD2_block : Similar function that also accounts for the block of the target vertex

Source code in src/connalysis/randomization/randomization.py

def run_DD2_block_pre(n, probs, blocks, xyz, threads=8, seed=(None,None)):
    r"""Creates a random digraph using a combination of the stochastic block model
       and the 2nd order distance dependent model. Such that the probability of an edge
       is given by the distance dependent equation, but the parameters of that equation
       vary depending on the block of the source of the edge.

    Parameters
    ----------
    n : int
        Number of vertices
    probs : numpy array of floats
        shape=(m,2) where m is the number of blocks.
        probs[i][0] is the coefficient of the distance dependent equation (value a) for source vertex i and
        probs[i][0] is the absolute value of power of exponent in the distance dependent equation (value b)
    blocks : numpy array of ints
        shape=(n,). The i'th entry is which block vertex i belongs to.
    xyz : (n,3)-numpy array of floats
        Co-ordinates of vertices in $\mathbb{R}^3$
    threads : int
        Number of parallel threads to be used
    seed : pair of ints
        Random seed to be used, if none is provided a seed is randomly selected

    Returns
    -------
    coo matrix
        Matrix of the generated control


    Raises
    ------
    TypeError
        If blocks contains non-integers

    See Also
    --------
    [run_SBM](randomization.md#src.connalysis.randomization.randomization.run_SBM):
    Function which runs the stochastic block model

    [run_DD2](randomization.md#src.connalysis.randomization.randomization.run_DD2) :
    Function which runs the 2nd distance dependent model

    [run_DD2_block](randomization.md#src.connalysis.randomization.randomization.run_DD2_block) :
    Similar function that also accounts for the block of the target vertex

    """

    if seed[0]==None or seed[1]==None:
        adj = gm.DD2_block_pre(n, probs, blocks, xyz, threads)
    else:
        adj = gm.DD2_block_pre(n, probs, blocks, xyz, threads, seed[0], seed[1])
    return _dict_to_coo(adj,n)

`run_DD2_model(adj, node_properties, model_params_dd2=None, coord_names=['x', 'y', 'z'], threads=8, return_params=False, **config_dict)`

Wrapper for fitting a model and generating a random control graph based on 2nd order distance dependence model.

Parameters:

Name	Type	Description	Default
`adj`	`sparse matrix or 2d-array`	Adjacency matrix. If model_params_dd2 have already been computed, one can pass an empty matrix of the right size.	required
`node_properties`	`DataFrame`	DataFrame with information on the vertices of adj. It must have columns corresponding to the names of the coord_names to be used for distance computation (Default: ['x', 'y', 'z']).	required
`model_params_dd2`	`DataFrame`	Optional input of pre-computed model parameters as data frame with rows corresponding to seeds of model estimation (single row if subsampling is not used) and columns 'exp_model_scale' and 'exp_model_exponent' for the model parameters. See modelling.conn_prob_2nd_order_model for details.	`None`
`coord_names`	`list`	Names of the coordinates (corresponding to columns in neuron properties table) based on which to compute Euclidean distance. Default: ['x', 'y', 'z']	`['x', 'y', 'z']`
`threads`	`int`	Number of parallel threads to be used.	`8`
`return_params`	`bool`	If True, returns model_params_dd2 in addition to the generated control.	`False`
`config_dict`	`dict`	Dictionary with 2nd order model building settings. See modelling.conn_prob_2nd_order_model for details.	`{}`

Returns:

Type	Description
`coo_matrix`	Matrix of the generated control
`model_params_dd2`	pandas.DataFrame with model parameters (optional; if return_params is True)

See Also

conn_prob_2nd_order_model : The modelling function from which model_params_dd2 can be obtained.

Source code in src/connalysis/randomization/randomization.py

def run_DD2_model(adj, node_properties,
                  model_params_dd2=None, #an analysis that could be loaded from the pipeline
                  coord_names= ['x', 'y', 'z'],
                  threads=8, return_params=False, **config_dict):
    """Wrapper for fitting a model and generating a random control graph based on 2nd order distance dependence model.

    Parameters
    ----------
    adj : sparse matrix or 2d-array
        Adjacency matrix.
        If model_params_dd2 have already been computed, one can pass an empty matrix of the right size.
    node_properties : pandas.DataFrame
        DataFrame with information on the vertices of adj.
        It must have columns corresponding to the names of the coord_names to be used for distance computation (Default: ['x', 'y', 'z']).
    model_params_dd2 : pandas.DataFrame
        Optional input of pre-computed model parameters as data frame with rows corresponding to seeds of model estimation
        (single row if subsampling is not used) and columns 'exp_model_scale' and 'exp_model_exponent' for the model parameters.
        See modelling.conn_prob_2nd_order_model for details.
    coord_names : list
        Names of the coordinates (corresponding to columns in neuron properties table) based on which to compute Euclidean distance.
        Default: ['x', 'y', 'z']
    threads : int
        Number of parallel threads to be used.
    return_params : bool
        If True, returns model_params_dd2 in addition to the generated control.
    config_dict : dict
        Dictionary with 2nd order model building settings.
        See modelling.conn_prob_2nd_order_model for details.

    Returns
    -------
    coo_matrix
        Matrix of the generated control
    model_params_dd2
        pandas.DataFrame with model parameters (optional; if return_params is True)

    See Also
    --------
    [conn_prob_2nd_order_model](modelling.md#src.connalysis.modelling.modelling.conn_prob_2nd_order_model) :
    The modelling function from which model_params_dd2 can be obtained.

    """

    if model_params_dd2 is None:
        from .import modelling
        #TODO:  What to do if coord_names are also given in configdict and do not match coord_names?
        config_dict["coord_names"]=coord_names
        model_params_dd2 = modelling.conn_prob_2nd_order_model(adj, node_properties,**config_dict)
        LOG.warning("Fit parameters are used directly but should be checked by hand if the proper fit is obtained!")

    LOG.info("Run DD2 model with parameters: \n%s", model_params_dd2)

    n = adj.shape[0]
    a = model_params_dd2.mean(axis=0)['exp_model_scale']
    b = model_params_dd2.mean(axis=0)['exp_model_exponent']
    xyz = node_properties.loc[:,coord_names].to_numpy() #Make and assert that checks these columns exist!
    if len(coord_names)<3: #Extend by zeros if lower dimensional data was used to compute distance
        xyz=np.hstack([xyz,np.zeros((xyz.shape[0],3-xyz.shape[1]))])
    C=gm.DD2(n,a,b,xyz,threads)
    i=C['row']
    j=C['col']
    data=np.ones(len(i))
    if return_params==True:
        return sp.coo_matrix((data, (i, j)), [n,n]), model_params_dd2
    else:
        return sp.coo_matrix((data, (i, j)), [n,n])

`run_DD3(n, a1, b1, a2, b2, xyz, depths, threads=8, seed=(None, None))`

Creates a random digraph using the 2nd-order probability model.

Parameters:

Name	Type	Description	Default
`n`	`int`	Number of vertices	required
`a1`	`float`	Coefficient of probability function for negative depth	required
`b1`	`float`	Absolute value of power of exponent in probability function for negative depth	required
`a2`	`float`	Coefficient of probability function for positive depth	required
`b2`	`float`	Absolute value of power of exponent in probability function for positive depth	required
`xyz`	`(n,3)-numpy array of floats`	Co-ordinates of vertices in \(\mathbb{R}^3\)	required
`threads`	`int`	Number of parallel threads to be used	`8`
`seed`	`pair of ints`	Random seed to be used, if none is provided a seed is randomly selected	`(None, None)`

Returns:

Type	Description
`coo matrix`	Matrix of the generated control

See Also

conn_prob_3rd_order_model : The modelling function from which the parameters a1/a2 and b1/b2can be obtained.

Source code in src/connalysis/randomization/randomization.py

def run_DD3(n,a1,b1,a2,b2,xyz,depths,threads=8, seed=(None,None)):
    r"""Creates a random digraph using the 2nd-order probability model.

    Parameters
    ----------
    n : int
        Number of vertices
    a1 : float
        Coefficient of probability function for negative depth
    b1 : float
        Absolute value of power of exponent in probability function for negative depth
    a2 : float
        Coefficient of probability function for positive depth
    b2 : float
        Absolute value of power of exponent in probability function for positive depth
    xyz : (n,3)-numpy array of floats
        Co-ordinates of vertices in $\mathbb{R}^3$
    threads : int
        Number of parallel threads to be used
    seed : pair of ints
        Random seed to be used, if none is provided a seed is randomly selected

    Returns
    -------
    coo matrix
        Matrix of the generated control


    See Also
    --------
    [conn_prob_3rd_order_model](modelling.md#src.connalysis.modelling.modelling.conn_prob_3rd_order_model) :
    The modelling function from which the parameters ``a1/a2`` and ``b1/b2``can be obtained.



    """
    if seed[0]==None or seed[1]==None:
        adj = gm.DD3(n,a1,b1,a2,b2,xyz,depths,threads)
    else:
        adj = gm.DD3(n,a1,b1,a2,b2,xyz,depths,threads,seed[0],seed[1])
    return _dict_to_coo(adj,n)

`run_ER(n, p, threads=8, seed=(None, None))`

Creates an Erdos Renyi digraph.

Parameters:

Name	Type	Description	Default
`n`	`int`	Number of vertices	required
`p`	`float`	Edge probablity, must satisfy \(0 \le p \le 1\)	required
`threads`	`int`	Number of parallel threads to be used	`8`
`seed`	`pair of ints`	Random seed to be used, if none is provided a seed is randomly selected	`(None, None)`

Returns:

Type	Description
`coo matrix`	Matrix of the generated control

Examples:

Setting n=3 and p=1 gives the complete digraph on 3 vertices:

>>> connalysis.randomization.run_ER(3,1)
{'row': [0, 0, 1, 1, 2, 2], 'col': [1, 2, 0, 2, 0, 1]}

Raises:

Type	Description
`AssertionError`	If p is not between 0 and 1

Source code in src/connalysis/randomization/randomization.py

def run_ER(n, p, threads=8, seed=(None,None)):
    """Creates an Erdos Renyi digraph.

    Parameters
    ----------
    n : int
        Number of vertices
    p : float
        Edge probablity, must satisfy $0 \\le p \\le 1$
    threads : int
        Number of parallel threads to be used
    seed : pair of ints
        Random seed to be used, if none is provided a seed is randomly selected

    Returns
    -------
    coo matrix
        Matrix of the generated control

    Examples
    --------
    Setting n=3 and p=1 gives the complete digraph on 3 vertices:
    >>> connalysis.randomization.run_ER(3,1)
    {'row': [0, 0, 1, 1, 2, 2], 'col': [1, 2, 0, 2, 0, 1]}

    Raises
    ------
    AssertionError
        If p is not between 0 and 1

    """
    assert (p >= 0 and p <= 1), "p must be between 0 and 1"
    if seed[0]==None or seed[1]==None:
        adj = gm.ER(n,p,threads)
    else:
        adj = gm.ER(n,p,threads,seed[0],seed[1])
    return _dict_to_coo(adj,n)

`run_SBM(n, probs, blocks, threads=8, seed=(None, None))`

Creates a random digraph using the stochastic block model.

Parameters:

Name	Type	Description	Default
`n`	`int`	Number of vertices	required
`probs`	`numpy array of floats`	shape=(m,m) where m is the number of blocks. probs[i][j] is probability of an edge between block i and block j	required
`blocks`	`numpy array of ints`	shape=(n,). The i'th entry gives to which block vertex i belongs.	required
`threads`	`int`	Number of parallel threads to be used	`8`
`seed`	`pair of ints`	Random seed to be used, if none is provided a seed is randomly selected	`(None, None)`

Returns:

Type	Description
`coo matrix`	Matrix of the generated control

Examples:

To create an SBM digraph on 4 vertices where the even to odd, or odd to even, vertices connect with high probablity (p=0.9) and the even to evens or odd to odds connect with low probability (p=0.1):

>>> connalysis.randomization.run_SBM(4,np.array([[0.1,0.9],[0.9,0.1]]),np.array([0,1,0,1]))
{'row': [0, 0, 1, 1, 1, 2, 2, 3, 3], 'col': [1, 3, 0, 2, 3, 1, 3, 0, 2]

Raises:

Type	Description
`TypeError`	If blocks contains non-integers

References

[1] P.W. Holland, K. Laskey, S. Leinhardt, "Stochastic Blockmodels: First Steps", Soc Networks, 5-2, pp. 109-137, 1982

Source code in src/connalysis/randomization/randomization.py

def run_SBM(n, probs, blocks, threads=8, seed=(None,None)):
    """Creates a random digraph using the stochastic block model.

    Parameters
    ----------
    n : int
        Number of vertices
    probs : numpy array of floats
        shape=(m,m) where m is the number of blocks.
        probs[i][j] is probability of an edge between block i and block j
    blocks : numpy array of ints
        shape=(n,). The i'th entry gives to which block vertex i belongs.
    threads : int
        Number of parallel threads to be used
    seed : pair of ints
        Random seed to be used, if none is provided a seed is randomly selected

    Returns
    -------
    coo matrix
        Matrix of the generated control

    Examples
    --------
    To create an SBM digraph on 4 vertices where the even to
    odd, or odd to even, vertices connect with high probablity (p=0.9)
    and the even to evens or odd to odds connect with low probability (p=0.1):
    >>> connalysis.randomization.run_SBM(4,np.array([[0.1,0.9],[0.9,0.1]]),np.array([0,1,0,1]))
    {'row': [0, 0, 1, 1, 1, 2, 2, 3, 3], 'col': [1, 3, 0, 2, 3, 1, 3, 0, 2]


    Raises
    ------
    TypeError
        If blocks contains non-integers

    References
    ----------
    [1] P.W. Holland, K. Laskey, S. Leinhardt,
    ["Stochastic Blockmodels: First Steps"](https://www.sciencedirect.com/science/article/pii/0378873383900217),
    Soc Networks, 5-2, pp. 109-137, 1982

    """

    if seed[0]==None or seed[1]==None:
        adj = gm.SBM(n, probs, blocks, threads)
    else:
        adj = gm.SBM(n, probs, blocks, threads, seed[0], seed[1])
    return _dict_to_coo(adj, n)

`stochastic_spread_model(M, n_steps=100, n_protected=0, q=10.0, tgt_level='individual', decay=1.0, sum_exclusion=True, return_history=False, node_can_spread=None)`

Builds a stochastic spread graph. See https://doi.org/10.1101/2025.08.21.671478

Parameters:

Name	Type	Description	Default
`M`	`matrix`	Adjacency matrix of the underlying graph to spread on. If data type is float then the weight specifies the probability that the corresponding edge is crossed in a step. This weight / probability is further scaled if parameter q is specified. If data type is bool, then q must be specified.	required
`n_steps`	`int`	Maximum number of steps to evaluate. Should be picked `large enough` that the spreading process terminates naturally from lack of new nodes instead of reaching this maximum.	`100`
`n_protected`	`int`	Number of initial steps to take with reduced stochasticity. For this number of steps the process for a given source node will spread to exactly the expected number of nodes instead of a randomly determined number. This avoids a large number of source nodes with zero out-degree. Set to 0 to not use this feature.	`0`
`q`	`float`	Sets the expected number of nodes to spread to in each step. This is done by scaling the weights in M with weights dynamically determined in each step. If the data type of M is boolean, all entries in M are interpreted as 1.0 and q must be provided to determine "proper" weights. Set to None to not use this feature.	`10.0`
`tgt_level`	`str`	One of "mean" or "individual". Specifies how parameter q is interpreted. If "individual", then one scaling factor per source node is calculated. If "mean", then one global factor is used. If q is set to None, then this is ignored. Using "mean" leads to more diverse degree distributions.	`'individual'`
`decay`	`float`	Must be between 0 and 1. Paramter q is multiplied by this value after each step, reducing its value. This leads to shorter degree distributions.	`1.0`
`sum_exclusion`	`bool`	Determines how the node exclusion rule is updated. If True, then once a candidate node has been rejected once from spread it can not be spread to in future steps. If False, then it is only excluded in the next step.	`True`
`return_history`	`bool`	If True, then a second output is returned (see below).	`False`
`node_can_spread`	`iterable`	Individual elements must be bool. If provided, it specifies which nodes "grow" outgoing connections via the spreading mechanism. That is, for nodes where the corresponding entry of `node_can_spread` is False, the out-degree will be set to 0. If provided, the length of the iterable must match the first dimension of `M`. If not provided, all nodes will spread.	`None`

Returns:

Name	Type	Description
`full_instance`	`matrix`	Adjacency matrix of the output graph
`history`	`list`	Optional output only returned if `return_history` is True. List specifying for each evaluated step the mean number of nodes that the process spread to. To be used to improve parameter fitting or for debugging.

Raises:

Type	Description
`ValueError`	If M contains any float weights > 1.0
`ValueError`	If M has bool data type and q is not used.
`ValueError`	If tgt_level is not one of ["mean", "individual"]
`ValueError`	If decay is not between [0, 1]
`ValueError`	If node_can_spread is provided and its length does not match M

Source code in src/connalysis/randomization/randomization.py

def stochastic_spread_model(M, n_steps=100,
                            n_protected=0, q=10.0, 
                            tgt_level="individual",
                            decay=1.0,
                            sum_exclusion=True,
                            return_history=False,
                            node_can_spread=None):
    """
    Builds a stochastic spread graph. See https://doi.org/10.1101/2025.08.21.671478

    Parameters
    ----------
    M : sparse.matrix
        Adjacency matrix of the underlying graph to spread on. If data type is float then the weight
        specifies the probability that the corresponding edge is crossed in a step. This weight / probability
        is further scaled if parameter q is specified. If data type is bool, then q _must_ be specified.
    n_steps : int
        Maximum number of steps to evaluate. Should be picked `large enough` that the spreading process
        terminates naturally from lack of new nodes instead of reaching this maximum.
    n_protected : int
        Number of initial steps to take with reduced stochasticity. For this number of steps the process 
        for a given source node will spread to exactly the expected number of nodes instead of a randomly
        determined number. This avoids a large number of source nodes with zero out-degree. Set to 0 to
        not use this feature.
    q : float 
        Sets the expected number of nodes to spread to in each step. This is done by scaling the weights in
        M with weights dynamically determined in each step. If the data type of M is boolean, all entries
        in M are interpreted as 1.0 and q must be provided to determine "proper" weights.
        Set to None to not use this feature.
    tgt_level : str
        One of "mean" or "individual". Specifies how parameter q is interpreted. If "individual", then one 
        scaling factor per source node is calculated. If "mean", then one global factor is used. If q is
        set to None, then this is ignored. Using "mean" leads to more diverse degree distributions.
    decay : float
        Must be between 0 and 1. Paramter q is multiplied by this value after each step, reducing its value.
        This leads to shorter degree distributions.
    sum_exclusion : bool
        Determines how the node exclusion rule is updated. If True, then once a candidate node has been
        rejected once from spread it can not be spread to in future steps. If False, then it is only 
        excluded in the next step.
    return_history : bool
        If True, then a second output is returned (see below).
    node_can_spread : iterable
        Individual elements must be bool. If provided, it specifies which nodes "grow" outgoing connections
        via the spreading mechanism. That is, for nodes where the corresponding entry of `node_can_spread` is 
        False, the out-degree will be set to 0. If provided, the length of the iterable must match the first
        dimension of `M`. If not provided, all nodes will spread.

    Returns
    ----------
    full_instance : sparse.matrix
        Adjacency matrix of the output graph
    history : list
        Optional output only returned if `return_history` is True. List specifying for each evaluated
        step the mean number of nodes that the process spread to. To be used to improve parameter fitting or
        for debugging.

    Raises
    ----------
    ValueError
        If M contains any float weights > 1.0
    ValueError
        If M has bool data type and q is not used.
    ValueError
        If tgt_level is not one of ["mean", "individual"]
    ValueError
        If decay is not between [0, 1]
    ValueError
        If node_can_spread is provided and its length does not match M
    """
    # Checking and setting up input variables
    if M.dtype != bool:
        if np.any(M.data > 1.0):
            raise ValueError("Weights in input matrix must be <= 1.0!")
    else:
        if q is None:
            raise ValueError("If q is set to None, then M must specify probabilities (float between 0 and 1)!")
    if tgt_level not in ["mean", "individual"]:
        raise ValueError(f"Unknown value for tgt_level: {tgt_level}. Expected one of ['mean', 'individual']!")
    if (decay < 0) or (decay > 1.0):
        raise ValueError("Parameter decay must be between 0 and 1!")
    sum_exclusion = bool(sum_exclusion)

    # Setting up the initial
    exclusion = sp.coo_matrix(([], ([], [])), shape=M.shape)
    initial = sp.diags(
        diagonals=np.ones(M.shape[0]),
        offsets=0, shape=M.shape, format="csr"
    )
    if node_can_spread is None:
        state = initial
    else:
        if len(node_can_spread) != M.shape[0]:
            raise ValueError("Length of node_can_spread must match first dimension of M!")
        state = sp.diags(
            diagonals=np.array(node_can_spread, dtype=int),
            offsets=0, shape=M.shape, format="csr"
        )
    M = M.transpose()

    # Set up output lists
    row = []
    col = []
    data = []
    history = []

    # Main loop
    for _step in range(n_steps):
        # Where the process can spread to. Subtracting exclusion ensures values < 0
        candidates = state * M - 1E6 * (exclusion + initial)
        candidates.data = np.minimum(np.maximum(candidates.data, 0), 1.0)

        # Scaling of spread probabilities based on configuration
        if q is not None:
            if tgt_level == "mean":
                csum = np.array(candidates.sum(axis=1))[:, 0]
                fac = q * np.ones(M.shape[0]) / csum[csum > 0].mean()
            else:
                fac = q / np.array(candidates.sum(axis=1) + 1E-3)[:, 0]
            q = q * decay
        else:
            fac = None

        # Take a new step
        new_state = evaluate_probs(candidates, adjust=fac, less_random=(_step < n_protected))
        row.extend(new_state.row)
        col.extend(new_state.col)
        data.extend(_step * np.ones(new_state.nnz, dtype=int))
        new_state = new_state.tocsr()

        # Step added to history
        h_i = new_state.sum(axis=1).mean()  # Mean number added per original neuron
        history.append(h_i)

        # Update exclusion rule
        if sum_exclusion:
            exclusion = exclusion + state
        else:
            exclusion = state
        state = new_state

    # Create output matrix
    full_instance = sp.coo_matrix((
        np.ones(len(row), dtype=bool),
        (row, col)
    ), shape=M.shape).tocsr()

    if return_history:
        return full_instance, history
    return full_instance

`underlying_model(adj, seed: int = None)`

Function to generate a digraph with the same underlying undirected graph as adj and the same number of reciprocal connections

Parameters:

Name	Type	Description	Default
`adj`	`csc_matrix`	Adjacency matrix of a directed network.	required
`seed`	`int`	Random seed to be used	`None`

Returns:

Type	Description
`csc_matrix`	Digraph with the same underlying undirected graph as adj and the same number of reciprocal connections

See Also

adjusted_ER : Function to generate an Erdos Renyi model with adjusted bidirectional connections

bishuffled_model : Function which returns a digraph with shuffled reciprocal connections

Source code in src/connalysis/randomization/randomization.py

def underlying_model(adj, seed: int=None):
    """Function to generate a digraph with the same  underlying undirected graph as adj
        and the same number of reciprocal connections

    Parameters
    ----------
    adj : csc_matrix
        Adjacency matrix of a directed network.
    seed : int
        Random seed to be used

    Returns
    -------
    csc_matrix
        Digraph with the same  underlying undirected graph as adj and the same number of reciprocal connections

    See Also
    --------
    [adjusted_ER](randomization.md#src.connalysis.randomization.randomization.adjusted_ER) :
    Function to generate an Erdos  Renyi model with adjusted bidirectional connections

    [bishuffled_model](randomization.md#src.connalysis.randomization.randomization.bishuffled_model) :
    Function which returns a digraph with shuffled reciprocal connections
    """
    from connalysis.network.topology import rc_submatrix
    from .rand_utils import  add_bidirectional_connections
    generator = np.random.default_rng(seed)
    target_bedges = int(rc_submatrix(adj).count_nonzero() / 2)
    ut_matrix = sp.triu(adj + adj.T)
    return add_bidirectional_connections(ut_matrix, target_bedges, generator)

Randomization

This page describes functions contained in the randomization module used to generate of randomized controls of connectomes.

ER_model(adj, threads=8, seed=(None, None))

ER_shuffle(adj, neuron_properties=[], shuffle_type='sparse')

_seed_random_state(shuffler, seeder=np.random.seed)

add_connections(adj, nc, seed=0, sparse_mode=True, max_iter=30)

add_rc_connections(adj, n_rc, seed=0)

add_rc_connections_skeleta(adj, factors, dimensions=None, skeleta=None, threads=8, seed=0, return_skeleta=False)

adjusted_ER(adj, seed=None)

bishuffled_model(adj, seed=None)

configuration_model(M, seed=None)

random_geometric_model(pts, pts_x=None, n_neighbors=None, dist_neighbors=None, n_pick=None, p_pick=None, scale_axes=None, directionality_fac=0.0, directionality_axis=None, distance_func=None, custom_w_out=None, custom_w_in=None, no_diag=True, mirror=False)

run_DD2(n, a, b, xyz, threads=8, seed=(None, None))

run_DD2_block(n, probs, blocks, xyz, threads, seed=(None, None))

run_DD2_block_pre(n, probs, blocks, xyz, threads=8, seed=(None, None))

run_DD2_model(adj, node_properties, model_params_dd2=None, coord_names=['x', 'y', 'z'], threads=8, return_params=False, **config_dict)

run_DD3(n, a1, b1, a2, b2, xyz, depths, threads=8, seed=(None, None))

run_ER(n, p, threads=8, seed=(None, None))

run_SBM(n, probs, blocks, threads=8, seed=(None, None))

stochastic_spread_model(M, n_steps=100, n_protected=0, q=10.0, tgt_level='individual', decay=1.0, sum_exclusion=True, return_history=False, node_can_spread=None)

underlying_model(adj, seed: int = None)

This page describes functions contained in the `randomization` module used to generate of randomized controls of connectomes.

`ER_model(adj, threads=8, seed=(None, None))`

`ER_shuffle(adj, neuron_properties=[], shuffle_type='sparse')`

`_seed_random_state(shuffler, seeder=np.random.seed)`

`add_connections(adj, nc, seed=0, sparse_mode=True, max_iter=30)`

`add_rc_connections(adj, n_rc, seed=0)`

`add_rc_connections_skeleta(adj, factors, dimensions=None, skeleta=None, threads=8, seed=0, return_skeleta=False)`

`adjusted_ER(adj, seed=None)`

`bishuffled_model(adj, seed=None)`

`configuration_model(M, seed=None)`

`random_geometric_model(pts, pts_x=None, n_neighbors=None, dist_neighbors=None, n_pick=None, p_pick=None, scale_axes=None, directionality_fac=0.0, directionality_axis=None, distance_func=None, custom_w_out=None, custom_w_in=None, no_diag=True, mirror=False)`

`run_DD2(n, a, b, xyz, threads=8, seed=(None, None))`

`run_DD2_block(n, probs, blocks, xyz, threads, seed=(None, None))`

`run_DD2_block_pre(n, probs, blocks, xyz, threads=8, seed=(None, None))`

`run_DD2_model(adj, node_properties, model_params_dd2=None, coord_names=['x', 'y', 'z'], threads=8, return_params=False, **config_dict)`

`run_DD3(n, a1, b1, a2, b2, xyz, depths, threads=8, seed=(None, None))`

`run_ER(n, p, threads=8, seed=(None, None))`

`run_SBM(n, probs, blocks, threads=8, seed=(None, None))`

`stochastic_spread_model(M, n_steps=100, n_protected=0, q=10.0, tgt_level='individual', decay=1.0, sum_exclusion=True, return_history=False, node_can_spread=None)`

`underlying_model(adj, seed: int = None)`