forest_navigating_uav/worldgen/forest_worldgen/spatial_stats.py
"""
Spatial statistics for validating point patterns.

Computes post-generation diagnostics:
  - Clark-Evans nearest-neighbour index   R
  - Pair correlation function             g(r)
  - Ripley's L-function deviation         L(r) - r

All functions operate on a plain list of (x, y) tuples inside a square
domain [-L/2, L/2]^2 and return JSON-serialisable dicts / lists.
"""

import math

try:
    import numpy as np
    from scipy.spatial import KDTree

    _HAS_SCIPY = True
except ImportError:
    _HAS_SCIPY = False


# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------


def _nn_distances(positions):
    """Return list of nearest-neighbour distances.

    Uses scipy KDTree for O(n log n) when available, falls back to
    brute-force O(n**2) otherwise.
    """
    n = len(positions)
    if n < 2:
        return []

    if _HAS_SCIPY:
        pts = np.asarray(positions, dtype=np.float64)
        tree = KDTree(pts)
        dists, _ = tree.query(pts, k=2)  # k=2: closest is self (dist=0)
        return dists[:, 1].tolist()

    # brute-force fallback
    nn = []
    for i in range(n):
        xi, yi = positions[i]
        best = math.inf
        for j in range(n):
            if i == j:
                continue
            d = math.hypot(xi - positions[j][0], yi - positions[j][1])
            if d < best:
                best = d
        nn.append(best)
    return nn
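
# Illustrative example (hedged): three points on a line at x = 0, 1 and 3 give
# nearest-neighbour distances [1.0, 1.0, 2.0]; the KDTree path and the
# brute-force fallback return the same values, ordered like the input.
#
#     _nn_distances([(0.0, 0.0), (1.0, 0.0), (3.0, 0.0)])   # -> [1.0, 1.0, 2.0]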


def _pairwise_distances(positions, r_max=None):
    """Return flat list of pairwise distances (no duplicates).

    When *r_max* is given and scipy is available, uses KDTree.sparse_distance_matrix
    to avoid materialising all n*(n-1)/2 pairs - only pairs within r_max are stored.
    """
    n = len(positions)

    if _HAS_SCIPY and r_max is not None and r_max > 0:
        pts = np.asarray(positions, dtype=np.float64)
        tree = KDTree(pts)
        sparse = tree.sparse_distance_matrix(tree, r_max, output_type="ndarray")
        # sparse is structured array with (i, j, v); i < j not guaranteed
        dists = [float(row[2]) for row in sparse if row[0] < row[1]]
        return dists

    # full brute-force fallback (or when r_max is None)
    dists = []
    for i in range(n):
        xi, yi = positions[i]
        for j in range(i + 1, n):
            dists.append(math.hypot(xi - positions[j][0], yi - positions[j][1]))
    return dists
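
# Hedged note on the scipy path above: sparse_distance_matrix with
# output_type="ndarray" yields (i, j, v) records that include self-pairs
# (i == i, v == 0) and both orderings of every pair; the `row[0] < row[1]`
# filter keeps each unordered pair exactly once.  Tiny illustrative call:
#
#     _pairwise_distances([(0.0, 0.0), (3.0, 4.0), (6.0, 8.0)])
#     # -> the three pair distances 5.0, 10.0 and 5.0, each counted once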


# ---------------------------------------------------------------------------
# Clark-Evans R
# ---------------------------------------------------------------------------


def clark_evans_R(positions, area):
    """
    Clark-Evans nearest-neighbour index.

    R = mean_obs_nn / mean_csr_nn
    where mean_csr_nn = 1 / (2 * sqrt(density))

    Returns:
        float  R  (>1 regular, ~1 random, <1 clustered)
    """
    nn = _nn_distances(positions)
    if not nn or area <= 0:
        return float("nan")
    mean_obs = sum(nn) / len(nn)
    density = len(positions) / area
    mean_csr = 0.5 / math.sqrt(density)
    if mean_csr <= 0:
        return float("nan")
    return mean_obs / mean_csr
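
# Interpretation sketch (illustrative, not used by the pipeline): a CSR
# (Poisson) pattern gives R close to 1, a square lattice with spacing s gives
# R = 2 (observed nn distance s versus the CSR expectation 0.5 * s at the same
# density), and heavy clustering drives R towards 0.  No edge correction is
# applied, so treat values from small point sets with caution.  Example call
# with an assumed 50 m x 50 m world:
#
#     R = clark_evans_R(positions, area=50.0 * 50.0)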


# ---------------------------------------------------------------------------
# Pair correlation  g(r)
# ---------------------------------------------------------------------------


def pair_correlation(positions, area, r_max=None, n_bins=25):
    """
    Estimate the pair correlation function g(r) in annular bins.

    g(r) = K'(r) / (2 * pi * r), where K'(r) is the derivative of Ripley's K;
    here it is estimated directly from pair counts in annular rings.

    Returns:
        list of dicts  [{r, g}, ...]   one per bin centre.
    """
    n = len(positions)
    if n < 2 or area <= 0:
        return []

    if r_max is None:
        # need all distances to determine r_max; fall back to full computation
        dists = _pairwise_distances(positions)
        if not dists:
            return []
        r_max = max(dists) * 0.5  # don't trust edge region
    else:
        dists = _pairwise_distances(positions, r_max=r_max)
        if not dists:
            return []

    dr = r_max / n_bins
    if dr <= 0:
        return []

    counts = [0] * n_bins
    for d in dists:
        idx = int(d / dr)
        if 0 <= idx < n_bins:
            counts[idx] += 1

    results = []
    for b in range(n_bins):
        r_lo = b * dr
        r_hi = (b + 1) * dr
        r_mid = (r_lo + r_hi) / 2.0
        ring_area = math.pi * (r_hi**2 - r_lo**2)
        if ring_area <= 0:
            continue
        # each pair counted once; expected under CSR = n*(n-1)/2 * ring_area/area
        expected = 0.5 * n * (n - 1) * ring_area / area
        g = counts[b] / expected if expected > 0 else 0.0
        results.append({"r": round(r_mid, 4), "g": round(g, 4)})

    return results
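
# Reading g(r) (illustrative): g ~ 1 in a bin means pair counts there match
# CSR, g < 1 signals inhibition at that range (e.g. a minimum trunk spacing),
# g > 1 signals clustering.  Hedged example call, assuming a 50 m world and
# the binning that compute_validation_stats below uses:
#
#     g_of_r = pair_correlation(positions, area=2500.0, r_max=12.5, n_bins=20)
#     # -> [{"r": 0.3125, "g": ...}, ..., {"r": 12.1875, "g": ...}]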


# ---------------------------------------------------------------------------
# Ripley's L(r) - r
# ---------------------------------------------------------------------------


def ripley_L_minus_r(positions, area, r_max=None, n_bins=25):
    """
    Compute L(r) - r  for a set of evaluation radii.

    K(r) = area / n^2  *  sum_{i!=j} 1(d_ij <= r)
    L(r) = sqrt(K(r) / pi)
    deviation = L(r) - r

    Returns:
        list of dicts  [{r, L_minus_r}, ...]
    """
    n = len(positions)
    if n < 2 or area <= 0:
        return []

    if r_max is None:
        half_side = math.sqrt(area) / 2.0
        r_max = half_side * 0.5

    dists = _pairwise_distances(positions, r_max=r_max)
    dists.sort()

    dr = r_max / n_bins
    if dr <= 0:
        return []

    results = []
    pair_idx = 0
    cum_count = 0  # cumulative count of pairs with dist <= r

    for b in range(1, n_bins + 1):
        r = b * dr
        while pair_idx < len(dists) and dists[pair_idx] <= r:
            cum_count += 1
            pair_idx += 1
        # each pair counted once in dists; K uses ordered pairs so multiply by 2
        K = area / (n * n) * (2 * cum_count)
        L = math.sqrt(K / math.pi) if K >= 0 else 0.0
        results.append({"r": round(r, 4), "L_minus_r": round(L - r, 4)})

    return results
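
# Reading L(r) - r (illustrative): under CSR the deviation stays near 0,
# positive values indicate clustering at scale r, negative values indicate
# regularity / inhibition.  With no edge correction, the largest-r bins are
# biased negative even for CSR patterns, so read them with caution.
#
#     dev = ripley_L_minus_r(positions, area=2500.0, r_max=12.5, n_bins=20)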


# ---------------------------------------------------------------------------
# Public convenience: compute all stats at once
# ---------------------------------------------------------------------------


def compute_validation_stats(positions, area_size):
    """
    Compute a full suite of spatial statistics for the generated positions.

    Parameters:
        positions : list of (x, y)
        area_size : side length of the square world

    Returns:
        dict ready for JSON serialisation.
    """
    area = area_size**2
    R = clark_evans_R(positions, area)

    # choose r_max relative to world; ~ 1/4 of the side
    r_max = area_size * 0.25

    g = pair_correlation(positions, area, r_max=r_max, n_bins=20)
    L = ripley_L_minus_r(positions, area, r_max=r_max, n_bins=20)

    # summarise g(r) at small r  (first 5 bins)
    g_small = g[:5] if g else []
    g_small_mean = (
        sum(item["g"] for item in g_small) / len(g_small) if g_small else float("nan")
    )

    # summarise L(r)-r at small r
    L_small = L[:5] if L else []
    L_small_mean = (
        sum(item["L_minus_r"] for item in L_small) / len(L_small) if L_small else float("nan")
    )

    return {
        "clark_evans_R": round(R, 4) if not math.isnan(R) else None,
        "g_small_r_mean": round(g_small_mean, 4) if not math.isnan(g_small_mean) else None,
        "L_small_r_mean": round(L_small_mean, 4) if not math.isnan(L_small_mean) else None,
        "pair_correlation": g,
        "ripley_L_minus_r": L,
    }
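

# ---------------------------------------------------------------------------
# Manual smoke test (usage sketch)
# ---------------------------------------------------------------------------


if __name__ == "__main__":
    # Hedged usage sketch: scatter uniform-random points in a square world and
    # print the full stats dict.  Under CSR, clark_evans_R should land near 1
    # and L(r) - r near 0; exact numbers vary with the seed and with edge
    # effects.  The 50 m side and 300-point count are arbitrary choices for
    # this check, not values taken from the worldgen configuration.
    import json
    import random

    random.seed(0)
    side = 50.0
    half = side / 2.0
    demo_positions = [
        (random.uniform(-half, half), random.uniform(-half, half)) for _ in range(300)
    ]
    print(json.dumps(compute_validation_stats(demo_positions, side), indent=2))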