Skip to content

CLUSTER_CENTERS

Function description

The CLUSTER_CENTERS() function can be used to determine the K cluster centers of a vector column. Returns a line of JSON array string containing all cluster centers.

Syntax Structure

SELECT cluster_centers(col kmeans 'k, op_type, init_type, normalize') FROM tbl;

Parameter definition

Parameters Description
col Required. To determine the vector column of the cluster center.
k Required. The number of clusters to be divided into is greater than 0, less than or equal to the total number of rows.
op_type Required. The distance function used in clustering calculation. Currently vector_l2_ops is supported.
init_type Required. The initial cluster center algorithm to be used. Currently we support random and kmeansplusplus(K-means++).
normalize Required. Boolean value, the clustering algorithm to be used, true represents Spherical Kmeans, false represents Regular Kmeans.

Example

drop table if exists points;
CREATE TABLE points (id int auto_increment PRIMARY KEY,coordinate vecf32(2));
insert into points(coordinate) VALUES
 ("[-7.68905443,6.62034649]"),
 ("[-9.57651383,-6.93440446]"),
 ("[6.82968177,1.1648714]"),
 ("[-2.90130578,7.55077118]"),
 ("[-5.67841327,-7.28818497]"),
 ("[-6.04929137,-7.73619342]"),
 ("[-6.27824322,7.22746302]");
SET GLOBAL experimental_ivf_index = 1;--The parameter experimental_ivf_index value is 1 (default 0) to use vector index
--create index idx_t1 using ivfflat on points(coordinate) lists=1 op_type "vector_l2_ops";

-- Each point represents its coordinates on the x and y axes, query the cluster center, using Regular Kmeans
--K-means++
mysql> SELECT cluster_centers(coordinate kmeans '2,vector_l2_ops,kmeansplusplus,false') AS centers FROM points;
+-----------------------------------------------------------------------------+
| centers |
+-----------------------------------------------------------------------------+
| [ [-2.5097303, 5.640863],[-7.101406, -7.3195944] ] |
+-----------------------------------------------------------------------------+
1 row in set (0.01 sec)

--KMeans
mysql> SELECT cluster_centers(coordinate kmeans '2,vector_l2_ops,random,false') AS centers FROM points;
+-----------------------------------------------------------------------------+
| centers |
+-----------------------------------------------------------------------------+
| [ [-6.362137, -0.09336702],[6.829682, 1.1648715] ] |
+-----------------------------------------------------------------------------+
1 row in set (0.00 sec)

-- Each point represents the latitude and longitude coordinates, query the cluster center, using Spherical Kmeans
mysql> SELECT cluster_centers(coordinate kmeans '2,vector_l2_ops,kmeansplusplus,true') AS centers FROM points;
+----------------------------------------------------------------------------+
| centers |
+----------------------------------------------------------------------------+
| [ [0.70710677, 0.70710677],[0.83512634, 0.5500581] ] |
+----------------------------------------------------------------------------+
1 row in set (0.00 sec)

--The clustering centers in JSON type data can be retrieved by combining CROSS JOIN and UNNEST syntax.
mysql> SELECT value FROM (
    -> SELECT cluster_centers(coordinate kmeans '2,vector_l2_ops,kmeansplusplus,false') AS centers FROM points
    -> ) AS subquery
    -> CROSS JOIN UNNEST(subquery.centers) AS u;
+------------------------------+
| value |
+------------------------------+
| [-2.5097303, 5.640863] |
| [-7.101406, -7.3195944] |
+------------------------------+
2 rows in set (0.00 sec)