资源描述
1. 将下面的程序直接复制到MATLAB中运行
% Fuzzy c-means (FCM) clustering of the samples stored in ca.txt.
%
% Input file layout (one sample per row, comma separated):
%   column 1        - sample ID (not used for clustering)
%   columns 2..end-1 - feature values
%   last column     - known class label (not used for clustering)
%
% Workspace outputs:
%   center  - cluster centers returned by fcm
%   U       - membership matrix, transposed so that each row is one sample
%   obj_fcn - objective-function history returned by fcm
clc,
clear all
close all

% Read the file once; the functional form of load returns the numeric matrix
% directly, so no separate command-form "load ca.txt" is needed.
yangben = load('ca.txt');

% Drop the first column (ID) and the last column (known label); everything
% in between is the feature matrix handed to fcm.
t2 = size(yangben, 2);
if t2 < 3
    error('ca.txt must contain an ID column, at least one feature column and a label column.');
end
data = yangben(:, 2:t2-1);

N_CLUSTER = 2; % number of clusters; change this value as required

[center, U, obj_fcn] = fcm(data, N_CLUSTER); % run FCM

% Echo the cluster count and the per-sample memberships (no semicolon on
% purpose). fcm returns one row per cluster, so transpose to get one row per
% sample. Cluster indices are arbitrary: map them to the class codes in the
% last column of ca.txt before comparing results.
N_CLUSTER
U = U'
2. 数据:请将下面的内容复制并保存为 ca.txt。第一列是样本编号,最后一列是已知的分类结果(取值为 2 或 4)。聚类得到的类别编号(1、2)与最后一列的类别标记并不直接对应,聚类结束后请先转换结果再进行比较。
1000025,5,1,1,1,2,1,3,1,1,2
1002945,5,4,4,5,7,10,3,2,1,2
1015425,3,1,1,1,2,2,3,1,1,2
1016277,6,8,8,1,3,4,3,7,1,2
1017023,4,1,1,3,2,1,3,1,1,2
1017122,8,10,10,8,7,10,9,7,1,4
1018099,1,1,1,1,2,10,3,1,1,2
1018561,2,1,2,1,2,1,3,1,1,2
1033078,2,1,1,1,2,1,1,1,5,2
1033078,4,2,1,1,2,1,2,1,1,2
1035283,1,1,1,1,1,1,3,1,1,2
1036172,2,1,1,1,2,1,2,1,1,2
1041801,5,3,3,3,2,3,4,4,1,4
1043999,1,1,1,1,2,3,3,1,1,2
1044572,8,7,5,10,7,9,5,5,4,4
1047630,7,4,6,4,6,1,4,3,1,4
1048672,4,1,1,1,2,1,2,1,1,2
1049815,4,1,1,1,2,1,3,1,1,2
1050670,10,7,7,6,4,10,4,1,2,4
1050718,6,1,1,1,2,1,3,1,1,2
1054590,7,3,2,10,5,10,5,4,4,4
1054593,10,5,5,3,6,7,7,10,1,4
1056784,3,1,1,1,2,1,2,1,1,2
1059552,1,1,1,1,2,1,3,1,1,2
1065726,5,2,3,4,2,7,3,6,1,4
1066373,3,2,1,1,1,1,2,1,1,2
1066979,5,1,1,1,2,1,2,1,1,2
1067444,2,1,1,1,2,1,2,1,1,2
1070935,1,1,3,1,2,1,1,1,1,2
1070935,3,1,1,1,1,1,2,1,1,2
1071760,2,1,1,1,2,1,3,1,1,2
1072179,10,7,7,3,8,5,7,4,3,4
1074610,2,1,1,2,2,1,3,1,1,2
1075123,3,1,2,1,2,1,2,1,1,2
1079304,2,1,1,1,2,1,2,1,1,2
1080185,10,10,10,8,6,1,8,9,1,4
1081791,6,2,1,1,1,1,7,1,1,2
1084584,5,4,4,9,2,10,5,6,1,4
1091262,2,5,3,3,6,7,7,5,1,4
1099510,10,4,3,1,3,3,6,5,2,4
1100524,6,10,10,2,8,10,7,3,3,4
1102573,5,6,5,6,10,1,3,1,1,4
1103608,10,10,10,4,8,1,8,10,1,4
1103722,1,1,1,1,2,1,2,1,2,2
1105257,3,7,7,4,4,9,4,8,1,4
1105524,1,1,1,1,2,1,2,1,1,2
1106095,4,1,1,3,2,1,3,1,1,2
1106829,7,8,7,2,4,8,3,8,2,4
1108370,9,5,8,1,2,3,2,1,5,4
1108449,5,3,3,4,2,4,3,4,1,4
1110102,10,3,6,2,3,5,4,10,2,4
1110503,5,5,5,8,10,8,7,3,7,4
1110524,10,5,5,6,8,8,7,1,1,4
1111249,10,6,6,3,4,5,3,6,1,4
1112209,8,10,10,1,3,6,3,9,1,4
1113038,8,2,4,1,5,1,5,4,4,4
1113483,5,2,3,1,6,10,5,1,1,4
1113906,9,5,5,2,2,2,5,1,1,4
1115282,5,3,5,5,3,3,4,10,1,4
1115293,1,1,1,1,2,2,2,1,1,2
1116116,9,10,10,1,10,8,3,3,1,4
1116132,6,3,4,1,5,2,3,9,1,4
1116192,1,1,1,1,2,1,2,1,1,2
1116998,10,4,2,1,3,2,4,3,10,4
1117152,4,1,1,1,2,1,3,1,1,2
1118039,5,3,4,1,8,10,4,9,1,4
1120559,8,3,8,3,4,9,8,9,8,4
1121732,1,1,1,1,2,1,3,2,1,2
1121919,5,1,3,1,2,1,2,1,1,2
1123061,6,10,2,8,10,2,7,8,10,4
1124651,1,3,3,2,2,1,7,2,1,2
1125035,9,4,5,10,6,10,4,8,1,4
1126417,10,6,4,1,3,4,3,2,3,4
1131294,1,1,2,1,2,2,4,2,1,2
1132347,1,1,4,1,2,1,2,1,1,2
1133041,5,3,1,2,2,1,2,1,1,2
1133136,3,1,1,1,2,3,3,1,1,2
1136142,2,1,1,1,3,1,2,1,1,2
1137156,2,2,2,1,1,1,7,1,1,2
1143978,4,1,1,2,2,1,2,1,1,2
1143978,5,2,1,1,2,1,3,1,1,2
1147044,3,1,1,1,2,2,7,1,1,2
1147699,3,5,7,8,8,9,7,10,7,4
1147748,5,10,6,1,10,4,4,10,10,4
1148278,3,3,6,4,5,8,4,4,1,4
1148873,3,6,6,6,5,10,6,8,3,4
1152331,4,1,1,1,2,1,3,1,1,2
1155546,2,1,1,2,3,1,2,1,1,2
1160476,2,1,1,1,2,1,3,1,1,2
1164066,1,1,1,1,2,1,3,1,1,2
1165297,2,1,1,2,2,1,1,1,1,2
1165790,5,1,1,1,2,1,3,1,1,2
1165926,9,6,9,2,10,6,2,9,10,4
1166630,7,5,6,10,5,10,7,9,4,4
1166654,10,3,5,1,10,5,3,10,2,4
1167439,2,3,4,4,2,5,2,5,1,4
1167471,4,1,2,1,2,1,3,1,1,2
1168359,8,2,3,1,6,3,7,1,1,4
1168736,10,10,10,10,10,1,8,8,8,4
1169049,7,3,4,4,3,3,3,2,7,4
3. 函数源代码(在MATLAB中输入 type fcm 可以查看 fcm 函数的源代码)。注意:下面贴出的实际上是 kmeans 函数的源代码,而不是 fcm。
function [idx, C, sumD, D] = kmeans(X, k, varargin)
%KMEANS K-means clustering.
% IDX = KMEANS(X, K) partitions the points in the N-by-P data matrix
% X into K clusters. This partition minimizes the sum, over all
% clusters, of the within-cluster sums of point-to-cluster-centroid
% distances. Rows of X correspond to points, columns correspond to
% variables. KMEANS returns an N-by-1 vector IDX containing the
% cluster indices of each point. By default, KMEANS uses squared
% Euclidean distances.
%
% KMEANS treats NaNs as missing data, and removes any rows of X that
% contain NaNs.
%
% [IDX, C] = KMEANS(X, K) returns the K cluster centroid locations in
% the K-by-P matrix C.
%
% [IDX, C, SUMD] = KMEANS(X, K) returns the within-cluster sums of
% point-to-centroid distances in the 1-by-K vector sumD.
%
% [IDX, C, SUMD, D] = KMEANS(X, K) returns distances from each point
% to every centroid in the N-by-K matrix D.
%
% [ ... ] = KMEANS(..., 'PARAM1',val1, 'PARAM2',val2, ...) allows you to
% specify optional parameter name/value pairs to control the iterative
% algorithm used by KMEANS. Parameters are:
%
% 'Distance' - Distance measure, in P-dimensional space, that KMEANS
% should minimize with respect to. Choices are:
% {'sqEuclidean'} - Squared Euclidean distance
% 'cityblock' - Sum of absolute differences, a.k.a. L1
% 'cosine' - One minus the cosine of the included angle
% between points (treated as vectors)
% 'correlation' - One minus the sample correlation between
% points (treated as sequences of values)
% 'Hamming' - Percentage of bits that differ (only
% suitable for binary data)
%
% 'Start' - Method used to choose initial cluster centroid positions,
% sometimes known as "seeds". Choices are:
% {'sample'} - Select K observations from X at random
% 'uniform' - Select K points uniformly at random from
% the range of X. Not valid for Hamming distance.
% 'cluster' - Perform preliminary clustering phase on
% random 10% subsample of X. This preliminary
% phase is itself initialized using 'sample'.
% matrix - A K-by-P matrix of starting locations. In
% this case, you can pass in [] for K, and
% KMEANS infers K from the first dimension of
% the matrix. You can also supply a 3D array,
% implying a value for 'Replicates'
% from the array's third dimension.
%
% 'Replicates' - Number of times to repeat the clustering, each with a
% new set of initial centroids [ positive integer | {1}]
%
% 'Maxiter' - The maximum number of iterations [ positive integer | {100}]
%
% 'EmptyAction' - Action to take if a cluster loses all of its member
% observations. Choices are:
% {'error'} - Treat an empty cluster as an error
% 'drop' - Remove any clusters that become empty, and
% set corresponding values in C and D to NaN.
% 'singleton' - Create a new cluster consisting of the one
% observation furthest from its centroid.
%
% 'Display' - Display level [ 'off' | {'notify'} | 'final' | 'iter' ]
%
% Example:
%
% X = [randn(20,2)+ones(20,2); randn(20,2)-ones(20,2)];
% [cidx, ctrs] = kmeans(X, 2, 'dist','city', 'rep',5, 'disp','final');
% plot(X(cidx==1,1),X(cidx==1,2),'r.', ...
% X(cidx==2,1),X(cidx==2,2),'b.', ctrs(:,1),ctrs(:,2),'kx');
%
% See also LINKAGE, CLUSTERDATA, SILHOUETTE.
% KMEANS uses a two-phase iterative algorithm to minimize the sum of
% point-to-centroid distances, summed over all K clusters. The first
% phase uses what the literature often describes as "batch" updates,
% where each iteration consists of reassigning points to their nearest
% cluster centroid, all at once, followed by recalculation of cluster
% centroids. This phase may be thought of as providing a fast but
% potentially only approximate solution as a starting point for the
% second phase. The second phase uses what the literature often
% describes as "on-line" updates, where points are individually
% reassigned if doing so will reduce the sum of distances, and cluster
% centroids are recomputed after each reassignment. Each iteration
% during this second phase consists of one pass though all the points.
% KMEANS can converge to a local optimum, which in this case is a
% partition of points in which moving any single point to a different
% cluster increases the total sum of distances. This problem can only be
% solved by a clever (or lucky, or exhaustive) choice of starting points.
%
% References:
%
% [1] Seber, G.A.F., Multivariate Observations, Wiley, New York, 1984.
% [2] Spath, H. (1985) Cluster Dissection and Analysis: Theory, FORTRAN
% Programs, Examples, translated by J. Goldschmidt, Halsted Press,
% New York, 226 pp.
% Copyright 1993-2004 The MathWorks, Inc.
% $Revision: 1.4.4.5 $ $Date: 2004/03/02 21:49:12 $
if nargin < 2
error('stats:kmeans:TooFewInputs','At least two input arguments required.');
end
if any(isnan(X(:)))
warning('stats:kmeans:MissingDataRemoved','Removing rows of X with missing data.');
X = X(~any(isnan(X),2),:);
end
% n points in p dimensional space
[n, p] = size(X);
Xsort = []; Xord = [];
pnames = { 'distance' 'start' 'replicates' 'maxiter' 'emptyaction' 'display'};
dflts = {'sqeuclidean' 'sample' [] 100 'error' 'notify'};
[eid,errmsg,distance,start,reps,maxit,emptyact,display] ...
= statgetargs(pnames, dflts, varargin{:});
if ~isempty(eid)
error(sprintf('stats:kmeans:%s',eid),errmsg);
end
if ischar(distance)
distNames = {'sqeuclidean','cityblock','cosine','correlation','hamming'};
i = strmatch(lower(distance), distNames);
if length(i) > 1
error('stats:kmeans:AmbiguousDistance', ...
'Ambiguous ''distance'' parameter value: %s.', distance);
elseif isempty(i)
error('stats:kmeans:UnknownDistance', ...
'Unknown ''distance'' parameter value: %s.', distance);
end
distance = distNames{i};
switch distance
case 'cityblock'
[Xsort,Xord] = sort(X,1);
case 'cosine'
Xnorm = sqrt(sum(X.^2, 2));
if any(min(Xnorm) <= eps(max(Xnorm)))
error('stats:kmeans:ZeroDataForCos', ...
['Some points have small relative magnitudes, making them ', ...
'effectively zero.\nEither remove those points, or choose a ', ...
'distance other than ''cosine''.']);
end
X = X ./ Xnorm(:,ones(1,p));
case 'correlation'
X = X - repmat(mean(X,2),1,p);
Xnorm = sqrt(sum(X.^2, 2));
if any(min(Xnorm) <= eps(max(Xnorm)))
error('stats:kmeans:ConstantDataForCorr', ...
['Some points have small relative standard deviations, making them ', ...
'effectively constant.\nEither remove those points, or choose a ', ...
'distance other than ''correlation''.']);
end
X = X ./ Xnorm(:,ones(1,p));
case 'hamming'
if ~all(ismember(X(:),[0 1]))
error('stats:kmeans:NonbinaryDataForHamm', ...
'Non-binary data cannot be clustered using Hamming distance.');
end
end
else
error('stats:kmeans:InvalidDistance', ...
'The ''distance'' parameter value must be a string.');
end
if ischar(start)
startNames = {'uniform','sample','cluster'};
i = strmatch(lower(start), startNames);
if length(i) > 1
error('stats:kmeans:AmbiguousStart', ...
'Ambiguous ''start'' parameter value: %s.', start);
elseif isempty(i)
error('stats:kmeans:UnknownStart', ...
'Unknown ''start'' parameter value: %s.', start);
elseif isempty(k)
error('stats:kmeans:MissingK', ...
'You must specify the number of clusters, K.');
end
start = startNames{i};
if strcmp(start, 'uniform')
if strcmp(distance, 'hamming')
error('stats:kmeans:UniformStartForHamm', ...
'Hamming distance cannot be initialized with uniform random values.');
end
Xmins = min(X,1);
Xmaxs = max(X,1);
end
elseif isnumeric(start)
CC = start;
start = 'numeric';
if isempty(k)
k = size(CC,1);
elseif k ~= size(CC,1);
error('stats:kmeans:MisshapedStart', ...
'The ''start'' matrix must have K rows.');
elseif size(CC,2) ~= p
error('stats:kmeans:MisshapedStart', ...
'The ''start'' matrix must have the same number of columns as X.');
end
if isempty(reps)
reps = size(CC,3);
elseif reps ~= size(CC,3);
error('stats:kmeans:MisshapedStart', ...
'The third dimension of the ''start'' array must match the ''replicates'' parameter value.');
end
% Need to center explicit starting points for 'correlation'. (Re)normalization
% for 'cosine'/'correlation' is done at each iteration.
if isequal(distance, 'correlation')
CC = CC - repmat(mean(CC,2),[1,p,1]);
end
else
error('stats:kmeans:InvalidStart', ...
'The ''start'' parameter value must be a string or a numeric matrix or array.');
end
if ischar(emptyact)
emptyactNames = {'error','drop','singleton'};
i = strmatch(lower(emptyact), emptyactNames);
if length(i) > 1
error('stats:kmeans:AmbiguousEmptyAction', ...
'Ambiguous ''emptyaction'' parameter value: %s.', emptyact);
elseif isempty(i)
error('stats:kmeans:UnknownEmptyAction', ...
'Unknown ''emptyaction'' parameter value: %s.', emptyact);
end
emptyact = emptyactNames{i};
else
error('stats:kmeans:InvalidEmptyAction', ...
'The ''emptyaction'' parameter value must be a string.');
end
if ischar(display)
i = strmatch(lower(display), strvcat('off','notify','final','iter'));
if length(i) > 1
error('stats:kmeans:AmbiguousDisplay', ...
'Ambiguous ''display'' parameter value: %s.', display);
elseif isempty(i)
error('stats:kmeans:UnknownDisplay', ...
'Unknown ''display'' parameter value: %s.', display);
end
display = i-1;
else
error('stats:kmeans:InvalidDisplay', ...
'The ''display'' parameter value must be a string.');
end
if k == 1
error('stats:kmeans:OneCluster', ...
'The number of clusters must be greater than 1.');
elseif n < k
error('stats:kmeans:TooManyClusters', ...
'X must have more rows than the number of clusters.');
end
% Assume one replicate
if isempty(reps)
reps = 1;
end
%
% Done with input argument processing, begin clustering
%
dispfmt = '%6d\t%6d\t%8d\t%12g';
D = repmat(NaN,n,k); % point-to-cluster distances
Del = repmat(NaN,n,k); % reassignment criterio
展开阅读全文