mclust/0000755000175000017500000000000014157124112011711 5ustar nileshnileshmclust/MD50000644000175000017500000002052114157124112012221 0ustar nileshnilesh3111dbac78536d0757a0b08390d591f2 *DESCRIPTION f29893646b89738c493bd22446314bc4 *NAMESPACE 2b1dd2a90efd9be7407bc170bd166548 *NEWS.md fe5b51db8754d5ca6d32248ca9d4db1b *R/bootstrap.R eeb0e042ca8bcc1eb2d32da9ef594baf *R/clustCombi.R 97c8d1f45c59c4da2ea7f6db09b68f36 *R/densityMclust.R 3cbe94a3f3c162d293154dc0a055c9bb *R/gmmhd.R 2dc5411e8512b4da058597e17dd187fe *R/graphics.R ba84097ee321529784cd9c6db5143e77 *R/icl.R 4b9155a000a45bb1f37fd1507c15f172 *R/impute.R d85f070c21b0742a056b87a9654773d2 *R/mbahc.R 7b5f0026d92006d1a76e09714894f7d9 *R/mclust.R 1c0ad0629bdef87f46a9811aa87011ea *R/mclustaddson.R 3953487293520a00bc4e50a5483726f1 *R/mclustda.R 37f2401aee7a35ce6d7bb0e2ca6839eb *R/mclustdr.R de7a05949adf6c735f8355bb495b6df5 *R/mclustssc.R b2216ff923e4a79da4d35b12a4015f6f *R/options.R 781ff8bea2efa10350791cd781eb4a2d *R/toremove.R ba0e509dd6446b85a79e6c24e45d59c8 *R/util.R ce4d61899bfc151f01017ae2a753baee *R/weights.R ea54da7ad04ea6cf47aad932539d5945 *R/zzz.R aa315fbfff90cd89ac8637b562a133bc *build/vignette.rds 6fc941eda7a2bcddd64d711015c9d7db *data/Baudry_etal_2010_JCGS_examples.rda eae098659ecc267feb763f52c0de1a37 *data/EuroUnemployment.rda fa93433db4115af78edf00264634a412 *data/GvHD.rda 2fbaa53e4f9b0aaa0d415e164883c364 *data/acidity.rda dcf0404be80a56cd040ed8cb8da1b428 *data/banknote.txt.gz 6135e56ad3df989ce553eaf5924da9b6 *data/chevron.rda 47be3bcb96ffb22598a9067ecd2701c8 *data/cross.rda cc529e91ffc45020a05da77f76e9f330 *data/diabetes.rda 9e0bf66683dc263d24e018632136569c *data/thyroid.rda b2a529e265721bcd2cb632d42ec6cc11 *data/wdbc.txt.gz 2e2ce1f6b143173c17b6027c82cf7e7e *data/wreath.rda adc627c8c5c93d652e2b0109376135ac *inst/CITATION b2f3cb62dc61485b2a6fe6210ebd0d03 *inst/doc/mclust.R c7680542d4532804d5be21b3684986c7 *inst/doc/mclust.Rmd 48dd50ef41d5d80914f3228dcead2180 *inst/doc/mclust.html 
8d39c22f943d32be50a07dcad31bb27c *man/Baudry_etal_2010_JCGS_examples.Rd 226b01ed5c258d406de2af5cfade2ba1 *man/BrierScore.Rd 9260d85565aad1fa28e84cd2ca14d23d *man/EuroUnemployment.Rd 0e1b6313cb53d6e937acd55cdded818e *man/GvHD.Rd 1496c9e40a8326f4d922857a9beff127 *man/Mclust.Rd ed866eb3da5e973d336818b076d3685c *man/MclustBootstrap.Rd 6e1ae52a737008b85c9ed1c97a65479a *man/MclustDA.Rd a6a45328b7d5eaf35c375008599fe18f *man/MclustDR.Rd 82520ec704397fa02b331969088e91be *man/MclustDRsubsel.Rd b8d666ee6dceb9fbad3be67dea8e5c35 *man/MclustSSC.Rd 38f7f5f909ac5e15e37b866ab979e842 *man/acidity.Rd 6c924f5dc94fb10416016d5d027932e3 *man/adjustedRandIndex.Rd 40bf7f2db2b9677c5b9bf7733da4aeac *man/banknote.Rd 840501df1b562d5ba9df4d31c33672c6 *man/bic.Rd 9458a1636740fbd8443308670f153d68 *man/cdens.Rd 00ce09bbbe3b468350de0f9ee5bcbfae *man/cdensE.Rd 3061f14dc3f95151d31f4183ff8f9aec *man/cdfMclust.Rd b0cfe540c4eb91f99f49c2127562cd49 *man/chevron.Rd 342f855aaedba7fe96aaf6a425a77519 *man/clPairs.Rd 63d108fe436518c400eebb43430a5958 *man/classError.Rd a794c30cb8d2365ce1b5845338e933cf *man/classPriorProbs.Rd fd20be5ef505890b5fb82337022b9f0a *man/clustCombi-internals.Rd f6385cd6c121cb424cc1dc5c172a278b *man/clustCombi.Rd bdb14988bc5193739060f58cca67fd5f *man/clustCombiOptim.Rd fd9027722a416f5955196a5cdc985c53 *man/combMat.Rd e1914a2b193b93b0344c25945495c851 *man/combiPlot.Rd 39620282beb115c8f76e50c01dfd08d9 *man/combiTree.Rd 831d195e9c3513946f54daac01b81db5 *man/coordProj.Rd 0ad10b7d97e45bebe7a10f69a7f919b9 *man/covw.Rd 70ed6b3a3f2365d7a9d1f5d61d99ccb8 *man/crimcoords.Rd c18ca42f0c7ebdce593067d063d278ca *man/cross.Rd d8037dbf01147532dabd4fabbce22626 *man/cvMclustDA.Rd 9f86088d9d21126e7cc1ac28d4e5fa6a *man/decomp2sigma.Rd 4175c1b284dbb6b7c5a5da13f32d037e *man/defaultPrior.Rd 2676ec2d5d2b5b82dccc0cbd3c11bc13 *man/dens.Rd 91ec0e3bb8bfa83dc7368653ebd203d6 *man/densityMclust.Rd 266b5b8c466661154f0904ef3d615431 *man/densityMclust.diagnostic.Rd ec183d2e2f2cee6758cef7445c1f2d33 *man/diabetes.Rd 
92ea0d6609afc260770a66d2ad957f04 *man/dmvnorm.Rd d8e808a964b5deb680007ae1ee1b5997 *man/dupPartition.Rd e0d38101ddddf5fd476ec651f2527af5 *man/em.Rd df3e1e471eccd851260c5c668f4a33e4 *man/emControl.Rd 4cc7c95403c93c80dbc47f05c6b70a52 *man/emE.Rd 7ec8253676ce2d0d911a74b74ceee450 *man/entPlot.Rd 6e7e4d7ec91567f07c07d95891720b0b *man/errorBars.Rd cbd0f5744473f1e9939cfa60e4608b12 *man/estep.Rd 262848b35d252a53470aad0d956e48ae *man/estepE.Rd 246f72baa397cd928f97c9cb6c3ff940 *man/figures/logo.png d592f984a4d0741fe7282e03e630acb4 *man/gmmhd.Rd 9937da21c7a75537781f65f9b5670c13 *man/hc.Rd b4d9712d43b640bf6e90642b59d33af3 *man/hcE.Rd ed3b16d80e13872b6cf8ce7e80d0041c *man/hcRandomPairs.Rd dcaccecf8c2189f5a3138173a258f591 *man/hclass.Rd 2ddfa6fedfaf2a8b469024689cad3e32 *man/hdrlevels.Rd f5c538311fa12c3298e7524d9eb62817 *man/hypvol.Rd 918939d5858154bd5aa2a789e8adda3a *man/icl.Rd a91d9fd699250bc9a4e3757ae60b6247 *man/imputeData.Rd 327ef4d86112a911b2648b7a47b2e60e *man/imputePairs.Rd cfb07fb48ca73468e3283687cdd54d97 *man/logLik.Mclust.Rd 525b30e85bd1a3ea08b30745b3cdadd3 *man/logLik.MclustDA.Rd 699915f3a4cf8bfd6718f71f2a754c48 *man/majorityVote.Rd 7d8989594ce3a00a8633781ff84658f0 *man/map.Rd 4e3901ea67e0c125d8e5ac4330df2e38 *man/mapClass.Rd f1edaf4e08180fa4edad91f6023b58c3 *man/mclust-deprecated.Rd c2686ac817e8bbb871363f12f143cc1d *man/mclust-internal.Rd 0bc49dba3f3014ad2758c8ea1ff880d0 *man/mclust-package.Rd dbdb7e3336c3760b23504cb5361bb265 *man/mclust.options.Rd 06f681ededb7169ee1e4dbe256233f4a *man/mclust1Dplot.Rd 0d32e83fa5e8a19883b7189e597e2d08 *man/mclust2Dplot.Rd dd8294d60e0475003a4e94ecbb9f2cdf *man/mclustBIC.Rd 238e7aa5fdbff7f89e2444d3022fddfb *man/mclustBICupdate.Rd 38a76cbae6cbf9afdbe7e516e33c6c56 *man/mclustBootstrapLRT.Rd 9193ecabab69de75262cc9ea251c479c *man/mclustICL.Rd 26b1e6ddb4c2016044aaef2315e66fb8 *man/mclustLoglik.Rd 9d69c87d5861e6de08a57854ed4bf324 *man/mclustModel.Rd caf6238f3fe60beaaf874d6c1c98065b *man/mclustModelNames.Rd 9c49fb43d204333caacaa786cac65af9 
*man/mclustVariance.Rd 1400e1fe29cf0dbeb443d6e89a0c3706 *man/me.Rd 2c269df04f607068c021e07414bae71a *man/me.weighted.Rd ac659d4036d5e464339507dd9d76ec3f *man/meE.Rd d46877118126083ae9bff0f5f72208d0 *man/mstep.Rd 8ba56642820275d19834149d7d552327 *man/mstepE.Rd 16c6ef6baa11a8e7f6dc3f515aa9f9ed *man/mvn.Rd fff5c19cf5ffeae6fe2fc54f35c34da3 *man/mvnX.Rd c3796dae5e43a2c26a9ee9b73fcf2276 *man/nMclustParams.Rd 2099a0c748ce32038c52f4aedf1886c5 *man/nVarParams.Rd 36355172607046a0c37f10dee1197ed5 *man/partconv.Rd b0354eb07d5c849fc6c0be463c30b31b *man/partuniq.Rd d84812bf9da9c9e76465ae24ae554882 *man/plot.Mclust.Rd 25b5261ddbced669fefaba45a72e4f04 *man/plot.MclustBoostrap.Rd 01f1cd87231cbb640308280faf53d535 *man/plot.MclustDA.Rd cfb781f6b9fe5bf1d2c8cdf49e1a8c04 *man/plot.MclustDR.Rd 3fd0d79ca9549c14b822845673cad5c1 *man/plot.MclustSSC.Rd 458f2b4c3940e714a015e03e553d364b *man/plot.clustCombi.Rd ba93777b3a9c268702cdad7a85bbc2f4 *man/plot.densityMclust.Rd 339efd96c69d386b0e4a34094084a486 *man/plot.hc.Rd 67acd4e69e1861bebf51d1f649e775a7 *man/plot.mclustBIC.Rd 640264c2366087d920e4518af50281a7 *man/plot.mclustICL.Rd f640c08bd9098247a97a44f30c89a4cf *man/predict.Mclust.Rd 79be9849129ebfbdf9c0a6cdfac3d328 *man/predict.MclustDA.Rd e84b696c5b8eff056814c3cba07bee9f *man/predict.MclustDR.Rd 66b20d05c52e7dc62fcb4971f1b2287b *man/predict.MclustSSC.Rd bdb1679df5c649df2371270753107cca *man/predict.densityMclust.Rd 8384a865dade63acabf451b136217287 *man/priorControl.Rd 1a32c267ea6deb106ae19b6cec847115 *man/randProj.Rd d9477d3e3d801b783e42d10e37271e05 *man/randomOrthogonalMatrix.Rd e43a5fead9ef355e3d522ec94e224f66 *man/sigma2decomp.Rd 1ed03f440760a36b9c66bf6c5259f1cb *man/sim.Rd 77797cb68e57027ea316e1b068b9ed63 *man/simE.Rd 63cfca85d2a01ff42db2c501780a8d8f *man/summary.Mclust.Rd 2d9de49152e45c3362bdaf30a242d56c *man/summary.MclustBootstrap.Rd 4a8c675b46da86ca7075999c22e635f4 *man/summary.MclustDA.Rd 103fff818063adef9f3262398342fb0a *man/summary.MclustDR.Rd fe2f8df16cc881677e427811d2d8a0d9 
*man/summary.MclustSSC.Rd 986ecadd060b62810d73fa37ec72dc19 *man/summary.mclustBIC.Rd 35a02e5f739cbc7ced68ce7f232852cf *man/surfacePlot.Rd f4c0b79cc4812c5ebac9d75ea85239d1 *man/thyroid.Rd 16f301b425aebb1ac6b6f0175427dabc *man/uncerPlot.Rd b76a9e2d21683188dc8bce832e2ec9d1 *man/unmap.Rd be15964c22ee1ec25420a1352049519a *man/wdbc.Rd c1b81d23059192faf2b0fdb40b0bc0d2 *man/wreath.Rd 2a6f9e9e044a78154d3cfda5936d6f48 *src/Makevars d784799104d2c2350f9350b268761a2b *src/dmvnorm.f 394a717e565a6647678c758462d11eb2 *src/init.c 5ddcdc7e9d5c82abda7bcb62fb594cb3 *src/mclust.f 015387e2f8c8f04d9e8900f82002a925 *src/mclustaddson.f c7680542d4532804d5be21b3684986c7 *vignettes/mclust.Rmd 71148153fcf7f0388c1ae6f5da2e9f06 *vignettes/vignette.css mclust/NEWS.md0000644000175000017500000004214414157116747013033 0ustar nileshnilesh# mclust 5.4.9 - Added `crimcoords()` to compute discriminant coordinates or crimcoords. - Fixed man page for `cvMclustDA()`. # mclust 5.4.8 - `densityMclust()` by default draw a graph of the density estimate. - Fixed a bug in computing mixture density if the noise component is present. - Changed default behaviour of `hc()` when called to perform agglomerative hierarchical clustering instead of using for EM initialization. - The default `mclust.options("hcModelName")` now returns only the model to be used. - Changed default `partition` argument of `hc()` function by adding `dupPartion()` to remove data duplicates. - Added checks to `mclustBootstrapLRT()` to stop if an invalid `modelName` is provided or a one-component mixture model is provided. - Extended the functionality of `cvMclustDA()` by including as cross-validated metrics both the classification error and the Brier score. - Updated info on dataset man pages. # mclust 5.4.7 - Updated plot method (dendrogram) for hierarchical clustering --- now based on classification likelihood. - Added `MclustSSC()` function (and related `print`, `summary`, `plot`, and `predict`, methods) for semi-supervised classification. 
- Exchanged order of models VEE and EVE to account for increasing complexity of EVE. - Added `cex` argument to `clPairs()` to control character expansion used in plotting symbols. - `em()` and `me()` have now `data` as first argument. # mclust 5.4.6 - Fixed issues with source Fortran code with gfortran 10 as reported by CRAN. - Clean code of `hcCriterion()`. - Replaced `CEX` argument in functions with standard base graph `cex` argument. - Removed `ylim` argument in function; it can be passed via `...`. - MclustDA models use the default SVD transformation of the data for initialisation of the EM algorithm. - Added `icl` criterion to object returned by `Mclust()`. - Fixed number of pages for the RJ reference. - quantileMclust() uses bisection line search method for numerically computing quantiles. # mclust 5.4.5 - Fixed warnings in Fortran calls raised by CRAN. # mclust 5.4.4 - Added `classPriorProbs()` to estimate prior class probabilities. - Added `BrierScore()` to compute the Brier score for assessing the accuracy of probabilistic predictions. - Added `randomOrthogonalMatrix()` to generate random orthogonal basis matrices. - Partial rewriting of `summary.MclustDA()` internals to provide both the classification error and the Brier score for training and/or test data. - Partial rewriting of `plot.MclustDA()` internals. - Added `dmvnorm()` for computing the density of a general multivariate Gaussian distribution via efficient Fortran code. - Added Wisconsin diagnostic breast cancer (WDBC) data. - Added EuroUnemployment data. - Fixed mismatches in Fortran calls. - Bugs fix. # mclust 5.4.3 - Added website site and update DESCRIPTION with URL. - Fixed a bug when checking for univariate data with a single observation in several instances. Using `NCOL()` works both for n-values vector or nx1 matrix. - Fixed a bug when `hcPairs` are provided in the `initialization` argument of `mclustBIC()` (and relatives) and the number of observations exceed the threshold for subsetting. 
- Fixed bugs on axes for some manual pairs plots. - Renamed `type = "level"` to `type = "hdr"`, and `level.prob` to `prob`, in `surfacePlot()` for getting HDRs graphs - Fixed a bug in `type = "hdr"` plot on `surfacePlot()`. - Fixed a bug in `as.Mclust()`. - Small changes to `summary.MclustDA()` when `modelType = "EDDA"` and in general for a more compact output. # mclust 5.4.2 - Added `mclustBICupdate()` to merge the best values from two BIC results as returned by `mclustBIC()`. - Added `mclustLoglik()` to compute the maximal log-likelihood values from BIC results as returned by `mclustBIC()`. - Added option `type = "level"` to `plot.densityMclust()` and `surfacePlot()` to draw highest density regions. - Added `meXXI()` and `meXXX()` to exported functions. - Updated vignette. # mclust 5.4.1 - Added parametric bootstrap option (`type = "pb"`) in `MclustBootstrap()`. - Added the options to get averages of resampling distributions in `summary.MclustBootstrap()` and to plot resampling-based confidence intervals in `plot.MclustBootstrap()`. - Added function `catwrap()` for wrapping printed lines at `getOption("width")` when using `cat()`. - `mclust.options()` now modify the variable `.mclust` in the namespace of the package, so it should work even inside an mclust-function call. - Fixed a bug in `covw()` when `normalize = TRUE`. - Fixed a bug in `estepVEV()` and `estepVEE()` when parameters contains `Vinv`. - Fixed a bug in `plotDensityMclustd()` when drawing marginal axes. - Fixed a bug in `summary.MclustDA()` when computing classification error in the extreme case of a minor class of assignment. - Fixed a bug in the initialisation of `mclustBIC()` when a noise component is present for 1-dimensional data. - Fixed bugs in some examples documenting `clustCombi()` and related functions. # mclust 5.4 - Model-based hierarchical clustering used to start the EM-algorithm is now based on the scaled SVD transformation proposed by Scrucca and Raftery (2016). 
This change is not backward compatible. However, previous results can be easily obtained by issuing the command: `mclust.options(hcUse = "VARS") For more details see help("mclust.options")`. - Added `subset` parameter in `mclust.options()` to control the maximal sample size to be used in the initial model-based hierarchical phase. - `predict.densityMclust()` can optionally returns the density on a logarithm scale. - Removed normalization of mixing proportions for new models in single mstep. - Internal rewrite of code used by `packageStartupMessage()`. - Fixed a small bug in `MclustBootstrap()` in the univariate data case. - Fixed bugs when both the noise and subset are provided for initialization. - Vignette updated to include references, startup message, css style, etc. - Various bug fixes in plotting methods when noise is present. - Updated references in `citation()` and man pages. # mclust 5.3 (2017-05) - Added `gmmhd()` function and relative methods. - Added `MclustDRsubsel()` function and relative methods. - Added option to use subset in the hierarchical initialization step when a noise component is present. - `plot.clustCombi()` presents a menu in interactive sessions, no more need of data for classification plots but extract the data from the `clustCombi` object. - Added `combiTree()` plot for `clustCombi` objects. - `clPairs()` now produces a single scatterplot in the bivariate case. - Fixed a bug in `imputeData()` when seed is provided. Now if a seed is provided the data matrix is reproducible. - in `imputeData()` and `imputePairs()` some name of arguments have been modified to be coherent with the rest of the package. - Added functions `matchCluster()` and `majorityVote()`. - Rewrite of print and summary methods for `clustCombi` class objects. - Added `clustCombiOptim()`. - Fixed a bug in `randomPairs()` when nrow of input data is odd. - Fixed a bug in `plotDensityMclust2()`, `plotDensityMclustd()` and `surfacePlot()` when a noise component is present. 
# mclust 5.2.3 (2017-03) - Added native routine registration for Fortran code. - Fixed lowercase argument PACKAGE in `.Fortran()` calls. # mclust 5.2.2 (2017-01) - Fixed a bug in rare case when performing an extra M step at the end of EM algorithm. # mclust 5.2.1 (2017-01) - Replaced `structure(NULL, *)` with `structure(list(), *)` # mclust 5.2 (2016-03) - Added argument `x` to `Mclust()` to use BIC values from previous computations to avoid recomputing for the same models. The same argument and functionality was already available in `mclustBIC()`. - Added argument `x` to `mclustICL()` to use ICL values from previous computations to avoid recomputing for the same models. - Fixed a bug on `plot.MclustBootstrap()` for the `"mean"` and `"var"` in the univariate case. - Fixed uncertainty plots. - Added functions `as.Mclust()` and `as.densityMclust()` to convert object to specific mclust classes. - Solved a numerical accuracy problem in `qclass()` when the scale of `x` is (very) large by making the tolerance eps scale dependent. - Use transpose subroutine instead of non-Fortran 77 TRANSPOSE function in `mclustaddson.f`. - Fixed `predict.Mclust()` and `predict.MclustDR()` by implementing a more efficient and accurate algorithm for computing the densities. # mclust 5.1 (2015-10) - Fixed slow convergence for VVE and EVE models. - Fixed a bug in orientation for model VEE. - Added an extra M-step and parameters update in `Mclust()` call via `summaryMclustBIC()`. # mclust 5.0.2 (2015-07) - Added option to `MclustBootstrap()` for using weighted likelihood bootstrap. - Added a plot method for `MclustBootstrap` objects. - Added `errorBars()` function. - Added `clPairsLegend()` function. - Added `covw()` function. - Fixed rescaling of mixing probabilities in new models. - bug fixes. # mclust 5.0.1 (2015-04) - Fixed bugs. - Added print method for `hc` objects. # mclust 5.0.0 (2015-03) - Added the four missing models (EVV, VEE, EVE, VVE) to the mclust family. 
A noise component is allowed, but no prior is available. - Added `mclustBootstrapLRT()` function (and corresponding print and plot methods) for selecting the number of mixture components based on the sequential bootstrap likelihood ratio test. - Added `MclustBootstrap()` function (and corresponding print and summary methods) for performing bootstrap inference. This provides standard errors for parameters and confidence intervals. - Added `"A quick tour of mclust"` vignette as html generated using rmarkdown and knitr. Older vignettes are included as other documentation for the package. - Modified arguments to `mvn2plot()` to control colour, lty, lwd, and pch of ellipses and mean point. - Added functions `emX()`, `emXII()`, `emXXI()`, `emXXX()`, `cdensX()`, `cdensXII()`, `cdensXXI()`, and `cdensXXX()`, to deal with single-component cases, so calling the em function works even if `G = 1`. - Small changes to `icl()`, now it is a generic method, with specialized methods for `Mclust` and `MclustDA` objects. - Fixed bug for transformations in the initialization step when some variables are constant (i.e. the variance is zero) or a one-dimensional data is provided. - Changed the order of arguments in `hc()` (and all the functions calling it). - Small modification to `CITATION` file upon request of CRAN maintainers. - Various bug fixes. # mclust 4.4 (2014-09) - Added option for using transformation of variables in the hierarchical initialization step. - Added `quantileMclust()` for computing the quantiles from a univariate Gaussian mixture distribution. - Fixed bugs on `summaryMclustBIC()`, `summaryMclustBICn()`, `Mclust()` to return a matrix of 1s on a single column for `z` even in the case of `G = 1`. This is to avoid error on some plots. - Moved pdf files (previously included as vignettes) to `inst/doc` with corresponding `index.html`. # mclust 4.3 (2014-03) - Fixed bug for `logLik.MclustDA()` in the univariate case. 
- Added argument `"what"` to `predict.densityMclust()` function for choosing what to retrieve, the mixture density or component density. - `hc()` function has an additional parameter to control if the original variables or a transformation of them should be used for hierarchical clustering. - Added `"hcUse"` argument in `mclust.options()` to be passed as default to `hc()`. - Added the storing of original data (and class for classification models) in the object returned by the main functions. - Added component `hypvol` to `Mclust` object which provide the hypervolume of the noise component when required, otherwise is set to NA. - Added a warning when prior is used and BIC returns NAs. - Fixed bugs in `summary.Mclust()`, `print.summary.Mclust()`, `plot.Mclust()` and `icl()` in the case of presence of a noise component. - Fixed bug on some plots in `plot.MclustDR()` which requires `plot.new()` before calling `plot.window()`. - Fixed a bug in `MclustDR()` for the one-dimensional case. - Corrections to `Mclust` man page. - Various small bug fixes. # mclust 4.2 (2013-07) - Fixed bug in `sim*()` functions when no obs are assigned to a component. - `MclustDA()` allows to fit a single class model. - Fixex bug in `summary.Mclust()` when a subset is used for initialization. - Fixed a bug in the function `qclass()` when ties are present in quantiles, so it always return the required number of classes. - Various small bug fixes. # mclust 4.1 (2013-04) - Added `icl()` function for computing the integrated complete-data likelihood. - Added `mclustICL()` function with associated print and plot methods. - `print.mclustBIC()` shows also the top models based on BIC. - Modified `summary.Mclust()` to return also the icl. - Rewrite of `adjustedRandIndex()` function. This version is more efficient for large vectors. - Updated help for `adjustedRandIndex()`. - Modifications to `MclustDR()` and its summary method. - Changed behavior of `plot.MclustDR(..., what = "contour")`. 
- Improved plot of uncertainty for `plot.MclustDR(..., what = "boundaries")`. - Corrected a bug for malformed GvHD data. - Corrected version of qclass for selecting initial values in case of 1D data when successive quantiles coincide. - Corrected version of plot BIC values when only a single G component models are fitted. - Various bug fixes. # mclust 4.0 (2012-08) - Added new summary and print methods for `Mclust()`. - Added new summary and print methods for `densityMclust()`. - Included `MclustDA()` function and methods. - Included `MclustDR()` function and methods. - Included `me.weighted()` function. - Restored hierarchical clustering capability for the EEE model (hcEEE). - Included vignettes for mclust version 4 from Technical Report No. 597 and for using weights in mclust. - Adoption of GPL (>= 2) license. # mclust 3.5 (2012-07) - Added `summary.Mclust()`. - New functions for plotting and summarizing density estimation. - Various bug fixes. - Added `clustCombi()` and related functions (code and doc provided by Jean-Patrick Baudry). - Bug fix: variable names lost when G = 1. # mclust 3.4.11 (2012-01) - Added `NAMESPACE`. # mclust 3.4.10 (2011-05) - Removed intrinsic gamma- # mclust 3.4.9 (2011-05) - Fixed `hypvol()` function to avoid overflow. - Fixed `hypvol()` help file value description. - Removed unused variables and tabs from source code. - Switched to intrinsic gamma in source code. - Fixed default warning in estepVEV and mstepVEV. # mclust 3.4.8 (2010-12) - Fixed output when G = 1 (it had NA for the missing `z` component). # mclust 3.4.7 (2010-10) - Removed hierarchical clustering capability for the `EEE` model (hcEEE). - The R 2.12.0 build failed due to a 32-bit Windows compiler error, forcing removal of the underlying Fortran code for hcEEE from the package, which does not contain errors and compiles on other platforms. # mclust 3.4.6 (2010-08) - Added description of parameters output component to `Mclust` and `summary.mclustBIC` help files. 
# mclust 3.4.5 (2010-07) - Added `densityMclust()` function. # mclust 3.4.4 (2010-04) - Fixed bug in covariance matrix output for EEV and VEV models. # mclust 3.4.3 (2010-02) - Bug fixes. # mclust 3.4.2 (2010-02) - Moved CITATION to inst and used standard format - BibTex entries are in inst/cite. - Fixed bug in handling missing classes in `mclustBIC()`. - Clarified license wording. # mclust 3.4.1 (2010-01) - Corrected output description in `mclustModel` help file. - Updated mclust manual reference to show revision. # mclust 3.4 (2009-12) - Updated `defaultPrior` help file. - Added utility functions for imputing missing data with the mix package. - Changed default max to number of mixture components in each class from 9 to 3. # mclust 3.3.2 (2009-10) - Fixed problems with \cr in `mclustOptions` help file # mclust 3.3.1 (2009-06) - Fixed `plot.mclustBIC()` and `plot.Mclust()` to handle `modelNames`. - Changed "orientation" for VEV, VVV models to be consistent with R `eigen()` and the literature - Fixed some problems including doc for the noise option. - Updated the `unmap()` function to optionally include missing groups. # mclust 3.3 (2009-06) - Fixed bug in the `"errors"` option for `randProj()`. - Fixed boundary cases for the `"noise"` option. # mclust 3.2 (2009-04) - Added permission for CRAN distribution to LICENSE. - Fixed problems with help files found by new parser. - Changed PKG_LIBS order in src/Makevars. - Fixed `Mclust()` to handle sampling in data expression in call. # mclust 3.1.10 (2008-11) - Added `EXPR = to` all switch functions that didn't already have it. # mclust 3.1.9 (2008-10) - Added `pro` component to parameters in `dens()` help file. - Fixed some problems with the noise option. # mclust 3.1.1 (2007-03) - Default seed changed in `sim*()` functions. - Added model name check to various functions. - Otherwise backward compatible with version 3.0 # mclust 3.1 (2007-01) - Most plotting functions changed to use color. 
- `Mclust()` and `mclustBIC()` fixed to work with G=1 - Otherwise backward compatible with version 3.0. # mclust 3.0 (2006-10) - New functionality added, including conjugate priors for Bayesian regularization. - Backward compatibility is not guaranteed since the implementation of some functions has changed to make them easier to use or maintain. mclust/DESCRIPTION0000644000175000017500000000335314157124112013423 0ustar nileshnileshPackage: mclust Version: 5.4.9 Date: 2021-12-17 Title: Gaussian Mixture Modelling for Model-Based Clustering, Classification, and Density Estimation Description: Gaussian finite mixture models fitted via EM algorithm for model-based clustering, classification, and density estimation, including Bayesian regularization, dimension reduction for visualisation, and resampling-based inference. Authors@R: c(person("Chris", "Fraley", role = "aut"), person("Adrian E.", "Raftery", role = "aut", comment = c(ORCID = "0000-0002-6589-301X")), person("Luca", "Scrucca", role = c("aut", "cre"), email = "luca.scrucca@unipg.it", comment = c(ORCID = "0000-0003-3826-0484")), person("Thomas Brendan", "Murphy", role = "ctb", comment = c(ORCID = "0000-0002-5668-7046")), person("Michael", "Fop", role = "ctb", comment = c(ORCID = "0000-0003-3936-2757"))) Depends: R (>= 3.0) Imports: stats, utils, graphics, grDevices Suggests: knitr (>= 1.12), rmarkdown (>= 0.9), mix (>= 1.0), geometry (>= 0.3-6), MASS License: GPL (>= 2) URL: https://mclust-org.github.io/mclust/ VignetteBuilder: knitr Repository: CRAN ByteCompile: true NeedsCompilation: yes LazyData: yes Encoding: UTF-8 Packaged: 2021-12-17 14:26:42 UTC; luca Author: Chris Fraley [aut], Adrian E. 
Raftery [aut] (), Luca Scrucca [aut, cre] (), Thomas Brendan Murphy [ctb] (), Michael Fop [ctb] () Maintainer: Luca Scrucca Date/Publication: 2021-12-17 15:10:02 UTC mclust/man/0000755000175000017500000000000014157117042012470 5ustar nileshnileshmclust/man/meE.Rd0000644000175000017500000001216113752164615013476 0ustar nileshnilesh\name{meE} \alias{meE} \alias{meV} \alias{meX} \alias{meEII} \alias{meVII} \alias{meEEI} \alias{meVEI} \alias{meEVI} \alias{meVVI} \alias{meEEE} \alias{meVEE} \alias{meEVE} \alias{meVVE} \alias{meEEV} \alias{meVEV} \alias{meEVV} \alias{meVVV} \alias{meXII} \alias{meXXI} \alias{meXXX} \title{EM algorithm starting with M-step for a parameterized Gaussian mixture model} \description{ Implements the EM algorithm for a parameterized Gaussian mixture model, starting with the maximization step. } \usage{ meE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meV(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meX(data, prior = NULL, warn = NULL, \dots) meEII(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVII(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEEI(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVEI(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEVI(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVVI(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEEE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVEE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEVE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVVE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEEV(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVEV(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEVV(data, z, prior=NULL, 
control=emControl(), Vinv=NULL, warn=NULL, \dots) meVVV(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meXII(data, prior = NULL, warn = NULL, \dots) meXXI(data, prior = NULL, warn = NULL, \dots) meXXX(data, prior = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{Vinv}{ An estimate of the reciprocal hypervolume of the data region, when the model is to include a noise term. Set to a negative value or zero if a noise term is desired, but an estimate is unavailable --- in that case function \code{hypvol} will be used to obtain the estimate. The default is not to assume a noise term in the model through the setting \code{Vinv=NULL}. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is given by \code{mclust.options("warn")}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. 
} \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations.\cr } } \seealso{ \code{\link{em}}, \code{\link{me}}, \code{\link{estep}}, \code{\link{mclust.options}} } \examples{ meVVV(data = iris[,-5], z = unmap(iris[,5])) } \keyword{cluster} mclust/man/mclustBootstrapLRT.Rd0000644000175000017500000001111714124774626016562 0ustar nileshnilesh\name{mclustBootstrapLRT} \alias{mclustBootstrapLRT} \alias{print.mclustBootstrapLRT} \alias{plot.mclustBootstrapLRT} \title{Bootstrap Likelihood Ratio Test for the Number of Mixture Components} \description{Perform the likelihood ratio test (LRT) for assessing the number of mixture components in a specific finite mixture model parameterisation. 
The observed significance is approximated by using the (parametric) bootstrap for the likelihood ratio test statistic (LRTS).} \usage{ mclustBootstrapLRT(data, modelName = NULL, nboot = 999, level = 0.05, maxG = NULL, verbose = interactive(), \dots) \method{print}{mclustBootstrapLRT}(x, \dots) \method{plot}{mclustBootstrapLRT}(x, G = 1, hist.col = "grey", hist.border = "lightgrey", breaks = "Scott", col = "forestgreen", lwd = 2, lty = 3, main = NULL, \dots) } \arguments{ \item{data}{A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables.} \item{modelName}{A character string indicating the mixture model to be fitted. The help file for \code{\link{mclustModelNames}} describes the available models.} \item{nboot}{The number of bootstrap replications to use (by default 999).} \item{level}{The significance level to be used to terminate the sequential bootstrap procedure.} \item{maxG}{The maximum number of mixture components \eqn{G} to test. If not provided the procedure is stopped when a test is not significant at the specified \code{level}.} \item{verbose}{A logical controlling if a text progress bar is displayed during the bootstrap procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise.} \item{\dots}{Further arguments passed to or from other methods. 
In particular, see the optional arguments in \code{\link{mclustBIC}}.} \item{x}{An \code{'mclustBootstrapLRT'} object.} \item{G}{A value specifying the number of components for which to plot the bootstrap distribution.} \item{hist.col}{The colour to be used to fill the bars of the histogram.} \item{hist.border}{The color of the border around the bars of the histogram.} \item{breaks}{See the argument in function \code{\link[graphics]{hist}}.} \item{col, lwd, lty}{The color, line width and line type to be used to represent the observed LRT statistic.} \item{main}{The title for the graph.} } \details{The implemented algorithm for computing the LRT observed significance using the bootstrap is the following. Let \eqn{G_0} be the number of mixture components under the null hypothesis versus \eqn{G_1 = G_0+1} under the alternative. Bootstrap samples are drawn by simulating data under the null hypothesis. Then, the p-value may be approximated using eq. (13) on McLachlan and Rathnayake (2014). Equivalently, using the notation of Davison and Hinkley (1997) it may be computed as \deqn{\textnormal{p-value} = \frac{1 + \#\{LRT^*_b \ge LRTS_{obs}\}}{B+1}}{% p-value = (1 + #{LRTS*_b \ge LRT_obs}) / (B+1)} where \cr \eqn{B} = number of bootstrap samples \cr \eqn{LRT_{obs}}{LRT_obs} = LRTS computed on the observed data\cr \eqn{LRT^*_b}{LRT*_b} = LRTS computed on the \eqn{b}th bootstrap sample. } \value{An object of class \code{'mclustBootstrapLRT'} with the following components: \item{G}{A vector of number of components tested under the null hypothesis.} \item{modelName}{A character string specifying the mixture model as provided in the function call (see above).} \item{obs}{The observed values of the LRTS.} \item{boot}{A matrix of dimension \code{nboot} x the number of components tested containing the bootstrap values of LRTS.} \item{p.value}{A vector of p-values.} } \references{ Davison, A. and Hinkley, D. (1997) \emph{Bootstrap Methods and Their Applications}. 
Cambridge University Press. McLachlan G.J. (1987) On bootstrapping the likelihood ratio test statistic for the number of components in a normal mixture. \emph{Applied Statistics}, 36, 318-324. McLachlan, G.J. and Peel, D. (2000) \emph{Finite Mixture Models}. Wiley. McLachlan, G.J. and Rathnayake, S. (2014) On the number of components in a Gaussian mixture model. \emph{Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery}, 4(5), pp. 341-355. } \seealso{\code{\link{mclustBIC}}, \code{\link{mclustICL}}, \code{\link{Mclust}}} \examples{ \donttest{ data(faithful) faithful.boot = mclustBootstrapLRT(faithful, model = "VVV") faithful.boot plot(faithful.boot, G = 1) plot(faithful.boot, G = 2) } } \keyword{htest} \keyword{cluster} mclust/man/clustCombi-internals.Rd0000644000175000017500000000034612460535131017061 0ustar nileshnilesh\name{clustCombi-internal} \title{Internal clustCombi functions} \alias{combi} \alias{pcws2_reg} \alias{pcws3_reg} \alias{xlog} \description{ Internal functions not intended to be called directly by users. } \keyword{internal} mclust/man/mclustModelNames.Rd0000644000175000017500000000373713752164012016244 0ustar nileshnilesh\name{mclustModelNames} \alias{mclustModelNames} \title{ MCLUST Model Names } \description{ Description of model names used in the \emph{MCLUST} package. 
} \usage{ mclustModelNames(model) } \arguments{ \item{model}{A string specifying the model.} } \details{ The following models are available in package \pkg{mclust}:\cr \bold{univariate mixture} \cr \describe{ \item{\code{"E"}}{equal variance (one-dimensional)} \item{\code{"V"}}{variable/unequal variance (one-dimensional)} } \bold{multivariate mixture}\cr \describe{ \item{\code{"EII"}}{spherical, equal volume} \item{\code{"VII"}}{spherical, unequal volume} \item{\code{"EEI"}}{diagonal, equal volume and shape} \item{\code{"VEI"}}{diagonal, varying volume, equal shape} \item{\code{"EVI"}}{diagonal, equal volume, varying shape} \item{\code{"VVI"}}{diagonal, varying volume and shape} \item{\code{"EEE"}}{ellipsoidal, equal volume, shape, and orientation} \item{\code{"VEE"}}{ellipsoidal, equal shape and orientation (*)} \item{\code{"EVE"}}{ellipsoidal, equal volume and orientation (*)} \item{\code{"VVE"}}{ellipsoidal, equal orientation (*)} \item{\code{"EEV"}}{ellipsoidal, equal volume and equal shape} \item{\code{"VEV"}}{ellipsoidal, equal shape} \item{\code{"EVV"}}{ellipsoidal, equal volume (*)} \item{\code{"VVV"}}{ellipsoidal, varying volume, shape, and orientation} } \bold{single component}\cr \describe{ \item{\code{"X"}}{univariate normal} \item{\code{"XII"}}{spherical multivariate normal} \item{\code{"XXI"}}{diagonal multivariate normal} \item{\code{"XXX"}}{ellipsoidal multivariate normal} } (*) new models in \pkg{mclust} version >= 5.0.0. 
} \value{Returns a list with the following components: \item{model}{a character string indicating the model (as in input).} \item{type}{the description of the indicated model (see Details section).} } \seealso{ \code{\link{Mclust}}, \code{\link{mclustBIC}} } \examples{ mclustModelNames("E") mclustModelNames("EEE") mclustModelNames("VVV") mclustModelNames("XXI") } \keyword{cluster} mclust/man/mclust-internal.Rd0000644000175000017500000000133414156711510016100 0ustar nileshnilesh\name{mclust-internal} \title{Internal MCLUST functions} \alias{pickBIC} \alias{bicFill} \alias{grid1} \alias{grid2} \alias{mvn2plot} \alias{vecnorm} \alias{traceW} \alias{qclass} \alias{unchol} \alias{shapeO} \alias{orth2} \alias{charconv} \alias{[.mclustBIC} \alias{checkModelName} \alias{balancedFolds} \alias{permuteRows} \alias{projpar.MclustDR} \alias{projdir.MclustDR} %\alias{mvdnorm} \alias{ellipse} \alias{eigen.decomp} \alias{getParameters.MclustDA} \alias{as.Mclust} \alias{as.Mclust.default} \alias{as.Mclust.densityMclust} \alias{as.densityMclust} \alias{as.densityMclust.default} \alias{as.densityMclust.Mclust} \description{ Internal functions not intended to be called directly by users. 
} \keyword{internal} mclust/man/summary.MclustDR.Rd0000644000175000017500000000214713175055251016156 0ustar nileshnilesh\name{summary.MclustDR} \alias{summary.MclustDR} \alias{print.summary.MclustDR} \title{Summarizing dimension reduction method for model-based clustering and classification} \description{Summary method for class \code{"MclustDR"}.} \usage{ \method{summary}{MclustDR}(object, numdir, std = FALSE, \dots) \method{print}{summary.MclustDR}(x, digits = max(5, getOption("digits") - 3), \dots) } \arguments{ \item{object}{An object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}}.} \item{x}{An object of class \code{'summary.MclustDR'}, usually, a result of a call to \code{summary.MclustDR}.} \item{numdir}{An integer providing the number of basis directions to be printed.} \item{std}{if \code{TRUE} the coefficients basis are scaled such that all predictors have unit standard deviation.} \item{digits}{The number of significant digits to use when printing.} \item{\dots}{Further arguments passed to or from other methods.} } %\details{} %\value{} \author{Luca Scrucca} %\note{} \seealso{ \code{\link{MclustDR}}, \code{\link{plot.MclustDR}} } %\examples{} %\keyword{} mclust/man/mclustICL.Rd0000644000175000017500000001044214124774626014632 0ustar nileshnilesh\name{mclustICL} \alias{mclustICL} \alias{print.mclustICL} \alias{summary.mclustICL} \alias{print.summary.mclustICL} \title{ICL Criterion for Model-Based Clustering} \description{ ICL (Integrated Complete-data Likelihood) for parameterized Gaussian mixture models fitted by EM algorithm initialized by model-based hierarchical clustering. } \usage{ mclustICL(data, G = NULL, modelNames = NULL, initialization = list(hcPairs = NULL, subset = NULL, noise = NULL), x = NULL, \dots) \method{summary}{mclustICL}(object, G, modelNames, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. 
If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{G}{ An integer vector specifying the numbers of mixture components (clusters) for which the criteria should be calculated. The default is \code{G = 1:9}. } \item{modelNames}{ A vector of character strings indicating the models to be fitted in the EM phase of clustering. The help file for \code{\link{mclustModelNames}} describes the available models. The default is: \describe{ \item{\code{c("E", "V")}}{for univariate data} \item{\code{mclust.options("emModelNames")}}{for multivariate data (n > d)} \item{\code{c("EII", "VII", "EEI", "EVI", "VEI", "VVI")}}{the spherical and diagonal models for multivariate data (n <= d)} } } \item{initialization}{ A list containing zero or more of the following components: \describe{ \item{\code{hcPairs}}{ A matrix of merge pairs for hierarchical clustering such as produced by function \code{hc}. For multivariate data, the default is to compute a hierarchical clustering tree by applying function \code{hc} with \code{modelName = "VVV"} to the data or a subset as indicated by the \code{subset} argument. The hierarchical clustering results are to start EM. For univariate data, the default is to use quantiles to start EM. } \item{\code{subset}}{ A logical or numeric vector specifying a subset of the data to be used in the initial hierarchical clustering phase. } } } \item{x}{ An object of class \code{'mclustICL'}. If supplied, \code{mclustICL} will use the settings in \code{x} to produce another object of class \code{'mclustICL'}, but with \code{G} and \code{modelNames} as specified in the arguments. Models that have already been computed in \code{x} are not recomputed. All arguments to \code{mclustICL} except \code{data}, \code{G} and \code{modelName} are ignored and their values are set as specified in the attributes of \code{x}. Defaults for \code{G} and \code{modelNames} are taken from \code{x}. 
} \item{\dots}{ Further arguments used in the call to \code{\link{Mclust}}. See also \code{\link{mclustBIC}}. } \item{object}{ An object of class \code{'mclustICL'} resulting from a call to \code{mclustICL}, to be summarized. } } \value{ Returns an object of class \code{'mclustICL'} containing the ICL criterion for the specified mixture models and numbers of clusters. The corresponding \code{print} method shows the matrix of values and the top models according to the ICL criterion. The \code{summary} method shows only the top models. } \references{ Biernacki, C., Celeux, G., Govaert, G. (2000). Assessing a mixture model for clustering with the integrated completed likelihood. \emph{IEEE Trans. Pattern Analysis and Machine Intelligence}, 22 (7), 719-725. Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. } \seealso{ \code{\link{plot.mclustICL}}, \code{\link{Mclust}}, \code{\link{mclustBIC}}, \code{\link{mclustBootstrapLRT}}, \code{\link{bic}}, \code{\link{icl}} } \examples{ data(faithful) faithful.ICL <- mclustICL(faithful) faithful.ICL summary(faithful.ICL) plot(faithful.ICL) \donttest{ # compare with faithful.BIC <- mclustBIC(faithful) faithful.BIC plot(faithful.BIC) } } \keyword{cluster} mclust/man/mstepE.Rd0000644000175000017500000001171514124774626014224 0ustar nileshnilesh\name{mstepE} \alias{mstepE} \alias{mstepV} \alias{mstepEII} \alias{mstepVII} \alias{mstepEEI} \alias{mstepVEI} \alias{mstepEVI} \alias{mstepVVI} \alias{mstepEEE} \alias{mstepEEV} \alias{mstepVEV} \alias{mstepVVV} \alias{mstepEVE} \alias{mstepEVV} \alias{mstepVEE} \alias{mstepVVE} \title{M-step for a parameterized Gaussian mixture model} \description{ Maximization step in the EM algorithm for a parameterized Gaussian mixture model. 
} \usage{ mstepE( data, z, prior = NULL, warn = NULL, \dots) mstepV( data, z, prior = NULL, warn = NULL, \dots) mstepEII( data, z, prior = NULL, warn = NULL, \dots) mstepVII( data, z, prior = NULL, warn = NULL, \dots) mstepEEI( data, z, prior = NULL, warn = NULL, \dots) mstepVEI( data, z, prior = NULL, warn = NULL, control = NULL, \dots) mstepEVI( data, z, prior = NULL, warn = NULL, \dots) mstepVVI( data, z, prior = NULL, warn = NULL, \dots) mstepEEE( data, z, prior = NULL, warn = NULL, \dots) mstepEEV( data, z, prior = NULL, warn = NULL, \dots) mstepVEV( data, z, prior = NULL, warn = NULL, control = NULL,\dots) mstepVVV( data, z, prior = NULL, warn = NULL, \dots) mstepEVE( data, z, prior = NULL, warn = NULL, control = NULL, \dots) mstepEVV( data, z, prior = NULL, warn = NULL, \dots) mstepVEE( data, z, prior = NULL, warn = NULL, control = NULL, \dots) mstepVVE( data, z, prior = NULL, warn = NULL, control = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. In analyses involving noise, this should not include the conditional probabilities for the noise component. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is given by \code{mclust.options("warn")}. } \item{control}{ Values controlling termination for models \code{"VEI"} and \code{"VEV"} that have an iterative M-step. This should be a list with components named \emph{itmax} and \emph{tol}. 
These components can be of length 1 or 2; in the latter case, \code{mstep} will use the second value, under the assumption that the first applies to an outer iteration (as in the function \code{me}). The default uses the default values from the function \code{emControl}, which sets no limit on the number of iterations, and a relative tolerance of \code{sqrt(.Machine$double.eps)} on successive iterates. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{Attributes:}{ \code{"info"} For those models with iterative M-steps (\code{"VEI"} and \code{"VEV"}), information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \note{ This function computes the M-step only for MVN mixtures, so in analyses involving noise, the conditional probabilities input should exclude those for the noise component. \cr In contrast to \code{me} for the EM algorithm, computations in \code{mstep} are carried out unless failure due to overflow would occur. 
To impose stricter tolerances on a single \code{mstep}, use \code{me} with the \emph{itmax} component of the \code{control} argument set to 1. } \seealso{ \code{\link{mstep}}, \code{\link{me}}, \code{\link{estep}}, \code{\link{mclustVariance}}, \code{\link{priorControl}}, \code{\link{emControl}}. } \examples{ \donttest{ mstepVII(data = iris[,-5], z = unmap(iris[,5]))} } \keyword{cluster} mclust/man/cdens.Rd0000644000175000017500000000600513766613336014070 0ustar nileshnilesh\name{cdens} \alias{cdens} \title{ Component Density for Parameterized MVN Mixture Models } \description{ Computes component densities for observations in MVN mixture models parameterized by eigenvalue decomposition. } \usage{ cdens(data, modelName, parameters, logarithm = FALSE, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{parameters}{ The parameters of the model: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{logarithm}{ A logical value indicating whether or not the logarithm of the component densities should be returned. The default is to return the component densities, obtained from the log component densities by exponentiation. } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. 
} \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A numeric matrix whose \code{[i,k]}th entry is the density or log density of observation \emph{i} in component \emph{k}. The densities are not scaled by mixing proportions. } \note{ When one or more component densities are very large in magnitude, it may be possible to compute the logarithm of the component densities but not the component densities themselves due to overflow. } \seealso{ \code{\link{cdensE}}, \dots, \code{\link{cdensVVV}}, \code{\link{dens}}, \code{\link{estep}}, \code{\link{mclustModelNames}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}}, \code{\link{do.call}} } \examples{ z2 <- unmap(hclass(hcVVV(faithful),2)) # initial value for 2 class case model <- me(modelName = "EEE", data = faithful, z = z2) cdens(modelName = "EEE", data = faithful, logarithm = TRUE, parameters = model$parameters)[1:5,] data(cross) odd <- seq(1, nrow(cross), by = 2) oddBIC <- mclustBIC(cross[odd,-1]) oddModel <- mclustModel(cross[odd,-1], oddBIC) ## best parameter estimates names(oddModel) even <- odd + 1 densities <- cdens(modelName = oddModel$modelName, data = cross[even,-1], parameters = oddModel$parameters) cbind(class = cross[even,1], densities)[1:5,] } \keyword{cluster} mclust/man/Baudry_etal_2010_JCGS_examples.Rd0000644000175000017500000000371214124774626020376 0ustar nileshnilesh\name{Baudry_etal_2010_JCGS_examples} \alias{Baudry_etal_2010_JCGS_examples} \alias{ex4.1} \alias{ex4.2} \alias{ex4.3} \alias{ex4.4.1} \alias{ex4.4.2} \alias{Test1D} \docType{data} \title{Simulated Example Datasets From Baudry et al. (2010)} \description{ Simulated datasets used in Baudry et al. (2010) to illustrate the proposed mixture components combining method for clustering. Please see the cited article for a detailed presentation of these datasets. The data frame with name exN.M is presented in Section N.M in the paper. 
Test1D (not in the article) has been simulated from a Gaussian mixture distribution in R. ex4.1 and ex4.2 have been simulated from a Gaussian mixture distribution in R^2. ex4.3 has been simulated from a mixture of a uniform distribution on a square and a spherical Gaussian distribution in R^2. ex4.4.1 has been simulated from a Gaussian mixture model in R^2 ex4.4.2 has been simulated from a mixture of two uniform distributions in R^3. } \usage{data(Baudry_etal_2010_JCGS_examples)} \format{ \code{ex4.1} is a data frame with 600 observations on 2 real variables. \code{ex4.2} is a data frame with 600 observations on 2 real variables. \code{ex4.3} is a data frame with 200 observations on 2 real variables. \code{ex4.4.1} is a data frame with 800 observations on 2 real variables. \code{ex4.4.2} is a data frame with 300 observations on 3 real variables. \code{Test1D} is a data frame with 200 observations on 1 real variable. } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) output <- clustCombi(data = ex4.4.1) output # is of class clustCombi # plots the hierarchy of combined solutions, then some "entropy plots" which # may help one to select the number of classes plot(output) } } \keyword{datasets} mclust/man/emControl.Rd0000644000175000017500000000442213752164214014726 0ustar nileshnilesh\name{emControl} \alias{emControl} \title{Set control values for use with the EM algorithm} \description{ Supplies a list of values including tolerances for singularity and convergence assessment, for use functions involving EM within \emph{MCLUST}. } \usage{ emControl(eps, tol, itmax, equalPro) } \arguments{ \item{eps}{ A scalar tolerance associated with deciding when to terminate computations due to computational singularity in covariances. 
Smaller values of \code{eps} allow computations to proceed nearer to singularity. The default is the relative machine precision \code{.Machine$double.eps}, which is approximately \eqn{2e-16} on IEEE-compliant machines. } \item{tol}{ A vector of length two giving relative convergence tolerances for the log-likelihood and for parameter convergence in the inner loop for models with iterative M-step ("VEI", "VEE", "EVE", "VVE", "VEV"), respectively. The default is \code{c(1.e-5, sqrt(.Machine$double.eps))}. If only one number is supplied, it is used as the tolerance for the outer iterations and the tolerance for the inner iterations is as in the default. } \item{itmax}{ A vector of length two giving integer limits on the number of EM iterations and on the number of iterations in the inner loop for models with iterative M-step ("VEI", "VEE", "EVE", "VVE", "VEV"), respectively. The default is \code{c(.Machine$integer.max, .Machine$integer.max)} allowing termination to be completely governed by \code{tol}. If only one number is supplied, it is used as the iteration limit for the outer iteration only. } \item{equalPro}{ Logical variable indicating whether or not the mixing proportions are equal in the model. Default: \code{equalPro = FALSE}. } } \value{ A named list in which the names are the names of the arguments and the values are the values supplied to the arguments. } \details{ \code{emControl} is provided for assigning values and defaults for EM within \emph{MCLUST}. 
} \seealso{ \code{\link{em}}, \code{\link{estep}}, \code{\link{me}}, \code{\link{mstep}}, \code{\link{mclustBIC}} } \examples{ irisBIC <- mclustBIC(iris[,-5], control = emControl(tol = 1.e-6)) summary(irisBIC, iris[,-5]) } \keyword{cluster} mclust/man/me.Rd0000644000175000017500000001074114124774626013376 0ustar nileshnilesh\name{me} \alias{me} \title{EM algorithm starting with M-step for parameterized MVN mixture models} \description{ Implements the EM algorithm for MVN mixture models parameterized by eigenvalue decomposition, starting with the maximization step. } \usage{ me(data, modelName, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{z}{ A matrix whose \code{[i,k]}th entry is an initial estimate of the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. } \item{prior}{ Specification of a conjugate prior on the means and variances. See the help file for \code{priorControl} for further information. The default assumes no prior. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{Vinv}{ If the model is to include a noise term, \code{Vinv} is an estimate of the reciprocal hypervolume of the data region. If set to a negative value or 0, the model will include a noise term with the reciprocal hypervolume estimated by the function \code{hypvol}. The default is not to assume a noise term in the model through the setting \code{Vinv=NULL}. 
} \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is set in \code{mclust.options("warn")}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The number of mixture components. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{control}{ The list of control parameters for EM used. } \item{prior}{ The specification of a conjugate prior on the means and variances used, \code{NULL} if no prior is used. 
} \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{meE}}, \dots, \code{\link{meVVV}}, \code{\link{em}}, \code{\link{mstep}}, \code{\link{estep}}, \code{\link{priorControl}}, \code{\link{mclustModelNames}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}} } \examples{ \donttest{ me(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5]))} } \keyword{cluster} mclust/man/mclust2Dplot.Rd0000644000175000017500000001316614124774626015375 0ustar nileshnilesh\name{mclust2Dplot} \alias{mclust2Dplot} \title{Plot two-dimensional data modelled by an MVN mixture} \description{ Plot two-dimensional data given parameters of an MVN mixture model for the data. } \usage{ mclust2Dplot(data, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "uncertainty", "error"), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, scale = FALSE, cex = 1, PCH = ".", main = FALSE, swapAxes = FALSE, \dots) } \arguments{ \item{data}{ A numeric matrix or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. In this case the data are two dimensional, so there are two columns. } \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. The relevant components are as follows: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. There should one more mixing proportion than the number of Gaussian components if the mixture model includes a Poisson noise term. } \item{\code{mean}}{ The mean for each component. 
If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix in which the \code{[i,k]}th entry gives the probability of observation \emph{i} belonging to the \emph{k}th class. Used to compute \code{classification} and \code{uncertainty} if those arguments aren't available. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. If present argument \code{z} will be ignored. } \item{truth}{ A numeric or character vector giving a known classification of each data point. If \code{classification} or \code{z} is also present, this is used for displaying classification errors. } \item{uncertainty}{ A numeric vector of values in \emph{(0,1)} giving the uncertainty of each data point. If present argument \code{z} will be ignored. } \item{what}{ Choose from one of the following three options: \code{"classification"} (default), \code{"error"}, \code{"uncertainty"}. } \item{addEllipses}{ A logical indicating whether or not to add ellipses with axes corresponding to the within-cluster covariances. } \item{fillEllipses}{ A logical specifying whether or not to fill ellipses with transparent colors when \code{addEllipses = TRUE}. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. 
Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given is \code{mclust.options("classPlotColors")}. } \item{xlim, ylim}{ Optional argument specifying bounds for the ordinate, abscissa of the plot. This may be useful for when comparing plots. } \item{xlab, ylab}{ Optional argument specifying labels for the x-axis and y-axis. } \item{scale}{ A logical variable indicating whether or not the two chosen dimensions should be plotted on the same scale, and thus preserve the shape of the distribution. Default: \code{scale=FALSE} } \item{cex}{ An argument specifying the size of the plotting symbols. The default value is 1. } \item{PCH}{ An argument specifying the symbol to be used when a classificatiion has not been specified for the data. The default value is a small dot ".". } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{swapAxes}{ A logical variable indicating whether or not the axes should be swapped for the plot. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing the data, together with the location of the mixture components, classification, uncertainty, and/or classification errors. 
} \seealso{ \code{\link{surfacePlot}}, \code{\link{clPairs}}, \code{\link{coordProj}}, \code{\link{mclust.options}} } \examples{ \donttest{ faithfulModel <- Mclust(faithful) mclust2Dplot(faithful, parameters=faithfulModel$parameters, z=faithfulModel$z, what = "classification", main = TRUE) mclust2Dplot(faithful, parameters=faithfulModel$parameters, z=faithfulModel$z, what = "uncertainty", main = TRUE) } } \keyword{cluster} mclust/man/MclustDRsubsel.Rd0000644000175000017500000001225514124774626015712 0ustar nileshnilesh\name{MclustDRsubsel} \alias{MclustDRsubsel} \alias{print.MclustDRsubsel} \alias{MclustDRsubsel_classif} \alias{MclustDRsubsel_cluster} \alias{MclustDRrecoverdir} \alias{MclustDRsubsel1cycle} \alias{print.MclustDRsubsel} \alias{summary.MclustDRsubsel} \title{Subset selection for GMMDR directions based on BIC} \description{ Implements a subset selection method for selecting the relevant directions spanning the dimension reduction subspace for visualizing the clustering or classification structure obtained from a finite mixture of Gaussian densities.} \usage{ MclustDRsubsel(object, G = 1:9, modelNames = mclust.options("emModelNames"), \dots, bic.stop = 0, bic.cutoff = 0, mindir = 1, verbose = interactive()) } \arguments{ \item{object}{An object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}}.} \item{G}{An integer vector specifying the numbers of mixture components or clusters.} \item{modelNames}{A vector of character strings indicating the models to be fitted. See \code{\link{mclustModelNames}} for a description of the available models.} \item{\dots}{Further arguments passed through \code{\link{Mclust}} or \code{\link{MclustDA}}.} \item{bic.stop}{A criterion to terminate the search. If maximal BIC difference is less than \code{bic.stop} then the algorithm stops. 
\cr Two tipical values are: \describe{ \item{}{\code{0}: algorithm stops when the BIC difference becomes negative (default)} \item{}{\code{-Inf}: algorithm continues until all directions have been selected} }} \item{bic.cutoff}{A value specifying how to select simplest ``best'' model within \code{bic.cutoff} from the maximum value achieved. Setting this to \code{0} (default) simply select the model with the largest BIC difference.} \item{mindir}{An integer value specifying the minimum number of directions to be estimated.} \item{verbose}{A logical or integer value specifying if and how much detailed information should be reported during the iterations of the algorithm. \cr Possible values are: \describe{ \item{}{\code{0} or \code{FALSE}: no trace info is shown;} \item{}{\code{1} or \code{TRUE}: a trace info is shown at each step of the search;} \item{}{\code{2}: a more detailed trace info is is shown.} } } } \details{ The GMMDR method aims at reducing the dimensionality by identifying a set of linear combinations, ordered by importance as quantified by the associated eigenvalues, of the original features which capture most of the clustering or classification structure contained in the data. This is implemented in \code{\link{MclustDR}}. The \code{MclustDRsubsel} function implements the greedy forward search algorithm discussed in Scrucca (2010) to prune the set of all GMMDR directions. The criterion used to select the relevant directions is based on the BIC difference between a clustering model and a model in which the feature proposal has no clustering relevance. The steps are the following: 1. Select the first feature to be the one which maximizes the BIC difference between the best clustering model and the model which assumes no clustering, i.e. a single component. 2. Select the next feature amongst those not previously included, to be the one which maximizes the BIC difference. 3. 
Iterate the previous step until all the BIC differences for the inclusion of a feature become less than \code{bic.stop}. At each step, the search over the model space is performed with respect to the model parametrisation and the number of clusters. } \value{ An object of class \code{'MclustDRsubsel'} which inherits from \code{'MclustDR'}, so it has the same components of the latter plus the following: \item{basisx}{The basis of the estimated dimension reduction subspace expressed in terms of the original variables.} \item{std.basisx}{The basis of the estimated dimension reduction subspace expressed in terms of the original variables standardized to have unit standard deviation.} } \references{ Scrucca, L. (2010) Dimension reduction for model-based clustering. \emph{Statistics and Computing}, 20(4), pp. 471-484. Scrucca, L. (2014) Graphical Tools for Model-based Mixture Discriminant Analysis. \emph{Advances in Data Analysis and Classification}, 8(2), pp. 147-165 } \author{Luca Scrucca} %\note{} \seealso{ \code{\link{MclustDR}}, \code{\link{Mclust}}, \code{\link{MclustDA}}. 
} \examples{ \donttest{ # clustering data(crabs, package = "MASS") x <- crabs[,4:8] class <- paste(crabs$sp, crabs$sex, sep = "|") mod <- Mclust(x) table(class, mod$classification) dr <- MclustDR(mod) summary(dr) plot(dr) drs <- MclustDRsubsel(dr) summary(drs) table(class, drs$classification) plot(drs, what = "scatterplot") plot(drs, what = "pairs") plot(drs, what = "contour") plot(drs, what = "boundaries") plot(drs, what = "evalues") # classification data(banknote) da <- MclustDA(banknote[,2:7], banknote$Status) table(banknote$Status, predict(da)$class) dr <- MclustDR(da) summary(dr) drs <- MclustDRsubsel(dr) summary(drs) table(banknote$Status, predict(drs)$class) plot(drs, what = "scatterplot") plot(drs, what = "classification") plot(drs, what = "boundaries")} } \keyword{multivariate} mclust/man/errorBars.Rd0000644000175000017500000000317212542725574014736 0ustar nileshnilesh\name{errorBars} \alias{errorBars} \title{Draw error bars on a plot} \description{ Draw error bars at x from upper to lower. If \code{horizontal = FALSE} (default) bars are drawn vertically, otherwise horizontally. } \usage{ errorBars(x, upper, lower, width = 0.1, code = 3, angle = 90, horizontal = FALSE, \dots) } \arguments{ \item{x}{A vector of values where the bars must be drawn.} \item{upper}{A vector of upper values where the bars must end.} \item{lower}{A vector of lower values where the bars must start.} \item{width}{A value specifying the width of the end-point segment.} \item{code}{An integer code specifying the kind of arrows to be drawn. For details see \code{\link[graphics]{arrows}}.} \item{angle}{A value specifying the angle at the arrow edge. 
For details see \code{\link[graphics]{arrows}}.} \item{horizontal}{A logical specifying if bars should be drawn vertically (default) or horizontally.} \item{\dots}{Further arguments are passed to \code{\link[graphics]{arrows}}.} } %\value{} \examples{ par(mfrow=c(2,2)) # Create a simple example dataset x <- 1:5 n <- c(10, 15, 12, 6, 3) se <- c(1, 1.2, 2, 1, .5) # upper and lower bars b <- barplot(n, ylim = c(0, max(n)*1.5)) errorBars(b, lower = n-se, upper = n+se, lwd = 2, col = "red3") # one side bars b <- barplot(n, ylim = c(0, max(n)*1.5)) errorBars(b, lower = n, upper = n+se, lwd = 2, col = "red3", code = 1) # plot(x, n, ylim = c(0, max(n)*1.5), pch = 0) errorBars(x, lower = n-se, upper = n+se, lwd = 2, col = "red3") # dotchart(n, labels = x, pch = 19, xlim = c(0, max(n)*1.5)) errorBars(x, lower = n-se, upper = n+se, col = "red3", horizontal = TRUE) } mclust/man/hc.Rd0000644000175000017500000001250514124774626013367 0ustar nileshnilesh\name{hc} \alias{hc} \alias{print.hc} \title{Model-based Agglomerative Hierarchical Clustering} \description{ Agglomerative hierarchical clustering based on maximum likelihood criteria for Gaussian mixture models parameterized by eigenvalue decomposition. } \usage{ hc(data, modelName = "VVV", use = "VARS", partition = dupPartition(data), minclus = 1, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations (\eqn{n}) and columns correspond to variables (\eqn{d}). 
} \item{modelName}{ A character string indicating the model to be used in model-based agglomerative hierarchical clustering.\cr Possible models are: \describe{ \item{\code{"E"}}{equal variance (one-dimensional);} \item{\code{"V"}}{spherical, variable variance (one-dimensional);} \item{\code{"EII"}}{spherical, equal volume;} \item{\code{"VII"}}{spherical, unequal volume;} \item{\code{"EEE"}}{ellipsoidal, equal volume, shape, and orientation;} \item{\code{"VVV"}}{ellipsoidal, varying volume, shape, and orientation (default).} } If \code{hc()} is used for initialization of EM algorithm then the default is taken from \code{mclust.options("hcModelName")}. See \code{\link{mclust.options}}. } \item{use}{ A character string specifying the type of input variables/data transformation to be used for model-based agglomerative hierarchical clustering.\cr Possible values are: \describe{ \item{\code{"VARS"}}{original variables (default);} \item{\code{"STD"}}{standardized variables (centered and scaled);} \item{\code{"SPH"}}{sphered variables (centered, scaled and uncorrelated) computed using SVD;} \item{\code{"PCS"}}{principal components computed using SVD on centered variables (i.e. using the covariance matrix);} \item{\code{"PCR"}}{principal components computed using SVD on standardized (center and scaled) variables (i.e. using the correlation matrix);} \item{\code{"SVD"}}{scaled SVD transformation.} } If \code{hc()} is used for initialization of EM algorithm then the default is taken from \code{mclust.options("hcUse")}. See \code{\link{mclust.options}}.\cr For further details see Scrucca and Raftery (2015). } \item{partition}{ A numeric or character vector representing a partition of observations (rows) of \code{data}. If provided, group merges will start with this partition. Otherwise, each observation is assumed to be in a cluster by itself at the start of agglomeration. 
Starting with version 5.4.8, by default the function \code{\link{dupPartition}} is used to start with all duplicated observations in the same group, thereby keeping duplicates in the same group throughout the modelling process. } \item{minclus}{ A number indicating the number of clusters at which to stop the agglomeration. The default is to stop when all observations have been merged into a single cluster. } \item{\dots}{ Arguments for the method-specific \code{hc} functions. See for example \code{\link{hcE}}. } } \value{ The function \code{hc()} returns a numeric two-column matrix in which the \emph{i}th row gives the minimum index for observations in each of the two clusters merged at the \emph{i}th stage of agglomerative hierarchical clustering. Several other informations are also returned as attributes. The plotting method \code{plot.hc()} draws a dendrogram, which can be based on either the classification loglikelihood or the merge level (number of clusters). For details, see the associated help file. } \details{ Most models have memory usage of the order of the square of the number groups in the initial partition for fast execution. Some models, such as equal variance or \code{"EEE"}, do not admit a fast algorithm under the usual agglomerative hierarchical clustering paradigm. These use less memory but are much slower to execute. } \note{ If \code{modelName = "E"} (univariate with equal variances) or \code{modelName = "EII"} (multivariate with equal spherical covariances), then underlying model is the same as that for Ward's method for hierarchical clustering. } \references{ Banfield J. D. and Raftery A. E. (1993). Model-based Gaussian and non-Gaussian Clustering. \emph{Biometrics}, 49:803-821. Fraley C. (1998). Algorithms for model-based Gaussian hierarchical clustering. \emph{SIAM Journal on Scientific Computing}, 20:270-281. Fraley C. and Raftery A. E. (2002). Model-based clustering, discriminant analysis, and density estimation. 
\emph{Journal of the American Statistical Association}, 97:611-631. Scrucca L. and Raftery A. E. (2015). Improved initialisation of model-based clustering using Gaussian hierarchical partitions. \emph{Advances in Data Analysis and Classification}, 9/4:447-460. } \seealso{ \code{\link{hcE}}, \dots, \code{\link{hcVVV}}, \code{\link{plot.hc}}, \code{\link{hclass}}, \code{\link{mclust.options}} } \examples{ hcTree <- hc(modelName = "VVV", data = iris[,-5]) hcTree cl <- hclass(hcTree,c(2,3)) table(cl[,"2"]) table(cl[,"3"]) \donttest{ clPairs(iris[,-5], classification = cl[,"2"]) clPairs(iris[,-5], classification = cl[,"3"]) } } \keyword{cluster} mclust/man/summary.Mclust.Rd0000644000175000017500000000241613523050564015726 0ustar nileshnilesh\name{summary.Mclust} \alias{summary.Mclust} \alias{print.summary.Mclust} \title{Summarizing Gaussian Finite Mixture Model Fits} \description{Summary method for class \code{"Mclust"}.} \usage{ \method{summary}{Mclust}(object, classification = TRUE, parameters = FALSE, \dots) \method{print}{summary.Mclust}(x, digits = getOption("digits"), \dots) } \arguments{ \item{object}{An object of class \code{'Mclust'} resulting of a call to \code{\link{Mclust}} or \code{\link{densityMclust}}.} \item{x}{An object of class \code{'summary.Mclust'}, usually, a result of a call to \code{summary.Mclust}.} \item{classification}{Logical; if \code{TRUE} a table of MAP classification/clustering of observations is printed.} \item{parameters}{Logical; if \code{TRUE}, the parameters of mixture components are printed.} \item{digits}{The number of significant digits to use when printing.} \item{\dots}{Further arguments passed to or from other methods.} } % \details{} % \value{} \author{Luca Scrucca} % \note{} \seealso{\code{\link{Mclust}}, \code{\link{densityMclust}}.} \examples{ mod1 = Mclust(iris[,1:4]) summary(mod1) summary(mod1, parameters = TRUE, classification = FALSE) mod2 = densityMclust(faithful) summary(mod2) summary(mod2, parameters = TRUE) } 
\keyword{cluster} mclust/man/hcE.Rd0000644000175000017500000000604314124774626013474 0ustar nileshnilesh\name{hcE} \alias{hcE} \alias{hcV} \alias{hcEII} \alias{hcVII} \alias{hcEEE} \alias{hcVVV} \title{Model-based Hierarchical Clustering} \description{ Agglomerative hierarchical clustering based on maximum likelihood for a Gaussian mixture model parameterized by eigenvalue decomposition. } \usage{ hcE(data, partition = NULL, minclus=1, \dots) hcV(data, partition = NULL, minclus = 1, alpha = 1, \dots) hcEII(data, partition = NULL, minclus = 1, \dots) hcVII(data, partition = NULL, minclus = 1, alpha = 1, \dots) hcEEE(data, partition = NULL, minclus = 1, \dots) hcVVV(data, partition = NULL, minclus = 1, alpha = 1, beta = 1, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{partition}{ A numeric or character vector representing a partition of observations (rows) of \code{data}. If provided, group merges will start with this partition. Otherwise, each observation is assumed to be in a cluster by itself at the start of agglomeration. } \item{minclus}{ A number indicating the number of clusters at which to stop the agglomeration. The default is to stop when all observations have been merged into a single cluster. } \item{alpha, beta}{ Additional tuning parameters needed for initializatiion in some models. For details, see Fraley 1998. The defaults provided are usually adequate. } \item{\dots}{ Catch unused arguments from a \code{do.call} call. } } \value{ A numeric two-column matrix in which the \emph{i}th row gives the minimum index for observations in each of the two clusters merged at the \emph{i}th stage of agglomerative hierarchical clustering. } \details{ Most models have memory usage of the order of the square of the number groups in the initial partition for fast execution. 
Some models, such as equal variance or \code{"EEE"}, do not admit a fast algorithm under the usual agglomerative hierachical clustering paradigm. These use less memory but are much slower to execute. } \references{ J. D. Banfield and A. E. Raftery (1993). Model-based Gaussian and non-Gaussian Clustering. \emph{Biometrics 49:803-821}. C. Fraley (1998). Algorithms for model-based Gaussian hierarchical clustering. \emph{SIAM Journal on Scientific Computing 20:270-281}. C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association 97:611-631}. } \seealso{ \code{\link{hc}}, \code{\link{hclass}} \code{\link{hcRandomPairs}} } \examples{ hcTree <- hcEII(data = iris[,-5]) cl <- hclass(hcTree,c(2,3)) \donttest{ par(pty = "s", mfrow = c(1,1)) clPairs(iris[,-5],cl=cl[,"2"]) clPairs(iris[,-5],cl=cl[,"3"]) par(mfrow = c(1,2)) dimens <- c(1,2) coordProj(iris[,-5], classification=cl[,"2"], dimens=dimens) coordProj(iris[,-5], classification=cl[,"3"], dimens=dimens) } } \keyword{cluster} mclust/man/randomOrthogonalMatrix.Rd0000644000175000017500000000260013751305610017456 0ustar nileshnilesh\name{randomOrthogonalMatrix} \alias{randomOrthogonalMatrix} \title{Random orthogonal matrix} \description{ Generate a random orthogonal basis matrix of dimension \eqn{(nrow x ncol)} using the method in Heiberger (1978). } \usage{ randomOrthogonalMatrix(nrow, ncol, n = nrow, d = ncol, seed = NULL) } \arguments{ \item{nrow}{the number of rows of the resulting orthogonal matrix.} \item{ncol}{the number of columns of the resulting orthogonal matrix.} \item{n}{deprecated. See \code{nrow} above.} \item{d}{deprecated. See \code{ncol} above.} \item{seed}{an optional integer argument to use in \code{set.seed()} for reproducibility. By default the current seed will be used. 
Reproducibility can also be achieved by calling \code{set.seed()} before calling this function.} } \details{ The use of arguments \code{n} and \code{d} is deprecated and they will be removed in the future. } \value{ An orthogonal matrix of dimension \eqn{nrow x ncol} such that each column is orthogonal to the other and has unit lenght. Because of the latter, it is also called orthonormal. } \seealso{\code{\link{coordProj}}} \references{ Heiberger R. (1978) Generation of random orthogonal matrices. \emph{Journal of the Royal Statistical Society. Series C (Applied Statistics)}, 27(2), 199-206. } \examples{ B <- randomOrthogonalMatrix(10,3) zapsmall(crossprod(B)) } mclust/man/mvnX.Rd0000644000175000017500000000572514124774626013733 0ustar nileshnilesh\name{mvnX} \alias{mvnX} \alias{mvnXII} \alias{mvnXXI} \alias{mvnXXX} \title{ Univariate or Multivariate Normal Fit } \description{ Computes the mean, covariance, and log-likelihood from fitting a single Gaussian (univariate or multivariate normal). } \usage{ mvnX(data, prior = NULL, warn = NULL, \dots) mvnXII(data, prior = NULL, warn = NULL, \dots) mvnXXI(data, prior = NULL, warn = NULL, \dots) mvnXXX(data, prior = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{warn}{ A logical value indicating whether or not a warning should be issued whenever a singularity is encountered. The default is given by \code{mclust.options("warn")}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. 
} } \details{ \describe{ \item{\code{mvnXII}}{computes the best fitting Gaussian with the covariance restricted to be a multiple of the identity.} \item{\code{mvnXXI}}{computes the best fitting Gaussian with the covariance restricted to be diagonal.} \item{\code{mvnXXX}}{computes the best fitting Gaussian with ellipsoidal (unrestricted) covariance.} } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{parameters}{ \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"WARNING"} An appropriate warning if problems are encountered in the computations. 
} } \seealso{ \code{\link{mvn}}, \code{\link{mstepE}} } \examples{ \donttest{ n <- 1000 set.seed(0) x <- rnorm(n, mean = -1, sd = 2) mvnX(x) mu <- c(-1, 0, 1) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% (2*diag(3)), MARGIN = 2, STATS = mu, FUN = "+") mvnXII(x) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% diag(1:3), MARGIN = 2, STATS = mu, FUN = "+") mvnXXI(x) Sigma <- matrix(c(9,-4,1,-4,9,4,1,4,9), 3, 3) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% chol(Sigma), MARGIN = 2, STATS = mu, FUN = "+") mvnXXX(x) } } \keyword{cluster} mclust/man/EuroUnemployment.Rd0000644000175000017500000000162314125001434016300 0ustar nileshnilesh\name{EuroUnemployment} \alias{EuroUnemployment} \docType{data} \title{Unemployment data for European countries in 2014} \description{ The data set contains unemployment rates for 31 European countries for the year 2014.} \usage{data(EuroUnemployment)} \format{A data frame with the following variables: \describe{ \item{TUR}{Total unemployment rate, i.e. percentage of unemployed persons aged 15-74 in the economically active population.} \item{YUR}{Youth unemployment rate, i.e. percentage of unemployed persons aged 15-24 in the economically active population.} \item{LUR}{Long-term unemployment rate, i.e. percentage of unemployed persons who have been unemployed for 12 months or more.} } } \source{Dataset from EUROSTAT available at \url{https://ec.europa.eu/eurostat/web/lfs/data/database}. 
For conditions of use see \url{https://ec.europa.eu/eurostat/about/policies/copyright}.} \keyword{datasets} mclust/man/predict.MclustDA.Rd0000644000175000017500000000305514124774626016102 0ustar nileshnilesh\name{predict.MclustDA} \alias{predict.MclustDA} \title{Classify multivariate observations by Gaussian finite mixture modeling} \description{Classify multivariate observations based on Gaussian finite mixture models estimated by \code{\link{MclustDA}}.} \usage{ \method{predict}{MclustDA}(object, newdata, prop = object$prop, \dots) } \arguments{ \item{object}{an object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} \item{newdata}{a data frame or matrix giving the data. If missing the train data obtained from the call to \code{\link{MclustDA}} are classified.} \item{prop}{the class proportions or prior class probabilities to belong to each class; by default, this is set at the class proportions in the training data.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a list of with the following components: \item{classification}{a factor of predicted class labels for \code{newdata}.} \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} } \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustDA}}.} \examples{ \donttest{ odd <- seq(from = 1, to = nrow(iris), by = 2) even <- odd + 1 X.train <- iris[odd,-5] Class.train <- iris[odd,5] X.test <- iris[even,-5] Class.test <- iris[even,5] irisMclustDA <- MclustDA(X.train, Class.train) predTrain <- predict(irisMclustDA) predTrain predTest <- predict(irisMclustDA, X.test) predTest } } \keyword{multivariate} mclust/man/partuniq.Rd0000644000175000017500000000134613752165060014631 0ustar nileshnilesh\name{partuniq} \alias{partuniq} \title{ Classifies Data According to Unique Observations } \description{ Gives a one-to-one mapping from unique observations to rows 
of a data matrix. } \usage{ partuniq(x) } \arguments{ \item{x}{Matrix of observations.} } \value{ A vector of length \code{nrow(x)} with integer entries. An observation \code{k} is assigned an integer \code{i} whenever observation \code{i} is the first row of \code{x} that is identical to observation \code{k} (note that \code{i <= k}). } \seealso{ \code{\link{partconv}} } \examples{ set.seed(0) mat <- data.frame(lets = sample(LETTERS[1:2],9,TRUE), nums = sample(1:2,9,TRUE)) mat ans <- partuniq(mat) ans partconv(ans,consec=TRUE) } \keyword{cluster} mclust/man/combMat.Rd0000644000175000017500000000173713475242100014345 0ustar nileshnilesh\name{combMat} \alias{combMat} \title{ Combining Matrix } \description{ Create a combining matrix } \usage{ combMat(K, l1, l2) } \arguments{ \item{K}{ The original number of classes: the matrix will define a combining from K to (K-1) classes. } \item{l1}{ Label of one of the two classes to be combined. } \item{l2}{ Label of the other class to be combined. } } \value{ If \code{z} is a vector (length \emph{K}) whose \emph{k}th entry is the probability that an observation belongs to the \emph{k}th class in a \emph{K}-classes classification, then \code{combiM \%*\% z} is the vector (length \emph{K-1}) whose \emph{k}th entry is the probability that the observation belongs to the \emph{k}th class in the \emph{K-1}-classes classification obtained by merging classes \code{l1} and \code{l2} in the initial classification. } \author{ J.-P. Baudry, A. E. Raftery, L. 
Scrucca } \seealso{ \code{\link{clustCombi}}, \code{\link{combiPlot}} } %\examples{} \keyword{ cluster } mclust/man/plot.clustCombi.Rd0000644000175000017500000000443614124774626016062 0ustar nileshnilesh\name{plot.clustCombi} \alias{plot.clustCombi} \title{ Plot Combined Clusterings Results } \description{ Plot combined clusterings results: classifications corresponding to \code{Mclust}/BIC and to the hierarchically combined classes, "entropy plots" to help to select a number of classes, and the tree structure obtained from combining mixture components. } \usage{ \method{plot}{clustCombi}(x, what = c("classification", "entropy", "tree"), \dots) } \arguments{ \item{x}{ Object returned by \code{\link{clustCombi}} function. } \item{what}{ Type of plot. } \item{\dots}{ Other arguments to be passed to other functions: \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{combiTree}}. Please see the corresponding documentations. } } \value{ Classifications are plotted with \code{\link{combiPlot}}, which relies on the \code{Mclust} plot functions. Entropy plots are plotted with \code{\link{entPlot}} and may help to select a number of classes: please see the article cited in the references. Tree plots are produced by \code{\link{combiTree}} and graph the tree structure implied by the clusters combining process. } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. Scrucca } \seealso{ \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{combiTree}}, \code{\link{clustCombi}}. 
} \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) ## 1D Example output <- clustCombi(data = Test1D, G=1:15) # plots the hierarchy of combined solutions, then some "entropy plots" which # may help one to select the number of classes (please see the article cited # in the references) plot(output) ## 2D Example output <- clustCombi(data = ex4.1) # plots the hierarchy of combined solutions, then some "entropy plots" which # may help one to select the number of classes (please see the article cited # in the references) plot(output) ## 3D Example output <- clustCombi(data = ex4.4.2) # plots the hierarchy of combined solutions, then some "entropy plots" which # may help one to select the number of classes (please see the article cited # in the references) plot(output) } } \keyword{ cluster } mclust/man/mapClass.Rd0000644000175000017500000000225413752165016014531 0ustar nileshnilesh\name{mapClass} \alias{mapClass} \title{Correspondence between classifications} \description{ Best correspondence between classes given two vectors viewed as alternative classifications of the same object. } \usage{ mapClass(a, b) } \arguments{ \item{a}{ A numeric or character vector of class labels. } \item{b}{ A numeric or character vector of class labels. Must have the same length as \code{a}. } } \value{ A list with two named elements, \code{aTOb} and \code{bTOa} which are themselves lists. The \code{aTOb} list has a component corresponding to each unique element of \code{a}, which gives the element or elements of \code{b} that result in the closest class correspondence. The \code{bTOa} list has a component corresponding to each unique element of \code{b}, which gives the element or elements of \code{a} that result in the closest class correspondence. 
} \seealso{ \code{\link{classError}}, \code{\link{table}} } \examples{ a <- rep(1:3, 3) a b <- rep(c("A", "B", "C"), 3) b mapClass(a, b) a <- sample(1:3, 9, replace = TRUE) a b <- sample(c("A", "B", "C"), 9, replace = TRUE) b mapClass(a, b) } \keyword{cluster} mclust/man/unmap.Rd0000644000175000017500000000321613752165063014107 0ustar nileshnilesh\name{unmap} \alias{unmap} \title{ Indicator Variables given Classification } \description{ Converts a classification into a matrix of indicator variables. } \usage{ unmap(classification, groups=NULL, noise=NULL, \dots) } \arguments{ \item{classification}{ A numeric or character vector. Typically the distinct entries of this vector would represent a classification of observations in a data set. } \item{groups}{ A numeric or character vector indicating the groups from which \code{classification} is drawn. If not supplied, the default is to assumed to be the unique entries of classification. } \item{noise}{ A single numeric or character value used to indicate the value of \code{groups} corresponding to noise. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ An \emph{n} by \emph{m} matrix of \emph{(0,1)} indicator variables, where \emph{n} is the length of \code{classification} and \emph{m} is the number of unique values or symbols in \code{classification}. Columns are labeled by the unique values in \code{classification}, and the \code{[i,j]}th entry is \emph{1} if \code{classification[i]} is the \emph{j}th unique value or symbol in sorted order \code{classification}. If a \code{noise} value of symbol is designated, the corresponding indicator variables are relocated to the last column of the matrix. 
} \seealso{ \code{\link{map}}, \code{\link{estep}}, \code{\link{me}} } \examples{ z <- unmap(iris[,5]) z[1:5, ] emEst <- me(modelName = "VVV", data = iris[,-5], z = z) emEst$z[1:5,] map(emEst$z) } \keyword{cluster} mclust/man/dupPartition.Rd0000644000175000017500000000126414124774626015457 0ustar nileshnilesh\name{dupPartition} \alias{dupPartition} \title{Partition the data by grouping together duplicated data} \description{ Duplicated data are grouped together to form a basic partition that can be used to start hierarchical agglomeration. } \usage{ dupPartition(data) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. If a matrix or data frame, rows correspond to observations (\eqn{n}) and columns correspond to variables (\eqn{d}). } } \value{ A vector of indices indicating the partition. } \seealso{ \code{\link{hc}} } \examples{ \donttest{ dupPartition(iris[,1:4]) dupPartition(iris) dupPartition(iris$Species) } } \keyword{cluster} mclust/man/plot.MclustDA.Rd0000644000175000017500000001453414124774626015432 0ustar nileshnilesh\name{plot.MclustDA} \alias{plot.MclustDA} \title{Plotting method for MclustDA discriminant analysis} \description{ Plots for model-based mixture discriminant analysis results, such as scatterplot of training and test data, classification of train and test data, and errors. } \usage{ \method{plot}{MclustDA}(x, what = c("scatterplot", "classification", "train&test", "error"), newdata, newclass, dimens = NULL, symbols, colors, main = NULL, \dots) } \arguments{ \item{x}{ An object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}. } \item{what}{ A string specifying the type of graph requested. Available choices are: \describe{ \item{\code{"scatterplot"} =}{a plot of training data with points marked based on the known classification. 
Ellipses corresponding to covariances of mixture components are also drawn.} \item{\code{"classification"} =}{a plot of data with points marked based on the predicted classification; if \code{newdata} is provided then the test set is shown otherwise the training set.} \item{\code{"train&test"} =}{a plot of training and test data with points marked according to the type of set.} \item{\code{"error"} =}{a plot of training set (or test set if \code{newdata} and \code{newclass} are provided) with misclassified points marked.} } If not specified, in interactive sessions a menu of choices is proposed. } \item{newdata}{ A data frame or matrix for test data. } \item{newclass}{ A vector giving the class labels for the observations in the test data (if known). } \item{dimens}{ A vector of integers giving the dimensions of the desired coordinate projections for multivariate data. The default is to take all the available dimensions for plotting. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class. Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{factor}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{factor}). The default is given by \code{mclust.options("classPlotColors")}. } \item{main}{ A logical, a character string, or \code{NULL} (default) for the main title. If \code{NULL} or \code{FALSE} no title is added to a plot. If \code{TRUE} a default title is added identifying the type of plot drawn. If a character string is provided, this is used for the title. 
} \item{\dots}{further arguments passed to or from other methods.} } %\value{} \details{ For more flexibility in plotting, use \code{mclust1Dplot}, \code{mclust2Dplot}, \code{surfacePlot}, \code{coordProj}, or \code{randProj}. } \author{Luca Scrucca} \seealso{ \code{\link{MclustDA}}, \code{\link{surfacePlot}}, \code{\link{coordProj}}, \code{\link{randProj}} } \examples{ \donttest{ odd <- seq(from = 1, to = nrow(iris), by = 2) even <- odd + 1 X.train <- iris[odd,-5] Class.train <- iris[odd,5] X.test <- iris[even,-5] Class.test <- iris[even,5] # common EEE covariance structure (which is essentially equivalent to linear discriminant analysis) irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA", modelNames = "EEE") summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) # common covariance structure selected by BIC irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA") summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) # general covariance structure selected by BIC irisMclustDA <- MclustDA(X.train, Class.train) summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) plot(irisMclustDA) plot(irisMclustDA, dimens = 3:4) plot(irisMclustDA, dimens = 4) plot(irisMclustDA, what = "classification") plot(irisMclustDA, what = "classification", newdata = X.test) plot(irisMclustDA, what = "classification", dimens = 3:4) plot(irisMclustDA, what = "classification", newdata = X.test, dimens = 3:4) plot(irisMclustDA, what = "classification", dimens = 4) plot(irisMclustDA, what = "classification", dimens = 4, newdata = X.test) plot(irisMclustDA, what = "train&test", newdata = X.test) plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 3:4) plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 4) plot(irisMclustDA, what = "error") plot(irisMclustDA, what = "error", dimens = 3:4) 
plot(irisMclustDA, what = "error", dimens = 4) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 3:4) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 4) # simulated 1D data n <- 250 set.seed(1) triModal <- c(rnorm(n,-5), rnorm(n,0), rnorm(n,5)) triClass <- c(rep(1,n), rep(2,n), rep(3,n)) odd <- seq(from = 1, to = length(triModal), by = 2) even <- odd + 1 triMclustDA <- MclustDA(triModal[odd], triClass[odd]) summary(triMclustDA, parameters = TRUE) summary(triMclustDA, newdata = triModal[even], newclass = triClass[even]) plot(triMclustDA) plot(triMclustDA, what = "classification") plot(triMclustDA, what = "classification", newdata = triModal[even]) plot(triMclustDA, what = "train&test", newdata = triModal[even]) plot(triMclustDA, what = "error") plot(triMclustDA, what = "error", newdata = triModal[even], newclass = triClass[even]) # simulated 2D cross data data(cross) odd <- seq(from = 1, to = nrow(cross), by = 2) even <- odd + 1 crossMclustDA <- MclustDA(cross[odd,-1], cross[odd,1]) summary(crossMclustDA, parameters = TRUE) summary(crossMclustDA, newdata = cross[even,-1], newclass = cross[even,1]) plot(crossMclustDA) plot(crossMclustDA, what = "classification") plot(crossMclustDA, what = "classification", newdata = cross[even,-1]) plot(crossMclustDA, what = "train&test", newdata = cross[even,-1]) plot(crossMclustDA, what = "error") plot(crossMclustDA, what = "error", newdata =cross[even,-1], newclass = cross[even,1]) } } \keyword{multivariate} mclust/man/priorControl.Rd0000644000175000017500000000321513752165066015465 0ustar nileshnilesh\name{priorControl} \alias{priorControl} \title{ Conjugate Prior for Gaussian Mixtures. } \description{ Specify a conjugate prior for Gaussian mixtures. 
} \usage{ priorControl(functionName = "defaultPrior", \dots) } \arguments{ \item{functionName}{ The name of the function specifying the conjugate prior. By default the function \code{\link{defaultPrior}} is used, and this can also be used as a template for alternative specification. } \item{\dots}{ Optional named arguments to the function specified in \code{functionName} together with their values. } } \value{ A list with the function name as the first component. The remaining components (if any) consist of a list of arguments to the function with assigned values. } \details{ The function \code{priorControl} is used to specify a conjugate prior for EM within \emph{MCLUST}.\cr Note that, as described in \code{\link{defaultPrior}}, in the multivariate case only 10 out of 14 models may be used in conjunction with a prior, i.e. those available in \emph{MCLUST} up to version 4.4. } \references{ C. Fraley and A. E. Raftery (2007). Bayesian regularization for normal mixture estimation and model-based clustering. \emph{Journal of Classification 24:155-181}. } \seealso{ \code{\link{mclustBIC}}, \code{\link{me}}, \code{\link{mstep}}, \code{\link{defaultPrior}} } \examples{ # default prior irisBIC <- mclustBIC(iris[,-5], prior = priorControl()) summary(irisBIC, iris[,-5]) # no prior on the mean; default prior on variance irisBIC <- mclustBIC(iris[,-5], prior = priorControl(shrinkage = 0)) summary(irisBIC, iris[,-5]) } \keyword{cluster} mclust/man/summary.mclustBIC.Rd0000644000175000017500000000760413175055217016313 0ustar nileshnilesh\name{summary.mclustBIC} \alias{summary.mclustBIC} \alias{print.summary.mclustBIC} \alias{summary.mclustBIC} \alias{summaryMclustBIC} \alias{summaryMclustBICn} \alias{printSummaryMclustBIC} \alias{printSummaryMclustBICn} \title{Summary function for model-based clustering via BIC} \description{ Optimal model characteristics and classification for model-based clustering via \code{mclustBIC}. 
} \usage{ \method{summary}{mclustBIC}(object, data, G, modelNames, \dots) } \arguments{ \item{object}{ An \code{'mclustBIC'} object, which is the result of applying \code{mclustBIC} to \code{data}. } \item{data}{ The matrix or vector of observations used to generate `object'. } \item{G}{ A vector of integers giving the numbers of mixture components (clusters) from which the best model according to BIC will be selected (\code{as.character(G)} must be a subset of the row names of \code{object}). The default is to select the best model for all numbers of mixture components used to obtain \code{object}. } \item{modelNames}{ A vector of integers giving the model parameterizations from which the best model according to BIC will be selected (\code{as.character(model)} must be a subset of the column names of \code{object}). The default is to select the best model for parameterizations used to obtain \code{object}. } \item{\dots}{ Not used. For generic/method consistency. } } \value{ A list giving the optimal (according to BIC) parameters, conditional probabilities \code{z}, and log-likelihood, together with the associated classification and its uncertainty. The details of the output components are as follows: \item{modelName}{ A character string denoting the model corresponding to the optimal BIC. } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The number of mixture components in the model corresponding to the optimal BIC. } \item{bic}{ The optimal BIC value. } \item{loglik}{ The log-likelihood corresponding to the optimal BIC. } \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. 
If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the data belongs to the \emph{k}th class. } \item{classification}{ \code{map(z)}: The classification corresponding to \code{z}. } \item{uncertainty}{ The uncertainty associated with the classification. } \item{Attributes:}{ \code{"bestBICvalues"} Some of the best bic values for the analysis.\cr \code{"prior"} The prior as specified in the input.\cr \code{"control"} The control parameters for EM as specified in the input.\cr \code{"initialization"} The parameters used to initial EM for computing the maximum likelihood values used to obtain the BIC. } } \seealso{ \code{\link{mclustBIC}} \code{\link{mclustModel}} } \examples{ irisBIC <- mclustBIC(iris[,-5]) summary(irisBIC, iris[,-5]) summary(irisBIC, iris[,-5], G = 1:6, modelNames = c("VII", "VVI", "VVV")) } \keyword{cluster} % docclass is function mclust/man/em.Rd0000644000175000017500000001203514124774626013374 0ustar nileshnilesh\name{em} \alias{em} \title{EM algorithm starting with E-step for parameterized Gaussian mixture models} \description{ Implements the EM algorithm for parameterized Gaussian mixture models, starting with the expectation step. } \usage{ em(data, modelName, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. 
The help file for \code{\link{mclustModelNames}} describes the available models. } \item{parameters}{ A named list giving the parameters of the model. The components are as follows: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ An estimate of the reciprocal hypervolume of the data region. If set to NULL or a negative value, the default is determined by applying function \code{hypvol} to the data. Used only when \code{pro} includes an additional mixing proportion for a noise component. } } } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The number of mixture components. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. 
} \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{control}{ The list of control parameters for EM used. } \item{prior}{ The specification of a conjugate prior on the means and variances used, \code{NULL} if no prior is used. } \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. 
} } \seealso{ \code{\link{emE}}, \dots, \code{\link{emVVV}}, \code{\link{estep}}, \code{\link{me}}, \code{\link{mstep}}, \code{\link{mclust.options}}, \code{\link{do.call}} } \examples{ \donttest{ msEst <- mstep(modelName = "EEE", data = iris[,-5], z = unmap(iris[,5])) names(msEst) em(modelName = msEst$modelName, data = iris[,-5], parameters = msEst$parameters) do.call("em", c(list(data = iris[,-5]), msEst)) ## alternative call } } \keyword{cluster} mclust/man/mclustLoglik.Rd0000644000175000017500000000160114124774626015441 0ustar nileshnilesh\name{mclustLoglik} \alias{mclustLoglik} \alias{print.mclustLoglik} \title{Log-likelihood from a table of BIC values for parameterized Gaussian mixture models} \description{ Compute the maximal log-likelihood from a table of BIC values contained in a \code{'mclustBIC'} object as returned by function \code{\link{mclustBIC}}. } \usage{ mclustLoglik(object, \dots) } \arguments{ \item{object}{An object of class \code{'mclustBIC'} containing the BIC values as returned by a call to \code{\link{mclustBIC}}. } \item{\dots}{ Catches unused arguments in an indirect or list call via \code{do.call}. } } \value{ An object of class \code{'mclustLoglik'} containing the maximal log-likelihood values for the Gaussian mixture models provided as input. } \seealso{ \code{\link{mclustBIC}}. } \examples{ \donttest{ BIC <- mclustBIC(iris[,1:4]) mclustLoglik(BIC) } } \keyword{cluster} mclust/man/me.weighted.Rd0000644000175000017500000001106214125666512015165 0ustar nileshnilesh\name{me.weighted} \alias{me.weighted} \title{EM algorithm with weights starting with M-step for parameterized MVN mixture models} \description{ Implements the EM algorithm for fitting MVN mixture models parameterized by eigenvalue decomposition, when observations have weights, starting with the maximization step. 
} \usage{ me.weighted(data, modelName, z, weights = NULL, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{z}{ A matrix whose \code{[i,k]}th entry is an initial estimate of the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. } \item{weights}{ A vector of positive weights, where the \code{[i]}th entry is the weight for the ith observation. If any of the weights are greater than one, then they are scaled so that the maximum weight is one. } \item{prior}{ Specification of a conjugate prior on the means and variances. See the help file for \code{priorControl} for further information. The default assumes no prior. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{\link{emControl}}. } \item{Vinv}{ If the model is to include a noise term, \code{Vinv} is an estimate of the reciprocal hypervolume of the data region. If set to a negative value or 0, the model will include a noise term with the reciprocal hypervolume estimated by the function \code{hypvol}. The default is not to assume a noise term in the model through the setting \code{Vinv=NULL}. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is set by \code{warn} using \code{\link{mclust.options}}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). 
} \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. 
} } \author{Thomas Brendan Murphy} \seealso{ \code{\link{me}}, \code{\link{meE}}, \dots, \code{\link{meVVV}}, \code{\link{em}}, \code{\link{mstep}}, \code{\link{estep}}, \code{\link{priorControl}}, \code{\link{mclustModelNames}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}} } \examples{ \donttest{ w <- rep(1,150) w[1] <- 0 me.weighted(data = iris[,-5], modelName = "VVV", z = unmap(iris[,5]), weights = w) }} mclust/man/hcRandomPairs.Rd0000644000175000017500000000224113750455151015514 0ustar nileshnilesh\name{hcRandomPairs} \alias{hcRandomPairs} \alias{randomPairs} \title{Random hierarchical structure} \description{Create a hierarchical structure using a random hierarchical partition of the data.} \usage{ hcRandomPairs(data, seed = NULL, \dots) } \arguments{ \item{data}{ A numeric matrix or data frame of observations. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{seed}{ Optional single value, interpreted as an integer, specifying the seed for random partition. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A numeric two-column matrix in which the \emph{i}th row gives the minimum index for observations in each of the two clusters merged at the \emph{i}th stage of a random agglomerative hierarchical clustering. 
} \seealso{ \code{\link{hc}}, \code{\link{hclass}} \code{\link{hcVVV}} } \examples{ data <- iris[,1:4] randPairs <- hcRandomPairs(data) str(randPairs) # start model-based clustering from a random partition mod <- Mclust(data, initialization = list(hcPairs = randPairs)) summary(mod) } \keyword{cluster} mclust/man/mclust.options.Rd0000644000175000017500000001416714104430141015757 0ustar nileshnilesh\name{mclust.options} \alias{mclust.options} \title{Default values for use with MCLUST package} \description{Set or retrieve default values for use with MCLUST package.} \usage{ mclust.options(\dots) } \arguments{ \item{\dots}{one or more arguments provided in the \code{name = value} form, or no argument at all may be given. \cr Available arguments are described in the Details section below.} } \details{ \code{mclust.options()} is provided for assigning or retrieving default values used by various functions in \code{MCLUST}.\cr Available options are: \describe{ \item{\code{emModelNames}}{ A vector of 3-character strings that are associated with multivariate models for which EM estimation is available in MCLUST. \cr The current default is all of the multivariate mixture models supported in MCLUST. The help file for \code{\link{mclustModelNames}} describes the available models. 
} \item{\code{hcModelName}}{ A character string specifying the multivariate model to be used in model-based agglomerative hierarchical clustering for initialization of EM algorithm.\cr The available models are the following: \describe{ \item{\code{"EII"}}{spherical, equal volume;} \item{\code{"EEE"}}{ellipsoidal, equal volume, shape, and orientation;} \item{\code{"VII"}}{spherical, unequal volume;} \item{\code{"VVV"}}{ellipsoidal, varying volume, shape, and orientation (default).} } } \item{\code{hcUse}}{ A character string specifying the type of input variables/transformation to be used in model-based agglomerative hierarchical clustering for initialization of EM algorithm.\cr Possible values are: \describe{ \item{\code{"VARS"}}{original variables;} \item{\code{"STD"}}{standardized variables (centered and scaled);} \item{\code{"SPH"}}{sphered variables (centered, scaled and uncorrelated) computed using SVD;} \item{\code{"PCS"}}{principal components computed using SVD on centered variables (i.e. using the covariance matrix);} \item{\code{"PCR"}}{principal components computed using SVD on standardized (center and scaled) variables (i.e. using the correlation matrix);} \item{\code{"SVD"}}{scaled SVD transformation (default);} \item{\code{"RND"}}{no transformation is applied but a random hierarchical structure is returned (see \code{\link{hcRandomPairs}}).} } For further details see Scrucca and Raftery (2015), Scrucca et al. (2016). } \item{\code{subset}}{ A value specifying the maximal sample size to be used in the model-based hierarchical clustering to start the EM algorithm. If data sample size exceeds this value, a random sample is drawn of size specified by \code{subset}. } \item{\code{fillEllipses}}{ A logical value specifying whether or not to fill with transparent colors ellipses corresponding to the within-cluster covariances in case of \code{"classification"} plot for \code{'Mclust'} objects, or \code{"scatterplot"} graphs for \code{'MclustDA'} objects. 
} \item{\code{bicPlotSymbols}}{ A vector whose entries correspond to graphics symbols for plotting the BIC values output from \code{\link{Mclust}} and \code{\link{mclustBIC}}. These are displayed in the legend which appears at the lower right of the BIC plots. } \item{\code{bicPlotColors}}{ A vector whose entries correspond to colors for plotting the BIC curves from output from \code{\link{Mclust}} and \code{\link{mclustBIC}}. These are displayed in the legend which appears at the lower right of the BIC plots. } \item{\code{classPlotSymbols}}{ A vector whose entries are either integers corresponding to graphics symbols or single characters for indicating classifications when plotting data. Classes are assigned symbols in the given order. } \item{\code{classPlotColors}}{ A vector whose entries correspond to colors for indicating classifications when plotting data. Classes are assigned colors in the given order. } \item{\code{warn}}{ A logical value indicating whether or not to issue certain warnings. Most of these warnings have to do with situations in which singularities are encountered. The default is \code{warn = FALSE}. } } The parameter values set via a call to this function will remain in effect for the rest of the session, affecting the subsequent behaviour of the functions for which the given parameters are relevant. } \value{ If the argument list is empty the function returns the current list of values. If the argument list is not empty, the returned list is invisible. } \seealso{ \code{\link{Mclust}}, \code{\link{MclustDA}}, \code{\link{densityMclust}}, \code{\link{emControl}} } \references{ Scrucca L. and Raftery A. E. (2015) Improved initialisation of model-based clustering using Gaussian hierarchical partitions. \emph{Advances in Data Analysis and Classification}, 9/4, pp. 447-460. Scrucca L., Fop M., Murphy T. B. and Raftery A. E. 
(2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. } \examples{ opt <- mclust.options() # save default values irisBIC <- mclustBIC(iris[,-5]) summary(irisBIC, iris[,-5]) mclust.options(emModelNames = c("EII", "EEI", "EEE")) irisBIC <- mclustBIC(iris[,-5]) summary(irisBIC, iris[,-5]) mclust.options(opt) # restore default values mclust.options() oldpar <- par(mfrow = c(2,1), no.readonly = TRUE) n <- with(mclust.options(), max(sapply(list(bicPlotSymbols, bicPlotColors),length))) plot(seq(n), rep(1,n), ylab = "", xlab = "", yaxt = "n", pch = mclust.options("bicPlotSymbols"), col = mclust.options("bicPlotColors")) title("mclust.options(\"bicPlotSymbols\") \n mclust.options(\"bicPlotColors\")") n <- with(mclust.options(), max(sapply(list(classPlotSymbols, classPlotColors),length))) plot(seq(n), rep(1,n), ylab = "", xlab = "", yaxt = "n", pch = mclust.options("classPlotSymbols"), col = mclust.options("classPlotColors")) title("mclust.options(\"classPlotSymbols\") \n mclust.options(\"classPlotColors\")") par(oldpar) } \keyword{cluster} mclust/man/clustCombiOptim.Rd0000644000175000017500000000415313475242100016073 0ustar nileshnilesh\name{clustCombiOptim} \alias{clustCombiOptim} \title{Optimal number of clusters obtained by combining mixture components} \description{ Return the optimal number of clusters by combining mixture components based on the entropy method discussed in the reference given below. } \usage{ clustCombiOptim(object, reg = 2, plot = FALSE, \dots) } \arguments{ \item{object}{ An object of class \code{'clustCombi'} resulting from a call to \code{\link{clustCombi}}. } \item{reg}{ The number of parts of the piecewise linear regression for the entropy plots. Choose 2 for a two-segment piecewise linear regression model (i.e. 1 change-point), and 3 for a three-segment piecewise linear regression model (i.e. 3 change-points). 
} \item{plot}{ Logical, if \code{TRUE} an entropy plot is also produced. } \item{\dots}{Further arguments passed to or from other methods.} } \value{ The function returns a list with the following components: \item{numClusters.combi}{The estimated number of clusters.} \item{z.combi}{A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the data belongs to the \emph{k}th cluster.} \item{cluster.combi}{The clustering labels.} } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. Scrucca } \seealso{ \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{clustCombi}} } \examples{ data(Baudry_etal_2010_JCGS_examples) output <- clustCombi(data = ex4.1) combiOptim <- clustCombiOptim(output) str(combiOptim) # plot optimal clustering with alpha color transparency proportional to uncertainty zmax <- apply(combiOptim$z.combi, 1, max) col <- mclust.options("classPlotColors")[combiOptim$cluster.combi] vadjustcolor <- Vectorize(adjustcolor) alphacol = (zmax - 1/combiOptim$numClusters.combi)/(1-1/combiOptim$numClusters.combi) col <- vadjustcolor(col, alpha.f = alphacol) plot(ex4.1, col = col, pch = mclust.options("classPlotSymbols")[combiOptim$cluster.combi]) } \keyword{ cluster } mclust/man/dens.Rd0000644000175000017500000000451314124774626013726 0ustar nileshnilesh\name{dens} \alias{dens} \title{ Density for Parameterized MVN Mixtures } \description{ Computes densities of observations in parameterized MVN mixtures. } \usage{ dens(data, modelName, parameters, logarithm = FALSE, warn=NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. 
} \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{parameters}{ The parameters of the model: \describe{ \item{\code{pro}}{ The vector of mixing proportions for the components of the mixture. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{logarithm}{ A logical value indicating whether or not the logarithm of the component densities should be returned. The default is to return the component densities, obtained from the log component densities by exponentiation. } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A numeric vector whose \emph{i}th component is the density of the \emph{ith} observation in \code{data} in the MVN mixture specified by \code{parameters}. 
} \seealso{ \code{\link{cdens}}, \code{\link{mclust.options}}, \code{\link{do.call}} } \examples{ \donttest{ faithfulModel <- Mclust(faithful) Dens <- dens(modelName = faithfulModel$modelName, data = faithful, parameters = faithfulModel$parameters) Dens ## alternative call do.call("dens", faithfulModel)} } \keyword{cluster} mclust/man/crimcoords.Rd0000644000175000017500000000777314156727671015157 0ustar nileshnilesh\name{crimcoords} \alias{crimcoords} \alias{print.crimcoords} \alias{plot.crimcoords} \title{Discriminant coordinates data projection} \description{ Compute the discriminant coordinates or crimcoords obtained by projecting the observed data from multiple groups onto the discriminant subspace. The optimal projection subspace is given by the linear transformation of the original variables that maximizes the ratio of the between-groups covariance (which represents groups separation) to the pooled within-group covariance (which represents within-group dispersion).} \usage{ crimcoords(data, classification, numdir = NULL, unbiased = FALSE, plot = TRUE, \dots) \method{plot}{crimcoords}(x, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{classification}{ A vector (numerical, character string, or factor) giving the groups classification (either the known class labels or the estimated clusters) for the observed data.} \item{numdir}{ An integer value specifying the number of directions of the discriminant subspace to return. If not provided, the maximal number of directions are returned (which is given by the number of non-null eigenvalues, the minimum among the number of variables and the number of groups minus one). 
However, since the effectiveness of the discriminant coordinates in highlighting the separation of groups is decreasing, it might be useful to provide a smaller value, say 2 or 3.} \item{unbiased}{ A logical specifying if unbiased estimates should be used for the between-groups and within-groups covariances. By default \code{unbiased = FALSE} so MLE estimates are used. Note that the use of unbiased or MLE estimates only changes the eigenvalues and eigenvectors of the generalized eigendecomposition by a constant of proportionality, so the discriminant coordinates or crimcoords are essentially the same.} \item{plot}{ A logical specifying if a graph of discriminant coordinates should be produced (default) or not.} \item{x}{ An object of class \code{crimcoords} as returned by \code{crimcoords()} function.} \item{\dots}{further arguments passed to or from other methods.} } \value{ A list of class \code{crimcoords} with the following components: \item{means}{A matrix of within-groups means.} \item{B}{The between-groups covariance matrix.} \item{W}{The pooled within-groups covariance matrix.} \item{evalues}{A vector of eigenvalues.} \item{basis}{A matrix of eigenvectors specifying the basis of the discriminant subspace.} \item{projection}{A matrix of projected data points onto the discriminant subspace.} \item{classification}{A vector giving the groups classification.} If \code{plot = TRUE} (default) a graph of data points projected onto the discriminant coordinate or crimcoords subspace is also produced. } \references{ Gnanadesikan, R. (1977) \emph{Methods for Statistical Data Analysis of Multivariate Observations}. John Wiley & Sons, Sec. 4.2. Flury, B. (1997) \emph{A First Course in Multivariate Statistics}. Springer, Sec. 7.3.
} \author{ Luca Scrucca \email{luca.scrucca@unipg.it} } %\note{} \seealso{\code{\link{MclustDR}}, \code{\link{clPairs}}.} \examples{ # discriminant coordinates for the iris data using known classes data("iris") CRIMCOORDS = crimcoords(iris[,-5], iris$Species) CRIMCOORDS # banknote data data("banknote") # discriminant coordinate on known classes CRIMCOORDS = crimcoords(banknote[,-1], banknote$Status) CRIMCOORDS # discriminant coordinates on estimated clusters mod = Mclust(banknote[,-1]) CRIMCOORDS = crimcoords(banknote[,-1], mod$classification, plot = FALSE) plot(CRIMCOORDS$projection, type = "n") text(CRIMCOORDS$projection, cex = 0.8, labels = strtrim(banknote$Status, 2), col = mclust.options("classPlotColors")[1:mod$G][mod$classification]) } \keyword{multivariate} mclust/man/plot.mclustBIC.Rd0000644000175000017500000000406514124774626015601 0ustar nileshnilesh\name{plot.mclustBIC} \alias{plot.mclustBIC} \title{BIC Plot for Model-Based Clustering} \description{ Plots the BIC values returned by the \code{\link{mclustBIC}} function. } \usage{ \method{plot}{mclustBIC}(x, G = NULL, modelNames = NULL, symbols = NULL, colors = NULL, xlab = NULL, ylab = "BIC", legendArgs = list(x = "bottomright", ncol = 2, cex = 1, inset = 0.01), \dots) } \arguments{ \item{x}{ Output from \code{mclustBIC}. } \item{G}{ One or more numbers of components corresponding to models fit in \code{x}. The default is to plot the BIC for all of the numbers of components fit. } \item{modelNames}{ One or more model names corresponding to models fit in \code{x}. The default is to plot the BIC for all of the models fit. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotSymbols")}. 
} \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{xlab}{ Optional label for the horizontal axis of the BIC plot. } \item{ylab}{ Label for the vertical axis of the BIC plot. } \item{legendArgs}{ Arguments to pass to the \code{legend} function. Set to \code{NULL} for no legend. } \item{\dots}{ Other graphics parameters. } } \value{ A plot of the BIC values. } \seealso{ \code{\link{mclustBIC}} } \examples{ \donttest{ plot(mclustBIC(precip), legendArgs = list(x = "bottomleft")) plot(mclustBIC(faithful)) plot(mclustBIC(iris[,-5])) } } \keyword{cluster} % docclass is function mclust/man/uncerPlot.Rd0000644000175000017500000000275413175055331014743 0ustar nileshnilesh\name{uncerPlot} \alias{uncerPlot} \title{ Uncertainty Plot for Model-Based Clustering } \description{ Displays the uncertainty in converting a conditional probability from EM to a classification in model-based clustering. } \usage{ uncerPlot(z, truth, \dots) } \arguments{ \item{z}{ A matrix whose \emph{[i,k]}th entry is the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. } \item{truth}{ A numeric or character vector giving the true classification of the data. } \item{\dots }{ Provided to allow lists with elements other than the arguments can be passed in indirect or list calls with \code{do.call}. } } \value{ A plot of the uncertainty profile of the data, with uncertainties in increasing order of magnitude. If \code{truth} is supplied and the number of classes is the same as the number of columns of \code{z}, the uncertainty of the misclassified data is marked by vertical lines on the plot.
} \details{ When \code{truth} is provided and the number of classes is compatible with \code{z}, the function \code{compareClass} is used to to find best correspondence between classes in \code{truth} and \code{z}. } \seealso{ \code{\link{mclustBIC}}, \code{\link{em}}, \code{\link{me}}, \code{\link{mapClass}} } \examples{ irisModel3 <- Mclust(iris[,-5], G = 3) uncerPlot(z = irisModel3$z) uncerPlot(z = irisModel3$z, truth = iris[,5]) } \keyword{cluster} % docclass is function mclust/man/mclust-package.Rd0000644000175000017500000000365413762450575015703 0ustar nileshnilesh\name{mclust-package} \alias{mclust-package} \alias{mclust} \docType{package} \title{Gaussian Mixture Modelling for Model-Based Clustering, Classification, and Density Estimation} \description{ \if{html}{\figure{logo.png}{options: align="right" alt="logo" width="120"}} Gaussian finite mixture models estimated via EM algorithm for model-based clustering, classification, and density estimation, including Bayesian regularization and dimension reduction. } \details{ For a quick introduction to \pkg{mclust} see the vignette \href{../doc/mclust.html}{A quick tour of mclust}. See also: \itemize{ \item \code{\link{Mclust}} for clustering; \item \code{\link{MclustDA}} for supervised classification; \item \code{\link{MclustSSC}} for semi-supervised classification; \item \code{\link{densityMclust}} for density estimation. } } \author{ Chris Fraley, Adrian Raftery and Luca Scrucca. Maintainer: Luca Scrucca \email{luca.scrucca@unipg.it} } \references{ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. Fraley C., Raftery A. E., Murphy T. B. and Scrucca L. 
(2012) mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. \emph{Technical Report} No. 597, Department of Statistics, University of Washington. } \examples{ # Clustering mod1 <- Mclust(iris[,1:4]) summary(mod1) plot(mod1, what = c("BIC", "classification")) # Classification data(banknote) mod2 <- MclustDA(banknote[,2:7], banknote$Status) summary(mod2) plot(mod2) # Density estimation mod3 <- densityMclust(faithful$waiting) summary(mod3) plot(mod3, faithful$waiting) } \keyword{package} mclust/man/partconv.Rd0000644000175000017500000000164012460535131014612 0ustar nileshnilesh\name{partconv} \alias{partconv} \title{Numeric Encoding of a Partitioning} \description{ Converts a vector interpreted as a classification or partitioning into a numeric vector. } \usage{ partconv(x, consec=TRUE) } \arguments{ \item{x}{ A vector interpreted as a classification or partitioning. } \item{consec}{ Logical value indicating whether or not consecutive class numbers should be used. } } \value{ Numeric encoding of \code{x}. When \code{consec = TRUE}, the distinct values in \code{x} are numbered by the order in which they appear. When \code{consec = FALSE}, each distinct value in \code{x} is numbered by the index corresponding to its first appearance in \code{x}.
} \seealso{ \code{\link{partuniq}} } \examples{ partconv(iris[,5]) set.seed(0) cl <- sample(LETTERS[1:9], 25, replace=TRUE) partconv(cl, consec=FALSE) partconv(cl, consec=TRUE) } \keyword{cluster} mclust/man/logLik.Mclust.Rd0000644000175000017500000000151514124774626015463 0ustar nileshnilesh\name{logLik.Mclust} \alias{logLik.Mclust} \title{Log-Likelihood of a \code{Mclust} object} \description{ Returns the log-likelihood for a \code{'Mclust'} object.} \usage{ \method{logLik}{Mclust}(object, \dots) } \arguments{ \item{object}{an object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{Returns an object of class \code{'logLik'} with an element providing the maximized log-likelihood, and further arguments giving the number of (estimated) parameters in the model (\code{"df"}) and the sample size (\code{"nobs"}).} \author{Luca Scrucca} % \note{} \seealso{\code{\link{Mclust}}.} \examples{ \donttest{ irisMclust <- Mclust(iris[,1:4]) summary(irisMclust) logLik(irisMclust) } } \keyword{multivariate} mclust/man/MclustSSC.Rd0000644000175000017500000001344513742016673014615 0ustar nileshnilesh\name{MclustSSC} \alias{MclustSSC} \alias{print.MclustSSC} \title{MclustSSC semi-supervised classification} \description{ Semi-Supervised classification based on Gaussian finite mixture modeling. } \usage{ MclustSSC(data, class, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), warn = mclust.options("warn"), verbose = interactive(), \dots) } \arguments{ \item{data}{ A data frame or matrix giving the training data. } \item{class}{ A vector giving the known class labels (either a numerical value or a character string) for the observations in the training data. Observations with unknown class are encoded as \code{NA}. } \item{G}{ An integer value specifying the numbers of mixture components or classes. By default is set equal to the number of known classes. 
See the examples below. } \item{modelNames}{ A vector of character strings indicating the models to be fitted by EM (see the description in \code{\link{mclustModelNames}}). See the examples below. } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{\link{priorControl}}. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when estimation fails. The default is controlled by \code{\link{mclust.options}}. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the fitting procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots }{Further arguments passed to or from other methods.} } \value{ An object of class \code{'MclustSSC'} providing the optimal (according to BIC) Gaussian mixture model for semi-supervised classification. The details of the output components are as follows: \item{call}{The matched call.} \item{data}{The input data matrix.} \item{class}{The input class labels (including \code{NA}s for unknown labels).} \item{modelName}{A character string specifying the "best" estimated model.} \item{G}{A numerical value specifying the number of mixture components or classes of the "best" estimated model.} \item{n}{The total number of observations in the data.} \item{d}{The dimension of the data.} \item{BIC}{All BIC values.} \item{loglik}{Log-likelihood for the selected model.} \item{df}{Number of estimated parameters.} \item{bic}{Optimal BIC value.} \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. } \item{\code{mean}}{ The mean for each component.
If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the test data belongs to the \emph{k}th class. } \item{classification}{ The classification corresponding to \code{z}, i.e. \code{map(z)}. } \item{prior}{ The prior used (if any). } \item{control}{ A list of control parameters used in the EM algorithm. } } \details{ The semi-supervised approach implemented in \code{MclustSSC()} is a simple Gaussian mixture model for classification where at the first M-step only observations with known class labels are used for parameters estimation. Then, a standard EM algorithm is used for updating the probability of class membership for unlabelled data while keeping fixed the probabilities for labelled data. } \references{ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317.
} \author{Luca Scrucca} \seealso{ \code{\link{summary.MclustSSC}}, \code{\link{plot.MclustSSC}}, \code{\link{predict.MclustSSC}} } \examples{ # Simulate two overlapping groups n <- 200 pars <- list(pro = c(0.5, 0.5), mean = matrix(c(-1,1), nrow = 2, ncol = 2, byrow = TRUE), variance = mclustVariance("EII", d = 2, G = 2)) pars$variance$sigmasq <- 1 data <- sim("EII", parameters = pars, n = n, seed = 12) class <- data[,1] X <- data[,-1] clPairs(X, class, symbols = c(1,2), main = "Full classified data") # Randomly remove labels cl <- class; cl[sample(1:n, size = 195)] <- NA table(cl, useNA = "ifany") clPairs(X, ifelse(is.na(cl), 0, class), symbols = c(0, 16, 17), colors = c("grey", 4, 2), main = "Partially classified data") # Fit semi-supervised classification model mod_SSC <- MclustSSC(X, cl) summary(mod_SSC, parameters = TRUE) pred_SSC <- predict(mod_SSC) table(Predicted = pred_SSC$classification, Actual = class) ngrid <- 50 xgrid <- seq(-3, 3, length.out = ngrid) ygrid <- seq(-4, 4.5, length.out = ngrid) xygrid <- expand.grid(xgrid, ygrid) pred_SSC <- predict(mod_SSC, newdata = xygrid) col <- mclust.options("classPlotColors")[class] pch <- class pch[!is.na(cl)] = ifelse(cl[!is.na(cl)] == 1, 19, 17) plot(X, pch = pch, col = col) contour(xgrid, ygrid, matrix(pred_SSC$z[,1], ngrid, ngrid), add = TRUE, levels = 0.5, drawlabels = FALSE, lty = 2, lwd = 2) } \keyword{classification} mclust/man/wdbc.Rd0000644000175000017500000000661514037530206013704 0ustar nileshnilesh\name{wdbc} \alias{wdbc} \docType{data} \title{Wisconsin diagnostic breast cancer (WDBC) data} \description{ The data set provides data for 569 patients on 30 features of the cell nuclei obtained from a digitized image of a fine needle aspirate (FNA) of a breast mass. 
For each patient the cancer was diagnosed as malignant or benign.} \usage{data(wdbc)} \format{A data frame with 569 observations on the following variables: \describe{ \item{\code{ID}}{ID number} \item{\code{Diagnosis}}{cancer diagnosis: \code{M} = malignant, \code{B} = benign} \item{\code{Radius_mean}}{a numeric vector} \item{\code{Texture_mean}}{a numeric vector} \item{\code{Perimeter_mean}}{a numeric vector} \item{\code{Area_mean}}{a numeric vector} \item{\code{Smoothness_mean}}{a numeric vector} \item{\code{Compactness_mean}}{a numeric vector} \item{\code{Concavity_mean}}{a numeric vector} \item{\code{Nconcave_mean}}{a numeric vector} \item{\code{Symmetry_mean}}{a numeric vector} \item{\code{Fractaldim_mean}}{a numeric vector} \item{\code{Radius_se}}{a numeric vector} \item{\code{Texture_se}}{a numeric vector} \item{\code{Perimeter_se}}{a numeric vector} \item{\code{Area_se}}{a numeric vector} \item{\code{Smoothness_se}}{a numeric vector} \item{\code{Compactness_se}}{a numeric vector} \item{\code{Concavity_se}}{a numeric vector} \item{\code{Nconcave_se}}{a numeric vector} \item{\code{Symmetry_se}}{a numeric vector} \item{\code{Fractaldim_se}}{a numeric vector} \item{\code{Radius_extreme}}{a numeric vector} \item{\code{Texture_extreme}}{a numeric vector} \item{\code{Perimeter_extreme}}{a numeric vector} \item{\code{Area_extreme}}{a numeric vector} \item{\code{Smoothness_extreme}}{a numeric vector} \item{\code{Compactness_extreme}}{a numeric vector} \item{\code{Concavity_extreme}}{a numeric vector} \item{\code{Nconcave_extreme}}{a numeric vector} \item{\code{Symmetry_extreme}}{a numeric vector} \item{\code{Fractaldim_extreme}}{a numeric vector} } } \details{ The recorded features are: \itemize{ \item \code{Radius} as mean of distances from center to points on the perimeter \item \code{Texture} as standard deviation of gray-scale values \item \code{Perimeter} as cell nucleus perimeter \item \code{Area} as cell nucleus area \item \code{Smoothness} as local 
variation in radius lengths \item \code{Compactness} as cell nucleus compactness, perimeter^2 / area - 1 \item \code{Concavity} as severity of concave portions of the contour \item \code{Nconcave} as number of concave portions of the contour \item \code{Symmetry} as cell nucleus shape \item \code{Fractaldim} as fractal dimension, "coastline approximation" - 1 } For each feature the recorded values are computed from each image as \code{_mean}, \code{_se}, and \code{_extreme}, for the mean, the standard error, and the mean of the three largest values. } \source{Breast Cancer Wisconsin (Diagnostic) Data Set (\code{wdbc.data}, \code{wdbc.names}) is available at UCI Machine Learning Repository \url{https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)}. Please note the UCI conditions of use.} \references{ Mangasarian, O. L., Street, W. N., and Wolberg, W. H. (1995) Breast cancer diagnosis and prognosis via linear programming. \emph{Operations Research}, 43(4), pp. 570-577. } \keyword{datasets} mclust/man/predict.Mclust.Rd0000644000175000017500000000317113175055063015664 0ustar nileshnilesh\name{predict.Mclust} \alias{predict.Mclust} \title{Cluster multivariate observations by Gaussian finite mixture modeling} \description{Cluster prediction for multivariate observations based on Gaussian finite mixture models estimated by \code{\link{Mclust}}.} \usage{ \method{predict}{Mclust}(object, newdata, \dots) } \arguments{ \item{object}{an object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}.} \item{newdata}{a data frame or matrix giving the data. 
If missing the clustering data obtained from the call to \code{\link{Mclust}} are classified.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a list of with the following components: \item{classification}{a factor of predicted cluster labels for \code{newdata}.} \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in \code{newdata} belongs to the \emph{k}th cluster.} } \author{Luca Scrucca} % \note{} \seealso{\code{\link{Mclust}}.} \examples{ model <- Mclust(faithful) # predict cluster for the observed data pred <- predict(model) str(pred) pred$z # equal to model$z pred$classification # equal to plot(faithful, col = pred$classification, pch = pred$classification) # predict cluster over a grid grid <- apply(faithful, 2, function(x) seq(min(x), max(x), length = 50)) grid <- expand.grid(eruptions = grid[,1], waiting = grid[,2]) pred <- predict(model, grid) plot(grid, col = mclust.options("classPlotColors")[pred$classification], pch = 15, cex = 0.5) points(faithful, pch = model$classification) } \keyword{multivariate} mclust/man/mclustModel.Rd0000644000175000017500000000667513475242100015261 0ustar nileshnilesh\name{mclustModel} \alias{mclustModel} \title{ Best model based on BIC } \description{ Determines the best model from clustering via \code{mclustBIC} for a given set of model parameterizations and numbers of components. } \usage{ mclustModel(data, BICvalues, G, modelNames, \dots) } \arguments{ \item{data}{ The matrix or vector of observations used to generate `object'. } \item{BICvalues}{ An \code{'mclustBIC'} object, which is the result of applying \code{mclustBIC} to \code{data}. } \item{G}{ A vector of integers giving the numbers of mixture components (clusters) from which the best model according to BIC will be selected (\code{as.character(G)} must be a subset of the row names of \code{BICvalues}). 
The default is to select the best model for all numbers of mixture components used to obtain \code{BICvalues}. } \item{modelNames}{ A vector of integers giving the model parameterizations from which the best model according to BIC will be selected (\code{as.character(model)} must be a subset of the column names of \code{BICvalues}). The default is to select the best model for parameterizations used to obtain \code{BICvalues}. } \item{\dots}{ Not used. For generic/method consistency. } } \value{ A list giving the optimal (according to BIC) parameters, conditional probabilities \code{z}, and log-likelihood, together with the associated classification and its uncertainty. The details of the output components are as follows: \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The number of components in the Gaussian mixture model corresponding to the optimal BIC. } \item{bic}{ The optimal BIC value. } \item{loglik}{ The log-likelihood corresponding to the optimal BIC. } \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. 
} } } \item{z}{ A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the test data belongs to the \emph{k}th class. } } \seealso{ \code{\link{mclustBIC}} } \examples{ irisBIC <- mclustBIC(iris[,-5]) mclustModel(iris[,-5], irisBIC) mclustModel(iris[,-5], irisBIC, G = 1:6, modelNames = c("VII", "VVI", "VVV")) } \keyword{cluster} % docclass is function mclust/man/summary.MclustDA.Rd0000644000175000017500000000262313465000766016137 0ustar nileshnilesh\name{summary.MclustDA} \alias{summary.MclustDA} \alias{print.summary.MclustDA} \title{Summarizing discriminant analysis based on Gaussian finite mixture modeling} \description{Summary method for class \code{"MclustDA"}.} \usage{ \method{summary}{MclustDA}(object, parameters = FALSE, newdata, newclass, \dots) \method{print}{summary.MclustDA}(x, digits = getOption("digits"), \dots) } \arguments{ \item{object}{An object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} \item{x}{An object of class \code{'summary.MclustDA'}, usually, a result of a call to \code{summary.MclustDA}.} \item{parameters}{Logical; if \code{TRUE}, the parameters of mixture components are printed.} \item{newdata}{A data frame or matrix giving the test data.} \item{newclass}{A vector giving the class labels for the observations in the test data.} \item{digits}{The number of significant digits to use when printing.} \item{\dots}{Further arguments passed to or from other methods.} } % \details{} \value{The function \code{summary.MclustDA} computes and returns a list of summary statistics of the estimated MclustDA or EDDA model for classification.} \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustDA}}, \code{\link{plot.MclustDA}}.} \examples{ mod = MclustDA(data = iris[,1:4], class = iris$Species) summary(mod) summary(mod, parameters = TRUE) } \keyword{multivariate} mclust/man/summary.MclustBootstrap.Rd0000644000175000017500000000261014124774626017632 0ustar 
nileshnilesh\name{summary.MclustBootstrap} \alias{summary.MclustBootstrap} \alias{print.summary.MclustBootstrap} \title{Summary Function for Bootstrap Inference for Gaussian Finite Mixture Models} \description{Summary of bootstrap distribution for the parameters of a Gaussian mixture model providing either standard errors or percentile bootstrap confidence intervals.} \usage{ \method{summary}{MclustBootstrap}(object, what = c("se", "ci", "ave"), conf.level = 0.95, \dots) } \arguments{ \item{object}{An object of class \code{'MclustBootstrap'} as returned by \code{\link{MclustBootstrap}}.} \item{what}{A character string: \code{"se"} for the standard errors; \code{"ci"} for the confidence intervals; \code{"ave"} for the averages.} \item{conf.level}{A value specifying the confidence level of the interval.} \item{\dots}{Further arguments passed to or from other methods.} } \details{For details about the procedure used to obtain the bootstrap distribution see \code{\link{MclustBootstrap}}.} %\value{} \seealso{\code{\link{MclustBootstrap}}.} \examples{ \donttest{ data(diabetes) X = diabetes[,-1] modClust = Mclust(X) bootClust = MclustBootstrap(modClust) summary(bootClust, what = "se") summary(bootClust, what = "ci") data(acidity) modDens = densityMclust(acidity) modDens = MclustBootstrap(modDens) summary(modDens, what = "se") summary(modDens, what = "ci") } } \keyword{htest} \keyword{cluster} mclust/man/mclust-deprecated.Rd0000644000175000017500000000135513405515075016373 0ustar nileshnilesh\name{mclust-deprecated} \alias{cv.MclustDA} \alias{cv1EMtrain} \alias{bicEMtrain} \title{Deprecated Functions in mclust package} \description{ These functions are provided for compatibility with older versions of the \pkg{mclust} package only, and may be removed eventually. 
} \usage{ cv.MclustDA(\dots) cv1EMtrain(data, labels, modelNames=NULL) bicEMtrain(data, labels, modelNames=NULL) } \arguments{ \item{\dots}{pass arguments down.} \item{data}{A numeric vector or matrix of observations.} \item{labels}{Labels for each element or row in the dataset.} \item{modelNames}{Vector of model names that should be tested. The default is to select all available model names.} } \seealso{\code{\link{deprecated}}} mclust/man/estepE.Rd0000644000175000017500000000752714124774626014232 0ustar nileshnilesh\name{estepE} \alias{estepE} \alias{estepV} \alias{estepEII} \alias{estepVII} \alias{estepEEI} \alias{estepVEI} \alias{estepEVI} \alias{estepVVI} \alias{estepEEE} \alias{estepEEV} \alias{estepVEV} \alias{estepVVV} \alias{estepEVE} \alias{estepEVV} \alias{estepVEE} \alias{estepVVE} \title{ E-step in the EM algorithm for a parameterized Gaussian mixture model. } \description{ Implements the expectation step in the EM algorithm for a parameterized Gaussian mixture model. } \usage{ estepE(data, parameters, warn = NULL, \dots) estepV(data, parameters, warn = NULL, \dots) estepEII(data, parameters, warn = NULL, \dots) estepVII(data, parameters, warn = NULL, \dots) estepEEI(data, parameters, warn = NULL, \dots) estepVEI(data, parameters, warn = NULL, \dots) estepEVI(data, parameters, warn = NULL, \dots) estepVVI(data, parameters, warn = NULL, \dots) estepEEE(data, parameters, warn = NULL, \dots) estepEEV(data, parameters, warn = NULL, \dots) estepVEV(data, parameters, warn = NULL, \dots) estepVVV(data, parameters, warn = NULL, \dots) estepEVE(data, parameters, warn = NULL, \dots) estepEVV(data, parameters, warn = NULL, \dots) estepVEE(data, parameters, warn = NULL, \dots) estepVVE(data, parameters, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. 
} \item{parameters}{ The parameters of the model: %\itemize{ %\item An argument describing the variance (depends on the model): \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{mu}{ The mean for each component. If there is more than one component, this is a matrix whose columns are the means of the components. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ An estimate of the reciprocal hypervolume of the data region. If not supplied or set to a negative value, the default is determined by applying function \code{hypvol} to the data. Used only when \code{pro} includes an additional mixing proportion for a noise component. } } %} } \item{warn}{ A logical value indicating whether or certain warnings should be issued. The default is given by \code{mclust.options("warn")}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ Character string identifying the model. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ The input parameters. } \item{loglik}{ The logliklihood for the data in the mixture model. } \item{Attribute}{ \code{"WARNING"}: An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{estep}}, \code{\link{em}}, \code{\link{mstep}}, \code{\link{do.call}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}}. 
} \examples{ \donttest{ msEst <- mstepEII(data = iris[,-5], z = unmap(iris[,5])) names(msEst) estepEII(data = iris[,-5], parameters = msEst$parameters)} } \keyword{cluster} mclust/man/banknote.Rd0000644000175000017500000000141012501077123014550 0ustar nileshnilesh\name{banknote} \alias{banknote} \docType{data} \title{Swiss banknotes data} \description{ The data set contains six measurements made on 100 genuine and 100 counterfeit old-Swiss 1000-franc bank notes.} \usage{data(banknote)} \format{A data frame with the following variables: \describe{ \item{Status}{the status of the banknote: \code{genuine} or \code{counterfeit}} \item{Length}{Length of bill (mm)} \item{Left}{Width of left edge (mm)} \item{Right}{Width of right edge (mm)} \item{Bottom}{Bottom margin width (mm)} \item{Top}{Top margin width (mm)} \item{Diagonal}{Length of diagonal (mm)} } } \source{Flury, B. and Riedwyl, H. (1988). \emph{Multivariate Statistics: A practical approach.} London: Chapman & Hall, Tables 1.1 and 1.2, pp. 5-8.} \keyword{datasets} mclust/man/plot.hc.Rd0000644000175000017500000000705713731057511014340 0ustar nileshnilesh\name{plot.hc} \alias{plot.hc} \title{Dendrograms for Model-based Agglomerative Hierarchical Clustering} \description{ Display two types for dendrograms for model-based hierarchical clustering objects. } \usage{ \method{plot}{hc}(x, what=c("loglik","merge"), maxG=NULL, labels=FALSE, hang=0, \dots) } \arguments{ \item{x}{ An object of class \code{'hc'}. } \item{what}{ A character string indicating the type of dendrogram to be displayed.\cr Possible options are: \describe{ \item{\code{"loglik"}}{Distances between dendrogram levels are based on the classification likelihood.} \item{\code{"merge"}}{Distances between dendrogram levels are uniform, so that levels correspond to the number of clusters.} } } \item{maxG}{ The maximum number of clusters for the dendrogram. For \code{what = "merge"}, the default is the number of clusters in the initial partition. 
For \code{what = "loglik"}, the default is the minimnum of the maximum number of clusters for which the classification loglikelihood an be computed in most cases, and the maximum number of clusters for which the classification likelihood increases with increasing numbers of clusters. } \item{labels}{ A logical variable indicating whether or not to display leaf (observation) labels for the dendrogram (row names of the data). These are likely to be useful only if the number of observations in fairly small, since otherwise the labels will be too crowded to read. The default is not to display the leaf labels. } \item{hang}{ For \code{hclust} objects, this argument is the fraction of the plot height by which labels should hang below the rest of the plot. A negative value will cause the labels to hang down from 0. Because model-based hierarchical clustering does not share all of the properties of \code{hclust}, the \code{hang} argment won't work in many instances. } \item{\dots}{ Additional plotting arguments. } } \value{ A dendrogram is drawn, with distances based on either the classification likelihood or the merge level (number of clusters). } \details{ The plotting input does not share all of the properties of \code{hclust} objects, hence not all plotting arguments associated with \code{hclust} can be expected to work here. } \note{ If \code{modelName = "E"} (univariate with equal variances) or \code{modelName = "EII"} (multivariate with equal spherical covariances), then the underlying model is the same as for Ward's method for hierarchical clustering. } \references{ J. D. Banfield and A. E. Raftery (1993). Model-based Gaussian and non-Gaussian Clustering. \emph{Biometrics 49:803-821}. C. Fraley (1998). Algorithms for model-based Gaussian hierarchical clustering. \emph{SIAM Journal on Scientific Computing 20:270-281}. C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. 
\emph{Journal of the American Statistical Association 97:611-631}. } \seealso{ \code{\link{hc}} } \examples{ data(EuroUnemployment) hcTree <- hc(modelName = "VVV", data = EuroUnemployment) plot(hcTree, what = "loglik") plot(hcTree, what = "loglik", labels = TRUE) plot(hcTree, what = "loglik", maxG = 5, labels = TRUE) plot(hcTree, what = "merge") plot(hcTree, what = "merge", labels = TRUE) plot(hcTree, what = "merge", labels = TRUE, hang = 0.1) plot(hcTree, what = "merge", labels = TRUE, hang = -1) plot(hcTree, what = "merge", labels = TRUE, maxG = 5) } \keyword{cluster} mclust/man/thyroid.Rd0000644000175000017500000000353714037530122014444 0ustar nileshnilesh\name{thyroid} \alias{thyroid} \docType{data} \title{Thyroid gland data} \description{ Data on five laboratory tests administered to a sample of 215 patients. The tests are used to predict whether a patient's thyroid can be classified as euthyroidism (normal thyroid gland function), hypothyroidism (underactive thyroid not producing enough thyroid hormone) or hyperthyroidism (overactive thyroid producing and secreting excessive amounts of the free thyroid hormones T3 and/or thyroxine T4). 
Diagnosis of thyroid operation was based on a complete medical record, including anamnesis, scan, etc.} \usage{data(thyroid)} \format{A data frame with the following variables: \describe{ \item{Diagnosis}{Diagnosis of thyroid operation: \code{Hypo}, \code{Normal}, and \code{Hyper}.} \item{RT3U}{T3-resin uptake test (percentage).} \item{T4}{Total Serum thyroxin as measured by the isotopic displacement method.} \item{T3}{Total serum triiodothyronine as measured by radioimmuno assay.} \item{TSH}{Basal thyroid-stimulating hormone (TSH) as measured by radioimmuno assay.} \item{DTSH}{Maximal absolute difference of TSH value after injection of 200 micro grams of thyrotropin-releasing hormone as compared to the basal value.} } } \source{Thyroid Disease Data Set (\code{new-thyroid.data}, \code{new-thyroid.names}) is available at UCI Machine Learning Repository \url{https://archive.ics.uci.edu/ml/datasets/thyroid+disease}. Please note the UCI conditions of use.} \references{ Coomans, D., Broeckaert, M. Jonckheer M. and Massart D.L. (1983) Comparison of Multivariate Discriminant Techniques for Clinical Data - Application to the Thyroid Functional State, \emph{Meth. Inform. Med.} 22, pp. 93-101. Coomans, D. and I. Broeckaert (1986) \emph{Potential Pattern Recognition in Cemical and Medical Decision Making}, Research Studies Press, Letchworth, England. } \keyword{datasets} mclust/man/icl.Rd0000644000175000017500000000161613205036712013527 0ustar nileshnilesh\name{icl} \alias{icl} \title{ ICL for an estimated Gaussian Mixture Model } \description{ Computes the ICL (Integrated Complete-data Likelihood) for criterion for a Gaussian Mixture Model fitted by \code{\link{Mclust}}. } \usage{ icl(object, \dots) } \arguments{ \item{object}{ An object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}. } \item{\dots}{Further arguments passed to or from other methods.} } \value{ The ICL for the given input MCLUST model. 
} \references{ Biernacki, C., Celeux, G., Govaert, G. (2000). Assessing a mixture model for clustering with the integrated completed likelihood. \emph{IEEE Trans. Pattern Analysis and Machine Intelligence}, 22 (7), 719-725. } \seealso{ \code{\link{Mclust}}, \code{\link{mclustBIC}}, \code{\link{mclustICL}}, \code{\link{bic}}. } \examples{ mod <- Mclust(iris[,1:4]) icl(mod) } \keyword{cluster} mclust/man/nMclustParams.Rd0000644000175000017500000000275613175053603015563 0ustar nileshnilesh\name{nMclustParams} \alias{nMclustParams} \title{Number of Estimated Parameters in Gaussian Mixture Models} \description{ Gives the number of estimated parameters for parameterizations of the Gaussian mixture model that are used in MCLUST. } \usage{ nMclustParams(modelName, d, G, noise = FALSE, equalPro = FALSE, \dots) } \arguments{ \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{d}{ The dimension of the data. Not used for models in which neither the shape nor the orientation varies. } \item{G}{ The number of components in the Gaussian mixture model used to compute \code{loglik}. } \item{noise}{ A logical variable indicating whether or not the model includes an optional Poisson noise component. } \item{equalPro}{ A logical variable indicating whether or not the components in the model are assumed to be present in equal proportion. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ The number of variance parameters in the corresponding Gaussian mixture model. } \details{ To get the total number of parameters in model, add \code{G*d} for the means and \code{G-1} for the mixing proportions if they are unequal. } \seealso{ \code{\link{bic}}, \code{\link{nVarParams}}. 
} \examples{ mapply(nMclustParams, mclust.options("emModelNames"), d = 2, G = 3) } \keyword{cluster} mclust/man/classPriorProbs.Rd0000644000175000017500000001050214124774626016117 0ustar nileshnilesh\name{classPriorProbs} \alias{classPriorProbs} % R CMD Rd2pdf classPriorProbs.Rd \title{Estimation of class prior probabilities by EM algorithm} \description{ A simple procedure to improve the estimation of class prior probabilities when the training data does not reflect the true a priori probabilities of the target classes. The EM algorithm used is described in Saerens et al (2002).} \usage{ classPriorProbs(object, newdata = object$data, itmax = 1e3, eps = sqrt(.Machine$double.eps)) } \arguments{ \item{object}{ an object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}. } \item{newdata}{ a data frame or matrix giving the data. If missing the train data obtained from the call to \code{\link{MclustDA}} are used. } \item{itmax}{ an integer value specifying the maximal number of EM iterations. } \item{eps}{ a scalar specifying the tolerance associated with deciding when to terminate the EM iterations. } } \details{ The estimation procedure employes an EM algorithm as described in Saerens et al (2002). } \value{A vector of class prior estimates which can then be used in the \code{\link{predict.MclustDA}} to improve predictions.} \references{ Saerens, M., Latinne, P. and Decaestecker, C. (2002) Adjusting the outputs of a classifier to new a priori probabilities: a simple procedure, \emph{Neural computation}, 14 (1), 21--41. 
} \seealso{\code{\link{MclustDA}}, \code{\link{predict.MclustDA}}} \examples{ \donttest{ # generate data from a mixture f(x) = 0.9 * N(0,1) + 0.1 * N(3,1) n <- 10000 mixpro <- c(0.9, 0.1) class <- factor(sample(0:1, size = n, prob = mixpro, replace = TRUE)) x <- ifelse(class == 1, rnorm(n, mean = 3, sd = 1), rnorm(n, mean = 0, sd = 1)) hist(x[class==0], breaks = 11, xlim = range(x), main = "", xlab = "x", col = adjustcolor("dodgerblue2", alpha.f = 0.5), border = "white") hist(x[class==1], breaks = 11, add = TRUE, col = adjustcolor("red3", alpha.f = 0.5), border = "white") box() # generate training data from a balanced case-control sample, i.e. # f(x) = 0.5 * N(0,1) + 0.5 * N(3,1) n_train <- 1000 class_train <- factor(sample(0:1, size = n_train, prob = c(0.5, 0.5), replace = TRUE)) x_train <- ifelse(class_train == 1, rnorm(n_train, mean = 3, sd = 1), rnorm(n_train, mean = 0, sd = 1)) hist(x_train[class_train==0], breaks = 11, xlim = range(x_train), main = "", xlab = "x", col = adjustcolor("dodgerblue2", alpha.f = 0.5), border = "white") hist(x_train[class_train==1], breaks = 11, add = TRUE, col = adjustcolor("red3", alpha.f = 0.5), border = "white") box() # fit a MclustDA model mod <- MclustDA(x_train, class_train) summary(mod, parameters = TRUE) # test set performance pred <- predict(mod, newdata = x) classError(pred$classification, class)$error BrierScore(pred$z, class) # compute performance over a grid of prior probs priorProp <- seq(0.01, 0.99, by = 0.01) CE <- BS <- rep(as.double(NA), length(priorProp)) for(i in seq(priorProp)) { pred <- predict(mod, newdata = x, prop = c(1-priorProp[i], priorProp[i])) CE[i] <- classError(pred$classification, class = class)$error BS[i] <- BrierScore(pred$z, class) } # estimate the optimal class prior probs (priorProbs <- classPriorProbs(mod, x)) pred <- predict(mod, newdata = x, prop = priorProbs) # compute performance at the estimated class prior probs classError(pred$classification, class = class)$error BrierScore(pred$z, 
class) matplot(priorProp, cbind(CE,BS), type = "l", lty = 1, lwd = 2, xlab = "Class prior probability", ylab = "", ylim = c(0,max(CE,BS)), panel.first = { abline(h = seq(0,1,by=0.05), col = "grey", lty = 3) abline(v = seq(0,1,by=0.05), col = "grey", lty = 3) }) abline(v = mod$prop[2], lty = 2) # training prop abline(v = mean(class==1), lty = 4) # test prop (usually unknown) abline(v = priorProbs[2], lty = 3, lwd = 2) # estimated prior probs legend("topleft", legend = c("ClassError", "BrierScore"), col = 1:2, lty = 1, lwd = 2, inset = 0.02) # Summary of results: priorProp[which.min(CE)] # best prior of class 1 according to classification error priorProp[which.min(BS)] # best prior of class 1 according to Brier score priorProbs # optimal estimated class prior probabilities } } \keyword{classif} mclust/man/mclustBIC.Rd0000644000175000017500000001645714124774626014634 0ustar nileshnilesh\name{mclustBIC} \alias{mclustBIC} \alias{EMclust} \alias{print.mclustBIC} \title{BIC for Model-Based Clustering} \description{ BIC for parameterized Gaussian mixture models fitted by EM algorithm initialized by model-based hierarchical clustering.} \usage{ mclustBIC(data, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), initialization = list(hcPairs = NULL, subset = NULL, noise = NULL), Vinv = NULL, warn = mclust.options("warn"), x = NULL, verbose = interactive(), \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{G}{ An integer vector specifying the numbers of mixture components (clusters) for which the BIC is to be calculated. The default is \code{G=1:9}, unless the argument \code{x} is specified, in which case the default is taken from the values associated with \code{x}. } \item{modelNames}{ A vector of character strings indicating the models to be fitted in the EM phase of clustering. 
The help file for \code{\link{mclustModelNames}} describes the available models. The default is: \describe{ \item{\code{c("E", "V")}}{for univariate data} \item{\code{mclust.options("emModelNames")}}{for multivariate data (n > d)} \item{\code{c("EII", "VII", "EEI", "EVI", "VEI", "VVI")}}{the spherical and diagonal models for multivariate data (n <= d)} } unless the argument \code{x} is specified, in which case the default is taken from the values associated with \code{x}. } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{priorControl}. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{initialization}{ A list containing zero or more of the following components: \describe{ \item{\code{hcPairs}}{ A matrix of merge pairs for hierarchical clustering such as produced by function \code{\link{hc}}. \cr For multivariate data, the default is to compute a hierarchical agglomerative clustering tree by applying function \code{\link{hc}} with model specified by \code{mclust.options("hcModelName")}, and data transformation set by \code{mclust.options("hcUse")}.\cr All the input or a subset as indicated by the \code{subset} argument is used for initial clustering.\cr The hierarchical clustering results are then used to start the EM algorithm from a given partition.\cr For univariate data, the default is to use quantiles to start the EM algorithm. However, hierarchical clustering could also be used by calling \code{\link{hc}} with model specified as \code{"V"} or \code{"E"}. } \item{\code{subset}}{ A logical or numeric vector specifying a subset of the data to be used in the initial hierarchical clustering phase. By default no subset is used unless the number of observations exceeds the value specified by \code{mclust.options("subset")}. The \code{subset} argument is ignored if \code{hcPairs} are provided. 
Note that to guarantee exact reproducibility of results a seed must be specified (see \code{\link{set.seed}}). } \item{\code{noise}}{ A logical or numeric vector indicating an initial guess as to which observations are noise in the data. If numeric the entries should correspond to row indexes of the data. If supplied, a noise term will be added to the model in the estimation. } } } \item{Vinv}{ An estimate of the reciprocal hypervolume of the data region. The default is determined by applying function \code{hypvol} to the data. Used only if an initial guess as to which observations are noise is supplied. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when estimation fails. The default is controlled by \code{\link{mclust.options}}. } \item{x}{ An object of class \code{'mclustBIC'}. If supplied, \code{mclustBIC} will use the settings in \code{x} to produce another object of class \code{'mclustBIC'}, but with \code{G} and \code{modelNames} as specified in the arguments. Models that have already been computed in \code{x} are not recomputed. All arguments to \code{mclustBIC} except \code{data}, \code{G} and \code{modelName} are ignored and their values are set as specified in the attributes of \code{x}. Defaults for \code{G} and \code{modelNames} are taken from \code{x}. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the fitting procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ Return an object of class \code{'mclustBIC'} containing the Bayesian Information Criterion for the specified mixture models numbers of clusters. Auxiliary information returned as attributes. The corresponding \code{print} method shows the matrix of values and the top models according to the BIC criterion. 
} \references{ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. Fraley C., Raftery A. E., Murphy T. B. and Scrucca L. (2012) mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. \emph{Technical Report} No. 597, Department of Statistics, University of Washington. } \seealso{ \code{\link{summary.mclustBIC}}, \code{\link{priorControl}}, \code{\link{emControl}}, \code{\link{mclustModel}}, \code{\link{hc}}, \code{\link{me}}, \code{\link{mclustModelNames}}, \code{\link{mclust.options}} } \examples{ irisBIC <- mclustBIC(iris[,-5]) irisBIC plot(irisBIC) \donttest{ subset <- sample(1:nrow(iris), 100) irisBIC <- mclustBIC(iris[,-5], initialization=list(subset = subset)) irisBIC plot(irisBIC) irisBIC1 <- mclustBIC(iris[,-5], G=seq(from=1,to=9,by=2), modelNames=c("EII", "EEI", "EEE")) irisBIC1 plot(irisBIC1) irisBIC2 <- mclustBIC(iris[,-5], G=seq(from=2,to=8,by=2), modelNames=c("VII", "VVI", "VVV"), x= irisBIC1) irisBIC2 plot(irisBIC2) } nNoise <- 450 set.seed(0) poissonNoise <- apply(apply( iris[,-5], 2, range), 2, function(x, n) runif(n, min = x[1]-.1, max = x[2]+.1), n = nNoise) set.seed(0) noiseInit <- sample(c(TRUE,FALSE),size=nrow(iris)+nNoise,replace=TRUE, prob=c(3,1)) irisNdata <- rbind(iris[,-5], poissonNoise) irisNbic <- mclustBIC(data = irisNdata, G = 1:5, initialization = list(noise = noiseInit)) irisNbic plot(irisNbic) } \keyword{cluster} mclust/man/predict.MclustSSC.Rd0000644000175000017500000000343714124774626016252 0ustar nileshnilesh\name{predict.MclustSSC} \alias{predict.MclustSSC} \title{Classification of multivariate observations by semi-supervised Gaussian finite 
mixtures} \description{Classify multivariate observations based on Gaussian finite mixture models estimated by \code{\link{MclustSSC}}.} \usage{ \method{predict}{MclustSSC}(object, newdata, \dots) } \arguments{ \item{object}{an object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}.} \item{newdata}{a data frame or matrix giving the data. If missing the train data obtained from the call to \code{\link{MclustSSC}} are classified.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a list of with the following components: \item{classification}{a factor of predicted class labels for \code{newdata}.} \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} } \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustSSC}}.} \examples{ \donttest{ X <- iris[,1:4] class <- iris$Species # randomly remove class labels set.seed(123) class[sample(1:length(class), size = 120)] <- NA table(class, useNA = "ifany") clPairs(X, ifelse(is.na(class), 0, class), symbols = c(0, 16, 17, 18), colors = c("grey", 4, 2, 3), main = "Partially classified data") # Fit semi-supervised classification model mod_SSC <- MclustSSC(X, class) pred_SSC <- predict(mod_SSC) table(Predicted = pred_SSC$classification, Actual = class, useNA = "ifany") X_new = data.frame(Sepal.Length = c(5, 8), Sepal.Width = c(3.1, 4), Petal.Length = c(2, 5), Petal.Width = c(0.5, 2)) predict(mod_SSC, newdata = X_new) } } \keyword{classification} mclust/man/chevron.Rd0000644000175000017500000000107612460535131014425 0ustar nileshnilesh\name{chevron} \alias{chevron} \title{Simulated minefield data} \usage{data(chevron)} \description{A set of simulated bivariate minefield data (1104 observations).} \references{ A. Dasgupta and A. E. Raftery (1998). Detecting features in spatial point processes with clutter via model-based clustering. 
\emph{Journal of the American Statistical Association 93:294-302}. C. Fraley and A.E. Raftery (1998). \emph{Computer Journal 41:578-588}. G. J. McLachlan and D. Peel (2000). \emph{Finite Mixture Models}, Wiley, pages 110-112. } \keyword{datasets} mclust/man/cdensE.Rd0000644000175000017500000001053414124774626014176 0ustar nileshnilesh\name{cdensE} \alias{cdensE} \alias{cdensV} \alias{cdensX} \alias{cdensEII} \alias{cdensVII} \alias{cdensEEI} \alias{cdensVEI} \alias{cdensEVI} \alias{cdensVVI} \alias{cdensEEE} \alias{cdensEEV} \alias{cdensVEV} \alias{cdensVVV} \alias{cdensEVE} \alias{cdensEVV} \alias{cdensVEE} \alias{cdensVVE} \alias{cdensXII} \alias{cdensXXI} \alias{cdensXXX} \title{ Component Density for a Parameterized MVN Mixture Model } \description{ Computes component densities for points in a parameterized MVN mixture model. } \usage{ cdensE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensX(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEII(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVII(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEEI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVEI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEVI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVVI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEEE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEEV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVEV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVVV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEVE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEVV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVEE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVVE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensXII(data, logarithm = 
FALSE, parameters, warn = NULL, \dots) cdensXXI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensXXX(data, logarithm = FALSE, parameters, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{logarithm}{ A logical value indicating whether or not the logarithm of the component densities should be returned. The default is to return the component densities, obtained from the log component densities by exponentiation. } \item{parameters}{ The parameters of the model: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{pro}}{ Mixing proportions for the components of the mixture. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } } } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A numeric matrix whose \code{[i,j]}th entry is the density of observation \emph{i} in component \emph{j}. The densities are not scaled by mixing proportions. } \note{ When one or more component densities are very large in magnitude, then it may be possible to compute the logarithm of the component densities but not the component densities themselves due to overflow. 
} \seealso{ \code{\link{cdens}}, \code{\link{dens}}, \code{\link{mclustVariance}}, \code{\link{mstep}}, \code{\link{mclust.options}}, \code{\link{do.call}}. } \examples{ \donttest{ z2 <- unmap(hclass(hcVVV(faithful),2)) # initial value for 2 class case model <- meVVV(data=faithful, z=z2) cdensVVV(data=faithful, logarithm = TRUE, parameters = model$parameters) data(cross) z2 <- unmap(cross[,1]) model <- meEEV(data = cross[,-1], z = z2) EEVdensities <- cdensEEV( data = cross[,-1], parameters = model$parameters) cbind(cross[,-1],map(EEVdensities))} } \keyword{cluster} mclust/man/plot.Mclust.Rd0000644000175000017500000000520714124774626015222 0ustar nileshnilesh\name{plot.Mclust} \alias{plot.Mclust} \title{Plotting method for Mclust model-based clustering} \description{ Plots for model-based clustering results, such as BIC, classification, uncertainty and density. } \usage{ \method{plot}{Mclust}(x, what = c("BIC", "classification", "uncertainty", "density"), dimens = NULL, xlab = NULL, ylab = NULL, addEllipses = TRUE, main = FALSE, \dots) } \arguments{ \item{x}{ Output from \code{Mclust}. } \item{what}{ A string specifying the type of graph requested. Available choices are: \describe{ \item{\code{"BIC"}}{plot of BIC values used for choosing the number of clusters.} \item{\code{"classification"} =}{a plot showing the clustering. For data in more than two dimensions a pairs plot is produced, followed by a coordinate projection plot using specified \code{dimens}. Ellipses corresponding to covariances of mixture components are also drawn if \code{addEllipses = TRUE}.} \item{\code{"uncertainty"}}{a plot of classification uncertainty. For data in more than two dimensions a coordinate projection plot is drawn using specified \code{dimens}.} \item{\code{"density"}}{a plot of estimated density. 
For data in more than two dimensions a matrix of contours for coordinate projection plot is drawn using specified \code{dimens}.} } If not specified, in interactive sessions a menu of choices is proposed. } \item{dimens}{ A vector of integers specifying the dimensions of the coordinate projections in case of \code{"classification"}, \code{"uncertainty"}, or \code{"density"} plots. } \item{xlab, ylab}{ Optional labels for the x-axis and the y-axis. } \item{addEllipses}{ A logical indicating whether or not to add ellipses with axes corresponding to the within-cluster covariances in case of \code{"classification"} or \code{"uncertainty"} plots. } \item{main}{ A logical or \code{NULL} indicating whether or not to add a title to the plot identifying the type of plot drawn. } \item{\dots}{ Other graphics parameters. } } \details{ For more flexibility in plotting, use \code{mclust1Dplot}, \code{mclust2Dplot}, \code{surfacePlot}, \code{coordProj}, or \code{randProj}. } \seealso{ \code{\link{Mclust}}, \code{\link{plot.mclustBIC}}, \code{\link{plot.mclustICL}}, \code{\link{mclust1Dplot}}, \code{\link{mclust2Dplot}}, \code{\link{surfacePlot}}, \code{\link{coordProj}}, \code{\link{randProj}}. 
} \examples{ \donttest{ precipMclust <- Mclust(precip) plot(precipMclust) faithfulMclust <- Mclust(faithful) plot(faithfulMclust) irisMclust <- Mclust(iris[,-5]) plot(irisMclust) } } \keyword{cluster} mclust/man/diabetes.Rd0000644000175000017500000000172314020370146014535 0ustar nileshnilesh\name{diabetes} \alias{diabetes} \docType{data} \title{Diabetes data} \description{The data set contains three measurements made on 145 non-obese adult patients classified into three groups.} \usage{data(diabetes)} \format{A data frame with the following variables: \describe{ \item{class}{The type of diabete: \code{Normal}, \code{Overt}, and \code{Chemical}.} \item{glucose}{Area under plasma glucose curve after a three hour oral glucose tolerance test (OGTT).} \item{insulin}{Area under plasma insulin curve after a three hour oral glucose tolerance test (OGTT).} \item{sspg}{Steady state plasma glucose.} } } \details{This dataset is \emph{not correct} and it is provided here only for backward compatibility. Please refer to the correct version available in package \pkg{rrcov}.} \source{Reaven, G. M. and Miller, R. G. (1979). An attempt to define the nature of chemical diabetes using a multidimensional analysis. \emph{Diabetologia} 16:17-24.} \keyword{datasets} mclust/man/mclust1Dplot.Rd0000644000175000017500000001205514124774626015370 0ustar nileshnilesh\name{mclust1Dplot} \alias{mclust1Dplot} \title{ Plot one-dimensional data modeled by an MVN mixture. } \description{ Plot one-dimensional data given parameters of an MVN mixture model for the data. } \usage{ mclust1Dplot(data, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "density", "error", "uncertainty"), symbols = NULL, colors = NULL, ngrid = length(data), xlab = NULL, ylab = NULL, xlim = NULL, ylim = NULL, cex = 1, main = FALSE, \dots) } \arguments{ \item{data}{ A numeric vector of observations. Categorical variables are not allowed. 
} \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. The relevant components are as follows: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. There should be one more mixing proportion than the number of Gaussian components if the mixture model includes a Poisson noise term. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix in which the \code{[i,k]}th entry gives the probability of observation \emph{i} belonging to the \emph{k}th class. Used to compute \code{classification} and \code{uncertainty} if those arguments aren't available. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. If present argument \code{z} will be ignored. } \item{truth}{ A numeric or character vector giving a known classification of each data point. If \code{classification} or \code{z} is also present, this is used for displaying classification errors. } \item{uncertainty}{ A numeric vector of values in \emph{(0,1)} giving the uncertainty of each data point. If present argument \code{z} will be ignored. } \item{what}{ Choose from one of the following options: \code{"classification"} (default), \code{"density"}, \code{"error"}, \code{"uncertainty"}. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class \code{classification}. Elements in \code{symbols} correspond to classes in \code{classification} in order of appearance in the observations (the order used by the function \code{unique}).
The default is to use a single plotting symbol \emph{|}. Classes are delineated by showing them in separate lines above the whole of the data. } \item{colors}{ Either an integer or character vector assigning a color to each unique class \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{ngrid}{ Number of grid points to use for density computation over the interval spanned by the data. The default is the length of the data set. } \item{xlab, ylab}{ An argument specifying a label for the axes. } \item{xlim, ylim}{ An argument specifying bounds of the plot. This may be useful for when comparing plots. } \item{cex}{ An argument specifying the size of the plotting symbols. The default value is 1. } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing location of the mixture components, classification, uncertainty, density and/or classification errors. Points in the different classes are shown in separate levels above the whole of the data.
} \seealso{ \code{\link{mclust2Dplot}}, \code{\link{clPairs}}, \code{\link{coordProj}} } \examples{ \donttest{ n <- 250 ## create artificial data set.seed(1) y <- c(rnorm(n,-5), rnorm(n,0), rnorm(n,5)) yclass <- c(rep(1,n), rep(2,n), rep(3,n)) yModel <- Mclust(y) mclust1Dplot(y, parameters = yModel$parameters, z = yModel$z, what = "classification") mclust1Dplot(y, parameters = yModel$parameters, z = yModel$z, what = "error", truth = yclass) mclust1Dplot(y, parameters = yModel$parameters, z = yModel$z, what = "density") mclust1Dplot(y, z = yModel$z, parameters = yModel$parameters, what = "uncertainty") } } \keyword{cluster} mclust/man/nVarParams.Rd0000644000175000017500000000317313175052444015040 0ustar nileshnilesh\name{nVarParams} \alias{nVarParams} \title{ Number of Variance Parameters in Gaussian Mixture Models } \description{ Gives the number of variance parameters for parameterizations of the Gaussian mixture model that are used in MCLUST. } \usage{ nVarParams(modelName, d, G, \dots) } \arguments{ \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{d}{ The dimension of the data. Not used for models in which neither the shape nor the orientation varies. } \item{G}{ The number of components in the Gaussian mixture model used to compute \code{loglik}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ The number of variance parameters in the corresponding Gaussian mixture model. } \details{ To get the total number of parameters in the model, add \code{G*d} for the means and \code{G-1} for the mixing proportions if they are unequal. } \references{ C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association 97:611-631}. C. Fraley, A. E. Raftery, T. B. Murphy and L. Scrucca (2012).
mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. Technical Report No. 597, Department of Statistics, University of Washington. } \seealso{ \code{\link{bic}}, \code{\link{nMclustParams}}. } \examples{ mapply(nVarParams, mclust.options("emModelNames"), d = 2, G = 3) } \keyword{cluster} mclust/man/surfacePlot.Rd0000644000175000017500000001326214136474270015260 0ustar nileshnilesh\name{surfacePlot} \alias{surfacePlot} \title{Density or uncertainty surface for bivariate mixtures} \description{ Plots a density or uncertainty surface given bivariate data and parameters of a MVN mixture model for the data. } \usage{ surfacePlot(data, parameters, what = c("density", "uncertainty"), type = c("contour", "hdr", "image", "persp"), transformation = c("none", "log", "sqrt"), grid = 200, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), col = gray(0.5), col.palette = function(...) hcl.colors(..., "blues", rev = TRUE), hdr.palette = blue2grey.colors, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, main = FALSE, scale = FALSE, swapAxes = FALSE, verbose = FALSE, \dots) } \arguments{ \item{data}{ A matrix, or data frame of bivariate observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. The relevant components are as follows: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. 
} } } \item{what}{ Choose from one of the following options: \code{"density"} (default), \code{"uncertainty"} indicating what to plot. } \item{type}{ Choose from one of the following four options: \code{"contour"} (default), \code{"hdr"}, \code{"image"}, and \code{"persp"} indicating the plot type. } \item{transformation}{ Choose from one of the following three options: \code{"none"} (default), \code{"log"}, \code{"sqrt"} indicating a transformation to be applied before plotting. } \item{grid}{ The number of grid points (evenly spaced on each axis). The mixture density and uncertainty are computed at \code{grid x grid} points to produce the surface plot. Default: \code{200}. } \item{nlevels}{ The number of levels to use for a contour plot. Default: \code{11}. } \item{levels}{ A vector of levels at which to draw the lines in a contour plot. } \item{prob}{ A vector of probability levels for computing HDR. Only used if \code{type = "hdr"} and supersedes the previous \code{nlevels} and \code{levels} arguments. } \item{col}{ A string specifying the colour to be used for \code{type = "contour"} and \code{type = "persp"} plots. } \item{col.palette}{ A function which defines a palette of colours to be used for \code{type = "image"} plots. } \item{hdr.palette}{ A function which defines a palette of colours to be used for \code{type = "hdr"} plots. } \item{xlim, ylim}{ Optional argument specifying bounds for the ordinate, abscissa of the plot. This may be useful for when comparing plots. } \item{xlab, ylab}{ Optional argument specifying labels for the x-axis and y-axis. } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{scale}{ A logical variable indicating whether or not the two dimensions should be plotted on the same scale, and thus preserve the shape of the distribution. The default is not to scale.
} \item{swapAxes}{ A logical variable indicating whether or not the axes should be swapped for the plot. } \item{verbose}{ A logical variable telling whether or not to print an indication that the function is in the process of computing values at the grid points, which typically takes some time to complete. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing (a transformation of) the density or uncertainty for the given mixture model and data. The function also returns an invisible list with components \code{x}, \code{y}, and \code{z} in which \code{x} and \code{y} are the values used to define the grid and \code{z} is the transformed density or uncertainty at the grid points. } \details{ For an image plot, a color scheme may need to be selected on the display device in order to view the plot. } \references{ C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association 97:611-631}. C. Fraley, A. E. Raftery, T. B. Murphy and L. Scrucca (2012). mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. Technical Report No. 597, Department of Statistics, University of Washington.
} \seealso{ \code{\link{mclust2Dplot}} } \examples{ \donttest{ faithfulModel <- Mclust(faithful) surfacePlot(faithful, parameters = faithfulModel$parameters, type = "contour", what = "density", transformation = "none", drawlabels = FALSE) surfacePlot(faithful, parameters = faithfulModel$parameters, type = "persp", what = "density", transformation = "log") surfacePlot(faithful, parameters = faithfulModel$parameters, type = "contour", what = "uncertainty", transformation = "log") } } \keyword{cluster} mclust/man/plot.densityMclust.Rd0000644000175000017500000001152714136476452016623 0ustar nileshnilesh\name{plot.densityMclust} \alias{plot.densityMclust} \alias{plotDensityMclust1} \alias{plotDensityMclust2} \alias{plotDensityMclustd} \title{Plots for Mixture-Based Density Estimate} \description{ Plotting methods for an object of class \code{'mclustDensity'}. Available graphs are plot of BIC values and density for univariate and bivariate data. For higher data dimensionality a scatterplot matrix of pairwise densities is drawn. 
} \usage{ \method{plot}{densityMclust}(x, data = NULL, what = c("BIC", "density", "diagnostic"), \dots) plotDensityMclust1(x, data = NULL, col = gray(0.3), hist.col = "lightgrey", hist.border = "white", breaks = "Sturges", \dots) plotDensityMclust2(x, data = NULL, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), points.pch = 1, points.col = 1, points.cex = 0.8, \dots) plotDensityMclustd(x, data = NULL, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), points.pch = 1, points.col = 1, points.cex = 0.8, gap = 0.2, \dots) } \arguments{ \item{x}{An object of class \code{'mclustDensity'} obtained from a call to \code{\link{densityMclust}} function.} \item{data}{Optional data points.} \item{what}{The type of graph requested: \describe{ \item{\code{"density"} =}{a plot of estimated density; if \code{data} is also provided the density is plotted over data points (see Details section).} \item{\code{"BIC"} =}{a plot of BIC values for the estimated models versus the number of components.} \item{\code{"diagnostic"} =}{diagnostic plots (only available for the one-dimensional case, see \code{\link{densityMclust.diagnostic}})} } } \item{col}{The color to be used to draw the density line in 1-dimension or contours in higher dimensions.} \item{hist.col}{The color to be used to fill the bars of the histogram.} \item{hist.border}{The color of the border around the bars of the histogram.} \item{breaks}{See the argument in function \code{\link[graphics]{hist}}.} \item{points.pch, points.col, points.cex}{The character symbols, colors, and magnification to be used for plotting \code{data} points.} \item{nlevels}{An integer, the number of levels to be used in plotting contour densities.} \item{levels}{A vector of density levels at which to draw the contour lines.} \item{prob}{A vector of probability levels for computing HDR. 
Only used if \code{type = "hdr"} and supersedes the previous \code{nlevels} and \code{levels} arguments.} \item{gap}{Distance between subplots, in margin lines, for the matrix of pairwise scatterplots.} \item{\dots}{Additional arguments passed to \code{\link{surfacePlot}}.} } \details{The function \code{plot.densityMclust} allows one to obtain the plot of estimated density or the graph of BIC values for evaluated models. If \code{what = "density"} the produced plot depends on the dimensionality of the data. For one-dimensional data a call with no \code{data} provided produces a plot of the estimated density over a sensible range of values. If \code{data} is provided the density is over-plotted on a histogram for the observed data. For two-dimensional data further arguments available are those accepted by the \code{\link{surfacePlot}} function. In particular, the density can be represented through \code{"contour"}, \code{"hdr"}, \code{"image"}, and \code{"persp"} type of graph. For \code{type = "hdr"} Highest Density Regions (HDRs) are plotted for probability levels \code{prob}. See \code{\link{hdrlevels}} for details. For higher dimensionality a scatterplot matrix of pairwise projected densities is drawn. } % \value{} \author{Luca Scrucca} \seealso{ \code{\link{densityMclust}}, \code{\link{surfacePlot}}, \code{\link{densityMclust.diagnostic}}, \code{\link{Mclust}}.
} \examples{ \donttest{ dens <- densityMclust(faithful$waiting) summary(dens) summary(dens, parameters = TRUE) plot(dens, what = "BIC", legendArgs = list(x = "topright")) plot(dens, what = "density", data = faithful$waiting) dens <- densityMclust(faithful) summary(dens) summary(dens, parameters = TRUE) plot(dens, what = "density", data = faithful, drawlabels = FALSE, points.pch = 20) plot(dens, what = "density", type = "hdr") plot(dens, what = "density", type = "hdr", prob = seq(0.1, 0.9, by = 0.1)) plot(dens, what = "density", type = "hdr", data = faithful) plot(dens, what = "density", type = "persp") dens <- densityMclust(iris[,1:4]) summary(dens, parameters = TRUE) plot(dens, what = "density", data = iris[,1:4], col = "slategrey", drawlabels = FALSE, nlevels = 7) plot(dens, what = "density", type = "hdr", data = iris[,1:4]) plot(dens, what = "density", type = "persp", col = grey(0.9)) } } \keyword{cluster} \keyword{dplot} mclust/man/emE.Rd0000644000175000017500000001365114124774626013506 0ustar nileshnilesh\name{emE} \alias{emE} \alias{emV} \alias{emX} \alias{emEII} \alias{emVII} \alias{emEEI} \alias{emVEI} \alias{emEVI} \alias{emVVI} \alias{emEEE} \alias{emEEV} \alias{emVEV} \alias{emVVV} \alias{emEVV} \alias{emEVE} \alias{emVEE} \alias{emVVE} \alias{emXII} \alias{emXXI} \alias{emXXX} \title{EM algorithm starting with E-step for a parameterized Gaussian mixture model} \description{ Implements the EM algorithm for a parameterized Gaussian mixture model, starting with the expectation step. 
} \usage{ emE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emX(data, prior = NULL, warn = NULL, \dots) emEII(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVII(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEEI(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVEI(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEVI(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVVI(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEEE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVEE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEVE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVVE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEEV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVEV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEVV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVVV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emXII(data, prior = NULL, warn = NULL, \dots) emXXI(data, prior = NULL, warn = NULL, \dots) emXXX(data, prior = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{parameters}{ The parameters of the model: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. There should one more mixing proportion than the number of Gaussian components if the mixture model includes a Poisson noise term. 
} \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ An estimate of the reciprocal hypervolume of the data region. The default is determined by applying function \code{hypvol} to the data. Used only when \code{pro} includes an additional mixing proportion for a noise component. } } } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{priorControl}. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{warn}{ A logical value indicating whether or not a warning should be issued whenever a singularity is encountered. The default is given in \code{mclust.options("warn")}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. 
} \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{me}}, \code{\link{mstep}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}}. } \examples{ \donttest{ msEst <- mstepEEE(data = iris[,-5], z = unmap(iris[,5])) names(msEst) emEEE(data = iris[,-5], parameters = msEst$parameters)} } \keyword{cluster} mclust/man/coordProj.Rd0000644000175000017500000001271114124774626014735 0ustar nileshnilesh\name{coordProj} \alias{coordProj} \title{ Coordinate projections of multidimensional data modeled by an MVN mixture. } \description{ Plots coordinate projections given multidimensional data and parameters of an MVN mixture model for the data. } \usage{ coordProj(data, dimens = c(1,2), parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "error", "uncertainty"), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, scale = FALSE, xlim = NULL, ylim = NULL, cex = 1, PCH = ".", main = FALSE, \dots) } \arguments{ \item{data}{ A numeric matrix or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{dimens}{ A vector of length 2 giving the integer dimensions of the desired coordinate projections. The default is \code{c(1,2)}, in which the first dimension is plotted against the second. 
} \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. The relevant components are as follows: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix in which the \code{[i,k]}th entry gives the probability of observation \emph{i} belonging to the \emph{k}th class. Used to compute \code{classification} and \code{uncertainty} if those arguments aren't available. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. If present argument \code{z} will be ignored. } \item{truth}{ A numeric or character vector giving a known classification of each data point. If \code{classification} or \code{z} is also present, this is used for displaying classification errors. } \item{uncertainty}{ A numeric vector of values in \emph{(0,1)} giving the uncertainty of each data point. If present argument \code{z} will be ignored. } \item{what}{ Choose from one of the following three options: \code{"classification"} (default), \code{"error"}, \code{"uncertainty"}. } \item{addEllipses}{ A logical indicating whether or not to add ellipses with axes corresponding to the within-cluster covariances in case of \code{"classification"} or \code{"uncertainty"} plots. } \item{fillEllipses}{ A logical specifying whether or not to fill ellipses with transparent colors when \code{addEllipses = TRUE}. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. 
Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{scale}{ A logical variable indicating whether or not the two chosen dimensions should be plotted on the same scale, and thus preserve the shape of the distribution. Default: \code{scale=FALSE} } \item{xlim, ylim}{ Arguments specifying bounds for the ordinate, abscissa of the plot. This may be useful for when comparing plots. } \item{cex}{ A numerical value specifying the size of the plotting symbols. The default value is 1. } \item{PCH}{ An argument specifying the symbol to be used when a classification has not been specified for the data. The default value is a small dot ".". } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing a two-dimensional coordinate projection of the data, together with the location of the mixture components, classification, uncertainty, and/or classification errors.
} \seealso{ \code{\link{clPairs}}, \code{\link{randProj}}, \code{\link{mclust2Dplot}}, \code{\link{mclust.options}} } \examples{ \donttest{ est <- meVVV(iris[,-5], unmap(iris[,5])) par(pty = "s", mfrow = c(1,1)) coordProj(iris[,-5], dimens=c(2,3), parameters = est$parameters, z = est$z, what = "classification", main = TRUE) coordProj(iris[,-5], dimens=c(2,3), parameters = est$parameters, z = est$z, truth = iris[,5], what = "error", main = TRUE) coordProj(iris[,-5], dimens=c(2,3), parameters = est$parameters, z = est$z, what = "uncertainty", main = TRUE) } } \keyword{cluster} mclust/man/clustCombi.Rd0000644000175000017500000001217614124774626015105 0ustar nileshnilesh\name{clustCombi} \alias{clustCombi} \alias{print.clustCombi} \alias{summary.clustCombi} \alias{print.summary.clustCombi} \title{ Combining Gaussian Mixture Components for Clustering } \description{ Provides a hierarchy of combined clusterings from the EM/BIC Gaussian mixture solution to one class, following the methodology proposed in the article cited in the references. } \usage{ clustCombi(object = NULL, data = NULL, \dots) } \arguments{ \item{object}{ An object returned by \code{\link{Mclust}} giving the optimal (according to BIC) parameters, conditional probabilities, and log-likelihood, together with the associated classification and its uncertainty. If not provided, the \code{data} argument must be specified. } \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. If the \code{object} argument is not provided, the function \code{\link{Mclust}} is applied to the given \code{data} to fit a mixture model.} \item{\dots}{ Optional arguments to be passed to called functions. Notably, any argument (such as the numbers of components for which the BIC is computed; the models to be fitted by EM; initialization parameters for the EM algorithm, etc.) 
to be passed to \code{\link{Mclust}} in case \code{object = NULL}. Please see the \code{\link{Mclust}} documentation for more details. } } \details{ Mclust provides a Gaussian mixture fitted to the data by maximum likelihood through the EM algorithm, for the model and number of components selected according to BIC. The corresponding components are hierarchically combined according to an entropy criterion, following the methodology described in the article cited in the references section. The solutions with numbers of classes between the one selected by BIC and one are returned as a \code{clustCombi} class object. } \value{ A list of class \code{clustCombi} giving the hierarchy of combined solutions from the number of components selected by BIC to one. The details of the output components are as follows: \item{classification}{A list of the data classifications obtained for each combined solution of the hierarchy through a MAP assignment} \item{combiM}{A list of matrices. \code{combiM[[K]]} is the matrix used to combine the components of the (K+1)-classes solution to get the K-classes solution. Please see the examples.} \item{combiz}{A list of matrices. \code{combiz[[K]]} is a matrix whose [i,k]th entry is the probability that observation i in the data belongs to the kth class according to the K-classes combined solution.} \item{MclustOutput}{A list of class \code{Mclust}. Output of a call to the Mclust function (as provided by the user or the result of a call to the Mclust function) used to initiate the combined solutions hierarchy: please see the \code{\link{Mclust}} function documentation for details.} } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. 
Scrucca } \seealso{ \code{\link{plot.clustCombi}} } \examples{ data(Baudry_etal_2010_JCGS_examples) # run Mclust using provided data output <- clustCombi(data = ex4.1) \donttest{ # or run Mclust and then clustcombi on the returned object mod <- Mclust(ex4.1) output <- clustCombi(mod) } output summary(output) \donttest{ # run Mclust using provided data and any further optional argument provided output <- clustCombi(data = ex4.1, modelName = "EEV", G = 1:15) } # plot the hierarchy of combined solutions plot(output, what = "classification") # plot some "entropy plots" which may help one to select the number of classes plot(output, what = "entropy") # plot the tree structure obtained from combining mixture components plot(output, what = "tree") # the selected model and number of components obtained from Mclust using BIC output$MclustOutput # the matrix whose [i,k]th entry is the probability that i-th observation in # the data belongs to the k-th class according to the BIC solution head( output$combiz[[output$MclustOutput$G]] ) # the matrix whose [i,k]th entry is the probability that i-th observation in # the data belongs to the k-th class according to the first combined solution head( output$combiz[[output$MclustOutput$G-1]] ) # the matrix describing how to merge the 6-classes solution to get the # 5-classes solution output$combiM[[5]] # for example the following code returns the label of the class (in the # 5-classes combined solution) to which the 4th class (in the 6-classes # solution) is assigned. 
Only two classes in the (K+1)-classes solution # are assigned the same class in the K-classes solution: the two which # are merged at this step output$combiM[[5]] %*% c(0,0,0,1,0,0) # recover the 5-classes soft clustering from the 6-classes soft clustering # and the 6 -> 5 combining matrix all( output$combiz[[5]] == t( output$combiM[[5]] \%*\% t(output$combiz[[6]]) ) ) # the hard clustering under the 5-classes solution head( output$classification[[5]] ) } \keyword{ cluster } mclust/man/summary.MclustSSC.Rd0000644000175000017500000000221413742016253016273 0ustar nileshnilesh\name{summary.MclustSSC} \alias{summary.MclustSSC} \alias{print.summary.MclustSSC} \title{Summarizing semi-supervised classification model based on Gaussian finite mixtures} \description{Summary method for class \code{"MclustSSC"}.} \usage{ \method{summary}{MclustSSC}(object, parameters = FALSE, \dots) \method{print}{summary.MclustSSC}(x, digits = getOption("digits"), \dots) } \arguments{ \item{object}{An object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}.} \item{x}{An object of class \code{'summary.MclustSSC'}, usually, a result of a call to \code{summary.MclustSSC}.} \item{parameters}{Logical; if \code{TRUE}, the parameters of mixture components are printed.} \item{digits}{The number of significant digits to use when printing.} \item{\dots}{Further arguments passed to or from other methods.} } % \details{} \value{The function \code{summary.MclustSSC} computes and returns a list of summary statistics of the estimated MclustSSC model for semi-supervised classification.} \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustSSC}}, \code{\link{plot.MclustSSC}}.} \keyword{classification} mclust/man/plot.MclustDR.Rd0000644000175000017500000001142514124774626015447 0ustar nileshnilesh\name{plot.MclustDR} \alias{plot.MclustDR} \alias{plotEvalues.MclustDR} \title{Plotting method for dimension reduction for model-based clustering and classification} \description{ 
Graphs data projected onto the estimated subspace for model-based clustering and classification. } \usage{ \method{plot}{MclustDR}(x, dimens, what = c("scatterplot", "pairs", "contour", "classification", "boundaries", "density", "evalues"), symbols, colors, col.contour = gray(0.7), col.sep = grey(0.4), ngrid = 200, nlevels = 5, asp = NULL, \dots) } \arguments{ \item{x}{ An object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}}. } \item{dimens}{ A vector of integers giving the dimensions of the desired coordinate projections for multivariate data. } \item{what}{ The type of graph requested: \describe{ \item{\code{"scatterplot"} =}{a two-dimensional plot of data projected onto the first two directions specified by \code{dimens} and with data points marked according to the corresponding mixture component. By default, the first two directions are selected for plotting.} \item{\code{"pairs"} =}{a scatterplot matrix of data projected onto the estimated subspace and with data points marked according to the corresponding mixture component. By default, all the available directions are used, unless they have been specified by \code{dimens}.} \item{\code{"contour"} =}{a two-dimensional plot of data projected onto the first two directions specified by \code{dimens} (by default, the first two directions) with density contours for classes or clusters and data points marked according to the corresponding mixture component.} \item{\code{"classification"} =}{a two-dimensional plot of data projected onto the first two directions specified by \code{dimens} (by default, the first two directions) with classification region and data points marked according to the corresponding mixture component.} \item{\code{"boundaries"} =}{a two-dimensional plot of data projected onto the first two directions specified by \code{dimens} (by default, the first two directions) with uncertainty boundaries and data points marked according to the corresponding mixture component. 
The uncertainty is shown using a greyscale with darker regions indicating higher uncertainty. } \item{\code{"density"} =}{a one-dimensional plot of estimated density for the first direction specified by \code{dimens} (by default, the first one). A set of box-plots for each estimated cluster or known class are also shown at the bottom of the graph. } } } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique mixture component. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{factor}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique cluster or known class. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{factor}). The default is given by \code{mclust.options("classPlotColors")}. } \item{col.contour}{ The color of contours in case \code{what = "contour"}. } \item{col.sep}{ The color of classification boundaries in case \code{what = "classification"}. } \item{ngrid}{ An integer specifying the number of grid points to use in evaluating the classification regions. } \item{nlevels}{ The number of levels to use in case \code{what = "contour"}. } \item{asp}{For scatterplots the \eqn{y/x} aspect ratio, see \code{\link{plot.window}}. } \item{\dots}{further arguments passed to or from other methods.} } %\details{} %\value{} \references{ Scrucca, L. (2010) Dimension reduction for model-based clustering. \emph{Statistics and Computing}, 20(4), pp. 471-484. 
} \author{Luca Scrucca} %\note{} \seealso{\link{MclustDR}} \examples{ \donttest{ mod <- Mclust(iris[,1:4], G = 3) dr <- MclustDR(mod, lambda = 0.5) plot(dr, what = "evalues") plot(dr, what = "pairs") plot(dr, what = "scatterplot", dimens = c(1,3)) plot(dr, what = "contour") plot(dr, what = "classification", ngrid = 200) plot(dr, what = "boundaries", ngrid = 200) plot(dr, what = "density") plot(dr, what = "density", dimens = 2) data(banknote) da <- MclustDA(banknote[,2:7], banknote$Status, G = 1:3) dr <- MclustDR(da) plot(dr, what = "evalues") plot(dr, what = "pairs") plot(dr, what = "contour") plot(dr, what = "classification", ngrid = 200) plot(dr, what = "boundaries", ngrid = 200) plot(dr, what = "density") plot(dr, what = "density", dimens = 2) } } \keyword{multivariate} mclust/man/cvMclustDA.Rd0000644000175000017500000000563014156711614014774 0ustar nileshnilesh\name{cvMclustDA} \alias{cvMclustDA} \title{MclustDA cross-validation} \description{ V-fold cross-validation for classification models based on Gaussian finite mixture modelling. } \usage{ cvMclustDA(object, nfold = 10, prop = object$prop, verbose = interactive(), \dots) } \arguments{ \item{object}{ An object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}. } \item{nfold}{ An integer specifying the number of folds (by defaul 10-fold CV is used). } \item{prop}{ A vector of class prior probabilities, which if not provided default to the class proportions in the training data. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the cross-validation procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots }{Further arguments passed to or from other methods.} } \details{ The function implements V-fold cross-validation for classification models fitted by \code{\link{MclustDA}}. 
Classification error and Brier score are the metrics returned, but other metrics can be computed using the output returned by this function (see Examples section below). } \value{ The function returns a list with the following components: \item{classification}{a factor of cross-validated class labels.} \item{z}{a matrix containing the cross-validated probabilites for class assignment.} \item{ce}{the cross-validation classification error.} \item{se.ce}{the standard error of the cross-validated classification error.} \item{brier}{the cross-validation Brier score.} \item{se.brier}{the standard error of the cross-validated Brier score.} } \author{Luca Scrucca} \seealso{ \code{\link{MclustDA}}, \code{\link{predict.MclustDA}}, \code{\link{classError}}, \code{\link{BrierScore}} } \examples{ \donttest{ # Iris data Class <- iris$Species X <- iris[,1:4] ## EDDA model with common covariance (essentially equivalent to linear discriminant analysis) irisEDDA <- MclustDA(X, Class, modelType = "EDDA", modelNames = "EEE") cv <- cvMclustDA(irisEDDA) # 10-fold CV (default) str(cv) cv <- cvMclustDA(irisEDDA, nfold = length(Class)) # LOO-CV str(cv) ## MclustDA model selected by BIC irisMclustDA <- MclustDA(X, Class) cv <- cvMclustDA(irisMclustDA) # 10-fold CV (default) str(cv) # Banknote data data("banknote") Class <- banknote$Status X <- banknote[,2:7] ## EDDA model selected by BIC banknoteEDDA <- MclustDA(X, Class, modelType = "EDDA") cv <- cvMclustDA(banknoteEDDA) # 10-fold CV (default) str(cv) (ConfusionMatrix <- table(Pred = cv$classification, Class)) TP <- ConfusionMatrix[1,1] FP <- ConfusionMatrix[1,2] FN <- ConfusionMatrix[2,1] TN <- ConfusionMatrix[2,2] (Sensitivity <- TP/(TP+FN)) (Specificity <- TN/(FP+TN)) } } \keyword{multivariate} mclust/man/imputeData.Rd0000644000175000017500000000326014124774626015070 0ustar nileshnilesh\name{imputeData} \alias{imputeData} \alias{matchCluster} \title{Missing data imputation via the \pkg{mix} package} \description{ Imputes missing data 
using the \pkg{mix} package. } \usage{ imputeData(data, categorical = NULL, seed = NULL, verbose = interactive()) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations containing missing values. Categorical variables are allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{categorical}{ A logical vectors whose \emph{i}th entry is \code{TRUE} if the \emph{i}th variable or column of \code{data} is to be interpreted as categorical and \code{FALSE} otherwise. The default is to assume that a variable is to be interpreted as categorical only if it is a factor. } \item{seed}{ A seed for the function \code{rngseed} that is used to initialize the random number generator in \pkg{mix}. By default, a seed is chosen uniformly in the interval \code{(.Machine$integer.max/1024, .Machine$integer.max)}. } \item{verbose}{ A logical, if \code{TRUE} reports info about iterations of the algorithm. } } \value{ A dataset of the same dimensions as \code{data} with missing values filled in. } \references{ Schafer J. L. (1997). Analysis of Imcomplete Multivariate Data, Chapman and Hall. 
} \seealso{ \code{\link{imputePairs}} } \examples{ \donttest{ # Note that package 'mix' must be installed data(stlouis, package = "mix") # impute the continuos variables in the stlouis data stlimp <- imputeData(stlouis[,-(1:3)]) # plot imputed values imputePairs(stlouis[,-(1:3)], stlimp) } } \keyword{cluster} mclust/man/MclustBootstrap.Rd0000644000175000017500000001064514124774626016145 0ustar nileshnilesh\name{MclustBootstrap} \alias{MclustBootstrap} \alias{print.MclustBootstrap} \title{Resampling-based Inference for Gaussian finite mixture models} \description{Bootstrap or jackknife estimation of standard errors and percentile bootstrap confidence intervals for the parameters of a Gaussian mixture model.} \usage{ MclustBootstrap(object, nboot = 999, type = c("bs", "wlbs", "pb", "jk"), max.nonfit = 10*nboot, verbose = interactive(), \dots) } \arguments{ \item{object}{An object of class \code{'Mclust'} or \code{'densityMclust'} providing an estimated Gaussian mixture model.} \item{nboot}{The number of bootstrap replications.} \item{type}{A character string specifying the type of resampling to use: \describe{ \item{\code{"bs"}}{nonparametric bootstrap} \item{\code{"wlbs"}}{weighted likelihood bootstrap} \item{\code{"pb"}}{parametric bootstrap} \item{\code{"jk"}}{jackknife} } } \item{max.nonfit}{The maximum number of non-estimable models allowed.} \item{verbose}{A logical controlling if a text progress bar is displayed during the resampling procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise.} \item{\dots}{Further arguments passed to or from other methods.} } \details{For a fitted Gaussian mixture model with \code{object$G} mixture components and covariances parameterisation \code{object$modelName}, this function returns either the bootstrap distribution or the jackknife distribution of mixture parameters. 
In the former case, the nonparametric bootstrap or the weighted likelihood bootstrap approach could be used, so the the bootstrap procedure generates \code{nboot} bootstrap samples of the same size as the original data by resampling with replacement from the observed data. In the jackknife case, the procedure considers all the samples obtained by omitting one observation at time. The resulting resampling distribution can then be used to obtain standard errors and percentile confidence intervals by the use of \code{\link{summary.MclustBootstrap}} function.} \value{An object of class \code{'MclustBootstrap'} with the following components: \item{n}{The number of observations in the data.} \item{d}{The dimension of the data.} \item{G}{A value specifying the number of mixture components.} \item{modelName}{A character string specifying the mixture model covariances parameterisation (see \code{\link{mclustModelNames}}).} \item{parameters}{A list of estimated parameters for the mixture components with the following components: \describe{ \item{\code{pro}}{a vector of mixing proportions.} \item{\code{mean}}{a matrix of means for each component.} \item{\code{variance}}{an array of covariance matrices for each component.} } } \item{nboot}{The number of bootstrap replications if \code{type = "bs"} or \code{type = "wlbs"}. 
The sample size if \code{type = "jk"}.} \item{type}{The type of resampling approach used.} \item{nonfit}{The number of resamples that did not convergence during the procedure.} \item{pro}{A matrix of dimension (\code{nboot} x \code{G}) containing the bootstrap distribution for the mixing proportion.} \item{mean}{An array of dimension (\code{nboot} x \code{d} x \code{G}), where \code{d} is the dimension of the data, containing the bootstrap distribution for the component means.} \item{variance}{An array of dimension (\code{nboot} x \code{d} x \code{d} x \code{G}), where \code{d} is the dimension of the data, containing the bootstrap distribution for the component covariances.} } \references{ Davison, A. and Hinkley, D. (1997) \emph{Bootstrap Methods and Their Applications}. Cambridge University Press. McLachlan, G.J. and Peel, D. (2000) \emph{Finite Mixture Models}. Wiley. O'Hagan A., Murphy T. B., Gormley I. C. and Scrucca L. (2015) On Estimation of Parameter Uncertainty in Model-Based Clustering. Submitted to \emph{Computational Statistics}. } \seealso{\code{\link{summary.MclustBootstrap}}, \code{\link{plot.MclustBootstrap}}, \code{\link{Mclust}}, \code{\link{densityMclust}}.} \examples{ \donttest{ data(diabetes) X <- diabetes[,-1] modClust <- Mclust(X) bootClust <- MclustBootstrap(modClust) summary(bootClust, what = "se") summary(bootClust, what = "ci") data(acidity) modDens <- densityMclust(acidity) modDens <- MclustBootstrap(modDens) summary(modDens, what = "se") summary(modDens, what = "ci") } } \keyword{htest} \keyword{cluster} mclust/man/mclustBICupdate.Rd0000644000175000017500000000245114124774626016024 0ustar nileshnilesh\name{mclustBICupdate} \alias{mclustBICupdate} \title{Update BIC values for parameterized Gaussian mixture models} \description{ Update the BIC (Bayesian Information Criterion) for parameterized Gaussian mixture models by taking the best from BIC results as returned by \code{\link{mclustBIC}}. 
} \usage{ mclustBICupdate(BIC, \dots) } \arguments{ \item{BIC}{Object of class \code{'mclustBIC'} containing the BIC values as returned by a call to \code{\link{mclustBIC}}. } \item{\dots}{Further objects of class \code{'mclustBIC'} to be merged.} } \value{ An object of class \code{'mclustBIC'} containing the best values obtained from merging the input arguments. Attributes are also updated according to the best BIC found, so calling \code{\link{Mclust}} on the resulting ouput will return the corresponding best model (see example). } \seealso{ \code{\link{mclustBIC}}, \code{\link{Mclust}}. } \examples{ \donttest{ data(galaxies, package = "MASS") galaxies <- galaxies / 1000 # use several random starting points BIC <- NULL for(j in 1:100) { rBIC <- mclustBIC(galaxies, verbose = FALSE, initialization = list(hcPairs = hcRandomPairs(galaxies))) BIC <- mclustBICupdate(BIC, rBIC) } pickBIC(BIC) plot(BIC) mod <- Mclust(galaxies, x = BIC) summary(mod) } } \keyword{cluster} mclust/man/hypvol.Rd0000644000175000017500000000301313175052576014306 0ustar nileshnilesh\name{hypvol} \alias{hypvol} \title{ Aproximate Hypervolume for Multivariate Data } \description{ Computes a simple approximation to the hypervolume of a multivariate data set. } \usage{ hypvol(data, reciprocal=FALSE) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{reciprocal}{ A logical variable indicating whether or not the reciprocal hypervolume is desired rather than the hypervolume itself. The default is to return the hypervolume. } } \value{ Returns the minimum of the hypervolume computed from simple variable bounds and that computed from variable bounds of the principal component scores. Used for the default hypervolume parameter for the noise component when observations are designated as noise in \code{Mclust} and \code{mclustBIC}. 
} \references{ A. Dasgupta and A. E. Raftery (1998). Detecting features in spatial point processes with clutter via model-based clustering. \emph{Journal of the American Statistical Association 93:294-302}. C. Fraley and A.E. Raftery (1998). \emph{Computer Journal 41:578-588}. C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association 97:611-631}. } \seealso{ \code{\link{mclustBIC}} } \examples{ hypvol(iris[,-5]) } \keyword{cluster} mclust/man/mvn.Rd0000644000175000017500000000577313752165054013601 0ustar nileshnilesh\name{mvn} \alias{mvn} \title{ Univariate or Multivariate Normal Fit } \description{ Computes the mean, covariance, and log-likelihood from fitting a single Gaussian to given data (univariate or multivariate normal). } \usage{ mvn( modelName, data, prior = NULL, warn = NULL, \dots) } \arguments{ \item{modelName}{ A character string representing a model name. This can be either \code{"Spherical"}, \code{"Diagonal"}, or \code{"Ellipsoidal"} or else \cr \code{"X"} for one-dimensional data,\cr \code{"XII"} for a spherical Gaussian, \cr \code{"XXI"} for a diagonal Gaussian \cr \code{"XXX"} for a general ellipsoidal Gaussian } \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{warn}{ A logical value indicating whether or not a warning should be issued whenever a singularity is encountered. The default is given by \code{mclust.options("warn")}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). 
} \item{parameters}{ \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{mvnX}}, \code{\link{mvnXII}}, \code{\link{mvnXXI}}, \code{\link{mvnXXX}}, \code{\link{mclustModelNames}} } \examples{ n <- 1000 set.seed(0) x <- rnorm(n, mean = -1, sd = 2) mvn(modelName = "X", x) mu <- c(-1, 0, 1) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% (2*diag(3)), MARGIN = 2, STATS = mu, FUN = "+") mvn(modelName = "XII", x) mvn(modelName = "Spherical", x) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% diag(1:3), MARGIN = 2, STATS = mu, FUN = "+") mvn(modelName = "XXI", x) mvn(modelName = "Diagonal", x) Sigma <- matrix(c(9,-4,1,-4,9,4,1,4,9), 3, 3) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% chol(Sigma), MARGIN = 2, STATS = mu, FUN = "+") mvn(modelName = "XXX", x) mvn(modelName = "Ellipsoidal", x) } \keyword{cluster} mclust/man/plot.MclustSSC.Rd0000644000175000017500000000342113742016344015556 0ustar nileshnilesh\name{plot.MclustSSC} \alias{plot.MclustSSC} \title{Plotting method for MclustSSC semi-supervised classification} \description{ Plots for semi-supervised classification based on Gaussian finite mixture models. } \usage{ \method{plot}{MclustSSC}(x, what = c("BIC", "classification", "uncertainty"), \dots) } \arguments{ \item{x}{ An object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}. } \item{what}{ A string specifying the type of graph requested. 
Available choices are: \describe{ \item{\code{"BIC"} =}{plot of BIC values used for model selection, i.e. for choosing the model class covariances.} \item{\code{"classification"} =}{a plot of data with points marked based on the known and the predicted classification.} \item{\code{"uncertainty"} =}{a plot of classification uncertainty.} } If not specified, in interactive sessions a menu of choices is proposed. } \item{\dots}{further arguments passed to or from other methods. See \code{\link{plot.Mclust}}.} } %\value{} %\details{} \author{Luca Scrucca} \seealso{ \code{\link{MclustSSC}} } \examples{ X <- iris[,1:4] class <- iris$Species # randomly remove class labels set.seed(123) class[sample(1:length(class), size = 120)] <- NA table(class, useNA = "ifany") clPairs(X, ifelse(is.na(class), 0, class), symbols = c(0, 16, 17, 18), colors = c("grey", 4, 2, 3), main = "Partially classified data") # Fit semi-supervised classification model mod_SSC <- MclustSSC(X, class) summary(mod_SSC, parameters = TRUE) pred_SSC <- predict(mod_SSC) table(Predicted = pred_SSC$classification, Actual = class, useNA = "ifany") plot(mod_SSC, what = "BIC") plot(mod_SSC, what = "classification") plot(mod_SSC, what = "uncertainty") } \keyword{multivariate} mclust/man/hclass.Rd0000644000175000017500000000212214124774626014244 0ustar nileshnilesh\name{hclass} \alias{hclass} \title{ Classifications from Hierarchical Agglomeration } \description{ Determines the classifications corresponding to different numbers of groups given merge pairs from hierarchical agglomeration. } \usage{ hclass(hcPairs, G) } \arguments{ \item{hcPairs}{ A numeric two-column matrix in which the \emph{i}th row gives the minimum index for observations in each of the two clusters merged at the \emph{i}th stage of agglomerative hierarchical clustering. } \item{G}{ An integer or vector of integers giving the number of clusters for which the corresponding classfications are wanted. 
} } \value{ A matrix with \code{length(G)} columns, each column corresponding to a classification. Columns are indexed by the character representation of the integers in \code{G}. } \seealso{ \code{\link{hc}}, \code{\link{hcE}} } \examples{ hcTree <- hc(modelName="VVV", data = iris[,-5]) cl <- hclass(hcTree,c(2,3)) \donttest{ par(pty = "s", mfrow = c(1,1)) clPairs(iris[,-5],cl=cl[,"2"]) clPairs(iris[,-5],cl=cl[,"3"]) } } \keyword{cluster} mclust/man/BrierScore.Rd0000644000175000017500000001152413467346535015037 0ustar nileshnilesh\name{BrierScore} \alias{BrierScore} % R CMD Rd2pdf BrierScore.Rd \title{Brier score to assess the accuracy of probabilistic predictions} \description{ The Brier score is a proper score function that measures the accuracy of probabilistic predictions.} \usage{ BrierScore(z, class) } \arguments{ \item{z}{ a matrix containing the predicted probabilities of each observation to be classified in one of the classes. Thus, the number of rows must match the length of \code{class}, and the number of columns the number of known classes. } \item{class}{ a numeric, character vector or factor containing the known class labels for each observation. If \code{class} is a factor, the number of classes is \code{nlevels(class)} with classes \code{levels(class)}. If \code{class} is a numeric or character vector, the number of classes is equal to the number of classes obtained via \code{unique(class)}. } } \details{ The Brier Score is the mean square difference between the true classes and the predicted probabilities. This function implements the original multi-class definition by Brier (1950), normalized to \eqn{[0,1]} as in Kruppa et al (2014). 
The formula is the following: \deqn{ BS = \frac{1}{2n} \sum_{i=1}^n \sum_{k=1}^K (C_{ik} - p_{ik})^2 } where \eqn{n} is the number of observations, \eqn{K} the number of classes, \eqn{C_{ik} = \{0,1\}} the indicator of class \eqn{k} for observation \eqn{i}, and \eqn{p_{ik}} is the predicted probability of observation \eqn{i} to belong to class \eqn{k}. The above formulation is applicable to multi-class predictions, including the binary case. A small value of the Brier Score indicates high prediction accuracy. The Brier Score is a strictly proper score (Gneiting and Raftery, 2007), which means that it takes its minimal value only when the predicted probabilities match the empirical probabilities. } \references{ Brier, G.W. (1950) Verification of forecasts expressed in terms of probability. \emph{Monthly Weather Review}, 78 (1): 1-3. Gneiting, G. and Raftery, A. E. (2007) Strictly proper scoring rules, prediction, and estimation. \emph{Journal of the American Statistical Association} 102 (477): 359-378. Kruppa, J., Liu, Y., Diener, H.-C., Holste, T., Weimar, C., Koonig, I. R., and Ziegler, A. (2014) Probability estimation with machine learning methods for dichotomous and multicategory outcome: Applications. \emph{Biometrical Journal}, 56 (4): 564-583. 
} \seealso{\code{\link{cvMclustDA}}} \examples{ # multi-class case class <- factor(c(5,5,5,2,5,3,1,2,1,1), levels = 1:5) probs <- matrix(c(0.15, 0.01, 0.08, 0.23, 0.01, 0.23, 0.59, 0.02, 0.38, 0.45, 0.36, 0.05, 0.30, 0.46, 0.15, 0.13, 0.06, 0.19, 0.27, 0.17, 0.40, 0.34, 0.18, 0.04, 0.47, 0.34, 0.32, 0.01, 0.03, 0.11, 0.04, 0.04, 0.09, 0.05, 0.28, 0.27, 0.02, 0.03, 0.12, 0.25, 0.05, 0.56, 0.35, 0.22, 0.09, 0.03, 0.01, 0.75, 0.20, 0.02), nrow = 10, ncol = 5) cbind(class, probs, map = map(probs)) BrierScore(probs, class) # two-class case class <- factor(c(1,1,1,2,2,1,1,2,1,1), levels = 1:2) probs <- matrix(c(0.91, 0.4, 0.56, 0.27, 0.37, 0.7, 0.97, 0.22, 0.68, 0.43, 0.09, 0.6, 0.44, 0.73, 0.63, 0.3, 0.03, 0.78, 0.32, 0.57), nrow = 10, ncol = 2) cbind(class, probs, map = map(probs)) BrierScore(probs, class) # two-class case when predicted probabilities are constrained to be equal to # 0 or 1, then the (normalized) Brier Score is equal to the classification # error rate probs <- ifelse(probs > 0.5, 1, 0) cbind(class, probs, map = map(probs)) BrierScore(probs, class) classError(map(probs), class)$errorRate # plot Brier score for predicted probabilities in range [0,1] class <- factor(rep(1, each = 100), levels = 0:1) prob <- seq(0, 1, by = 0.01) brier <- sapply(prob, function(p) { z <- matrix(c(1-p,p), nrow = length(class), ncol = 2, byrow = TRUE) BrierScore(z, class) }) plot(prob, brier, type = "l", main = "Scoring all one class", xlab = "Predicted probability", ylab = "Brier score") # brier score for predicting balanced data with constant prob class <- factor(rep(c(1,0), each = 50), levels = 0:1) prob <- seq(0, 1, by = 0.01) brier <- sapply(prob, function(p) { z <- matrix(c(1-p,p), nrow = length(class), ncol = 2, byrow = TRUE) BrierScore(z, class) }) plot(prob, brier, type = "l", main = "Scoring balanced classes", xlab = "Predicted probability", ylab = "Brier score") # brier score for predicting unbalanced data with constant prob class <- factor(rep(c(0,1), times = 
c(90,10)), levels = 0:1) prob <- seq(0, 1, by = 0.01) brier <- sapply(prob, function(p) { z <- matrix(c(1-p,p), nrow = length(class), ncol = 2, byrow = TRUE) BrierScore(z, class) }) plot(prob, brier, type = "l", main = "Scoring unbalanced classes", xlab = "Predicted probability", ylab = "Brier score") } \keyword{classif} mclust/man/cross.Rd0000644000175000017500000000102514124774626014121 0ustar nileshnilesh\name{cross} \alias{cross} \title{Simulated Cross Data} \usage{data(cross)} \description{ A 500 by 3 matrix in which the first column is the classification and the remaining columns are two data from a simulation of two crossed elliptical Gaussians. } \examples{ # This dataset was created as follows \donttest{ n <- 250 set.seed(0) cross <- rbind(matrix(rnorm(n*2), n, 2) \%*\% diag(c(1,9)), matrix(rnorm(n*2), n, 2) \%*\% diag(c(1,9))[,2:1]) cross <- cbind(c(rep(1,n),rep(2,n)), cross) } } \keyword{datasets} mclust/man/acidity.Rd0000644000175000017500000000230514020171050014371 0ustar nileshnilesh\name{acidity} \alias{acidity} \docType{data} \title{Acidity data} \description{ Acidity index measured in a sample of 155 lakes in the Northeastern United States. Following Crawford et al. (1992, 1994), the data are expressed as log(ANC+50), where ANC is the acidity neutralising capacity value. The data were also used to fit mixture of gaussian distributions by Richardson and Green (1997), and by McLachlan and Peel (2000, Sec. 6.6.2). } \usage{data(acidity)} \source{\url{http://www.stats.bris.ac.uk/~peter/mixdata}} \references{ Crawford, S. L. (1994) An application of the Laplace method to finite mixture distribution. \emph{Journal of the American Statistical Association}, 89, 259--267. Crawford, S. L., DeGroot, M. H., Kadane, J. B., and Small, M. J. (1994) Modeling lake chemistry distributions: Approximate Bayesian methods for estimating a finite mixture model. \emph{Technometrics}, 34, 441--453. McLachlan, G. and Peel, D. (2000) \emph{Finite Mixture Models}. 
Wiley, New York. Richardson, S. and Green, P. J. (1997) On Bayesian analysis of mixtures with unknown number of components (with discussion). \emph{Journal of the Royal Statistical Society, Series B}, 59, 731--792. } \keyword{datasets} mclust/man/sigma2decomp.Rd0000644000175000017500000000546413752165052015346 0ustar nileshnilesh\name{sigma2decomp} \alias{sigma2decomp} \title{ Convert mixture component covariances to decomposition form. } \description{ Converts a set of covariance matrices from representation as a 3-D array to a parameterization by eigenvalue decomposition. } \usage{ sigma2decomp(sigma, G = NULL, tol = sqrt(.Machine$double.eps), \dots) } \arguments{ \item{sigma}{ Either a 3-D array whose [,,k]th component is the covariance matrix for the kth component in an MVN mixture model, or a single covariance matrix in the case that all components have the same covariance. } \item{G}{ The number of components in the mixture. When \code{sigma} is a 3-D array, the number of components can be inferred from its dimensions. } \item{tol}{ Tolerance for determining whether or not the covariances have equal volume, shape, and or orientation. The default is the square root of the relative machine precision, \code{sqrt(.Machine$double.eps)}, which is about \code{1.e-8}. } \item{\dots}{ Catches unused arguments from an indirect or list call via \code{do.call}. } } \value{ The covariance matrices for the mixture components in decomposition form, including the following components: \item{modelName}{ A character string indicating the infered model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{d}{ The dimension of the data. } \item{G}{ The number of components in the mixture model. } \item{scale}{ Either a \emph{G}-vector giving the scale of the covariance (the \emph{d}th root of its determinant) for each component in the mixture model, or a single numeric value if the scale is the same for each component. 
} \item{shape}{ Either a \emph{G} by \emph{d} matrix in which the \emph{k}th column is the shape of the covariance matrix (normalized to have determinant 1) for the \emph{k}th component, or a \emph{d}-vector giving a common shape for all components. } \item{orientation}{ Either a \emph{d} by \emph{d} by \emph{G} array whose \code{[,,k]}th entry is the orthonomal matrix whose columns are the eigenvectors of the covariance matrix of the \emph{k}th component, or a \emph{d} by \emph{d} orthonormal matrix if the mixture components have a common orientation. The \code{orientation} component of \code{decomp} can be omitted in spherical and diagonal models, for which the principal components are parallel to the coordinate axes so that the orientation matrix is the identity. } } \seealso{ \code{\link{decomp2sigma}} } \examples{ meEst <- meEEE(iris[,-5], unmap(iris[,5])) names(meEst$parameters$variance) meEst$parameters$variance$Sigma sigma2decomp(meEst$parameters$variance$Sigma, G = length(unique(iris[,5]))) } \keyword{cluster} mclust/man/clPairs.Rd0000644000175000017500000000761613656731011014367 0ustar nileshnilesh\name{clPairs} \alias{clPairs} \alias{clPairsLegend} \title{Pairwise Scatter Plots showing Classification} \description{ Creates a scatter plot for each pair of variables in given data. Observations in different classes are represented by different colors and symbols. } \usage{ clPairs(data, classification, symbols = NULL, colors = NULL, cex = NULL, labels = dimnames(data)[[2]], cex.labels = 1.5, gap = 0.2, grid = FALSE, \dots) clPairsLegend(x, y, class, col, pch, cex, box = TRUE, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. 
} \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{cex}{ A vector of numerical values specifying the size of the plotting symbol for each unique class in \code{classification}. Values in \code{cex} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). By default \code{cex = 1} for all classes is used. } \item{labels}{ A vector of character strings for labelling the variables. The default is to use the column dimension names of \code{data}. } \item{cex.labels}{ A numerical value specifying the size of the text labels. } \item{gap}{ An argument specifying the distance between subplots (see \code{\link{pairs}}). } \item{grid}{ A logical specifying if grid lines should be added to panels (see \code{\link{grid}}). } \item{x,y}{ The x and y co-ordinates with respect to a graphic device having plotting region coordinates \code{par("usr" = c(0,1,0,1))}. } \item{class}{ The class labels. } \item{box}{ A logical, if \code{TRUE} then a box is drawn around the current plot figure. } \item{col, pch}{ The colors and plotting symbols appearing in the legend. } \item{\dots}{ For a \code{clPairs} call may be additional arguments to be passed to \code{\link{pairs}}. For a \code{clPairsLegend} call may be additional arguments to be passed to \code{\link{legend}}. 
} } \details{ The function \code{clPairs()} draws scatter plots on the current graphics device for each combination of variables in \code{data}. Observations of different classifications are labeled with different symbols. The function \code{clPairsLegend()} can be used to add a legend. See examples below. } \value{ The function \code{clPairs()} invisibly returns a list with the following components: \item{class}{A character vector of class labels.} \item{col}{A vector of colors used for each class.} \item{pch}{A vector of plotting symbols used for each class.} } \seealso{ \code{\link{pairs}}, \code{\link{coordProj}}, \code{\link{mclust.options}} } \examples{ clPairs(iris[,1:4], cl = iris$Species) clp <- clPairs(iris[,1:4], cl = iris$Species, lower.panel = NULL) clPairsLegend(0.1, 0.4, class = clp$class, col = clp$col, pch = clp$pch, title = "Iris data") } \keyword{cluster} mclust/man/dmvnorm.Rd0000644000175000017500000000257313562733761014463 0ustar nileshnilesh\name{dmvnorm} \alias{dmvnorm} \title{Density of multivariate Gaussian distribution} \description{ Efficiently computes the density of observations for a generic multivariate Gaussian distribution. } \usage{ dmvnorm(data, mean, sigma, log = FALSE) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{mean}{ A vector of means for each variable. } \item{sigma}{ A positive definite covariance matrix. } \item{log}{ A logical value indicating whether or not the logarithm of the densities should be returned. } } \value{ A numeric vector whose \emph{i}th element gives the density of the \emph{ith} observation in \code{data} for the multivariate Gaussian distribution with parameters \code{mean} and \code{sigma}. 
} \seealso{ \code{\link{dnorm}}, \code{\link{dens}} } \examples{ # univariate ngrid <- 101 x <- seq(-5, 5, length = ngrid) dens <- dmvnorm(x, mean = 1, sigma = 5) plot(x, dens, type = "l") # bivariate ngrid <- 101 x1 <- x2 <- seq(-5, 5, length = ngrid) mu <- c(1,0) sigma <- matrix(c(1,0.5,0.5,2), 2, 2) dens <- dmvnorm(as.matrix(expand.grid(x1, x2)), mu, sigma) dens <- matrix(dens, ngrid, ngrid) image(x1, x2, dens) contour(x1, x2, dens, add = TRUE) } mclust/man/cdfMclust.Rd0000644000175000017500000000403614154456176014721 0ustar nileshnilesh\name{cdfMclust} \alias{cdfMclust} \alias{quantileMclust} \title{ Cumulative Distribution and Quantiles for a univariate Gaussian mixture distribution } \description{ Compute the cumulative density function (cdf) or quantiles from an estimated one-dimensional Gaussian mixture fitted using \code{\link{densityMclust}}.} \usage{ cdfMclust(object, data, ngrid = 100, \dots) quantileMclust(object, p, \dots) } \arguments{ \item{object}{a \code{densityMclust} model object.} \item{data}{a numeric vector of evaluation points.} \item{ngrid}{the number of points in a regular grid to be used as evaluation points if no \code{data} are provided.} \item{p}{a numeric vector of probabilities.} \item{\dots}{further arguments passed to or from other methods.} } \details{The cdf is evaluated at points given by the optional argument \code{data}. If not provided, a regular grid of length \code{ngrid} for the evaluation points is used. The quantiles are computed using bisection linear search algorithm. } \value{ \code{cdfMclust} returns a list of \code{x} and \code{y} values providing, respectively, the evaluation points and the estimated cdf. \code{quantileMclust} returns a vector of quantiles. } \author{Luca Scrucca} \seealso{ \code{\link{densityMclust}}, \code{\link{plot.densityMclust}}. 
} \examples{ x <- c(rnorm(100), rnorm(100, 3, 2)) dens <- densityMclust(x) summary(dens, parameters = TRUE) cdf <- cdfMclust(dens) str(cdf) q <- quantileMclust(dens, p = c(0.01, 0.1, 0.5, 0.9, 0.99)) cbind(quantile = q, cdf = cdfMclust(dens, q)$y) plot(cdf, type = "l", xlab = "x", ylab = "CDF") points(q, cdfMclust(dens, q)$y, pch = 20, col = "red3") par(mfrow = c(2,2)) dens.waiting <- densityMclust(faithful$waiting) plot(dens.waiting) plot(cdfMclust(dens.waiting), type = "l", xlab = dens.waiting$varname, ylab = "CDF") dens.eruptions <- densityMclust(faithful$eruptions) plot(dens.eruptions) plot(cdfMclust(dens.eruptions), type = "l", xlab = dens.eruptions$varname, ylab = "CDF") par(mfrow = c(1,1)) } \keyword{cluster} \keyword{dplot} mclust/man/classError.Rd0000644000175000017500000000375113467317376015113 0ustar nileshnilesh\name{classError} \alias{classError} \title{Classification error} \description{ Computes the error rate of a given classification relative to the known classes, and the location of misclassified data points.} \usage{ classError(classification, class) } \arguments{ \item{classification}{ A numeric, character vector or factor specifying the predicted class labels. Must have the same length as \code{class}. } \item{class}{ A numeric, character vector or factor of known true class labels. Must have the same length as \code{classification}. } } \value{ A list with the following two components: \item{misclassified}{ The indexes of the misclassified data points in a minimum error mapping between the predicted classification and the known true classes. } \item{errorRate}{ The error rate corresponding to a minimum error mapping between the predicted classification and the known true classes. } } \details{ If more than one mapping between predicted classification and the known truth corresponds to the minimum number of classification errors, only one possible set of misclassified observations is returned. 
} \seealso{ \code{\link{map}} \code{\link{mapClass}}, \code{\link{table}} } \examples{ (a <- rep(1:3, 3)) (b <- rep(c("A", "B", "C"), 3)) classError(a, b) (a <- sample(1:3, 9, replace = TRUE)) (b <- sample(c("A", "B", "C"), 9, replace = TRUE)) classError(a, b) class <- factor(c(5,5,5,2,5,3,1,2,1,1), levels = 1:5) probs <- matrix(c(0.15, 0.01, 0.08, 0.23, 0.01, 0.23, 0.59, 0.02, 0.38, 0.45, 0.36, 0.05, 0.30, 0.46, 0.15, 0.13, 0.06, 0.19, 0.27, 0.17, 0.40, 0.34, 0.18, 0.04, 0.47, 0.34, 0.32, 0.01, 0.03, 0.11, 0.04, 0.04, 0.09, 0.05, 0.28, 0.27, 0.02, 0.03, 0.12, 0.25, 0.05, 0.56, 0.35, 0.22, 0.09, 0.03, 0.01, 0.75, 0.20, 0.02), nrow = 10, ncol = 5) cbind(class, probs, map = map(probs)) classError(map(probs), class) } \keyword{cluster} mclust/man/mclustVariance.Rd0000644000175000017500000000743613175053400015745 0ustar nileshnilesh\name{mclustVariance} \alias{mclustVariance} \title{ Template for variance specification for parameterized Gaussian mixture models } \description{ Specification of variance parameters for the various types of Gaussian mixture models. } \usage{ mclustVariance(modelName, d = NULL, G = 2) } \arguments{ \item{modelName}{A character string specifying the model.} \item{d}{A integer specifying the dimension of the data.} \item{G}{An integer specifying the number of components in the mixture model.} } \details{The \code{variance} component in the {parameters} list from the output to e.g. \code{me} or \code{mstep} or input to e.g. \code{estep} may contain one or more of the following arguments, depending on the model: \describe{ \item{\code{modelName}}{ A character string indicating the model. } \item{\code{d}}{ The dimension of the data. } \item{\code{G}}{ The number of components in the mixture model. } \item{\code{sigmasq}}{ for the one-dimensional models (\code{"E"}, \code{"V"}) and spherical models (\code{"EII"}, \code{"VII"}). 
This is either a vector whose \emph{k}th component is the variance for the \emph{k}th component in the mixture model (\code{"V"} and \code{"VII"}), or a scalar giving the common variance for all components in the mixture model (\code{"E"} and \code{"EII"}). } \item{\code{Sigma}}{ For the equal variance models \code{"EII"}, \code{"EEI"}, and \code{"EEE"}. A \emph{d} by \emph{d} matrix giving the common covariance for all components of the mixture model. } \item{\code{cholSigma}}{ For the equal variance model {"EEE"}. A \emph{d} by \emph{d} upper triangular matrix giving the Cholesky factor of the common covariance for all components of the mixture model. } \item{\code{sigma}}{ For all multidimensional mixture models. A \emph{d} by \emph{d} by \emph{G} matrix array whose \code{[,,k]}th entry is the covariance matrix for the \emph{k}th component of the mixture model. } \item{\code{cholsigma}}{ For the unconstrained covariance mixture model \code{"VVV"}. A \emph{d} by \emph{d} by \emph{G} matrix array whose \code{[,,k]}th entry is the upper triangular Cholesky factor of the covariance matrix for the \emph{k}th component of the mixture model. } \item{\code{scale}}{ For diagonal models \code{"EEI"}, \code{"EVI"}, \code{"VEI"}, \code{"VVI"} and constant-shape models \code{"EEV"} and \code{"VEV"}. Either a \emph{G}-vector giving the scale of the covariance (the \emph{d}th root of its determinant) for each component in the mixture model, or a single numeric value if the scale is the same for each component. } \item{\code{shape}}{ For diagonal models \code{"EEI"}, \code{"EVI"}, \code{"VEI"}, \code{"VVI"} and constant-shape models \code{"EEV"} and \code{"VEV"}. Either a \emph{G} by \emph{d} matrix in which the \emph{k}th column is the shape of the covariance matrix (normalized to have determinant 1) for the \emph{k}th component, or a \emph{d}-vector giving a common shape for all components. 
} \item{\code{orientation}}{ For the constant-shape models \code{"EEV"} and \code{"VEV"}. Either a \emph{d} by \emph{d} by \emph{G} array whose \code{[,,k]}th entry is the orthonomal matrix whose columns are the eigenvectors of the covariance matrix of the \emph{k}th component, or a \emph{d} by \emph{d} orthonormal matrix if the mixture components have a common orientation. The \code{orientation} component is not needed in spherical and diagonal models, since the principal components are parallel to the coordinate axes so that the orientation matrix is the identity. } } In all cases, the value \code{-1} is used as a placeholder for unknown nonzero entries. } \keyword{cluster} mclust/man/gmmhd.Rd0000644000175000017500000001202114124774626014062 0ustar nileshnilesh\name{gmmhd} \alias{gmmhd} \alias{print.gmmhd} \alias{summary.gmmhd} \alias{print.summary.gmmhd} \alias{plot.gmmhd} \alias{gmmhdClusterCores} \alias{gmmhdClassify} \title{Identifying Connected Components in Gaussian Finite Mixture Models for Clustering} \description{ Starting with the density estimate obtained from a fitted Gaussian finite mixture model, cluster cores are identified from the connected components at a given density level. Once cluster cores are identified, the remaining observations are allocated to those cluster cores for which the probability of cluster membership is the highest. 
} \usage{ gmmhd(object, ngrid = min(round((log(nrow(data)))*10), nrow(data)), dr = list(d = 3, lambda = 1, cumEvalues = NULL, mindir = 2), classify = list(G = 1:5, modelNames = mclust.options("emModelNames")[-c(8, 10)]), \dots) \method{plot}{gmmhd}(x, what = c("mode", "cores", "clusters"), \dots) } \arguments{ \item{object}{An object returned by \code{\link{Mclust}}.} \item{ngrid}{An integer specifying the number of grid points used to compute the density levels.} \item{dr}{A list of parameters used in the dimension reduction step.} \item{classify}{A list of parameters used in the classification step.} \item{x}{An object of class \code{'gmmhd'} as returned by the function \code{gmmhd}.} \item{what}{A string specifying the type of plot to be produced. See Examples section.} \item{\dots}{further arguments passed to or from other methods.} } \details{ Model-based clustering associates each component of a finite mixture distribution to a group or cluster. An underlying implicit assumption is that a one-to-one correspondence exists between mixture components and clusters. However, a single Gaussian density may not be sufficient, and two or more mixture components could be needed to reasonably approximate the distribution within a homogeneous group of observations. This function implements the methodology proposed by Scrucca (2016) based on the identification of high density regions of the underlying density function. Starting with an estimated Gaussian finite mixture model, the corresponding density estimate is used to identify the cluster cores, i.e. those data points which form the core of the clusters. These cluster cores are obtained from the connected components at a given density level \eqn{c}. A mode function gives the number of connected components as the level \eqn{c} is varied. Once cluster cores are identified, the remaining observations are allocated to those cluster cores for which the probability of cluster membership is the highest. 
The method usually improves the identification of non-Gaussian clusters compared to a fully parametric approach. Furthermore, it enables the identification of clusters which cannot be obtained by merging mixture components, and it can be straightforwardly extended to cases of higher dimensionality. } \value{ A list of class \code{gmmhd} with the following components: \item{Mclust}{The input object of class \code{"Mclust"} representing an estimated Gaussian finite mixture model.} \item{MclustDA}{An object of class \code{"MclustDA"} containing the model used for the classification step.} \item{MclustDR}{An object of class \code{"MclustDR"} containing the dimension reduction step if performed, otherwise \code{NULL}.} \item{x}{The data used in the algorithm. This can be the input data or a projection if a preliminary dimension reduction step is performed.} \item{density}{The density estimated from the input Gaussian finite mixture model evaluated at the input data.} \item{con}{A list of connected components at each step.} \item{nc}{A vector giving the number of connected components (i.e. modes) at each step.} \item{pn}{Vector of values over a uniform grid of proportions of length \code{ngrid}.} \item{qn}{Vector of density quantiles corresponding to proportions \code{pn}.} \item{pc}{Vector of empirical proportions corresponding to quantiles \code{qn}.} \item{clusterCores}{Vector of cluster cores numerical labels; \code{NA}s indicate that an observation does not belong to any cluster core.} \item{cluster}{Vector of numerical labels giving the final clustering.} \item{numClusters}{An integer giving the number of clusters.} } \references{ Scrucca, L. (2016) Identifying connected components in Gaussian finite mixture models for clustering. \emph{Computational Statistics & Data Analysis}, 93, 5-17. 
} \author{ Luca Scrucca \email{luca.scrucca@unipg.it} } %\note{} \seealso{\code{\link{Mclust}}} \examples{ \donttest{ data(faithful) mod <- Mclust(faithful) summary(mod) plot(as.densityMclust(mod), faithful, what = "density", points.pch = mclust.options("classPlotSymbols")[mod$classification], points.col = mclust.options("classPlotColors")[mod$classification]) GMMHD <- gmmhd(mod) summary(GMMHD) plot(GMMHD, what = "mode") plot(GMMHD, what = "cores") plot(GMMHD, what = "clusters") } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. Use one of RShowDoc("KEYWORDS") \keyword{cluster} mclust/man/imputePairs.Rd0000644000175000017500000000517514124774626015304 0ustar nileshnilesh\name{imputePairs} \alias{imputePairs} \title{ Pairwise Scatter Plots showing Missing Data Imputations } \description{ Creates a scatter plot for each pair of variables in given data, allowing display of imputations for missing values in different colors and symbols than non missing values. } \usage{ imputePairs(data, dataImp, symbols = c(1,16), colors = c("black", "red"), labels, panel = points, \dots, lower.panel = panel, upper.panel = panel, diag.panel = NULL, text.panel = textPanel, label.pos = 0.5 + has.diag/3, cex.labels = NULL, font.labels = 1, row1attop = TRUE, gap = 0.2) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations containing missing values. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{dataImp}{ The dataset \code{data} with missing values imputed. } \item{symbols}{ Either an integer or character vector assigning plotting symbols to the nonmissing data and impued values, respectively. The default is a closed circle for the nonmissing data and an open circle for the imputed values. } \item{colors}{ Either an integer or character vector assigning colors to the nonmissing data and impued values, respectively. 
The default is black for the nonmissing data and red for the imputed values. } \item{labels}{ As in function \code{pairs}. } \item{panel}{ As in function \code{pairs}. } \item{\dots}{ As in function \code{pairs}. } \item{lower.panel}{ As in function \code{pairs}. } \item{upper.panel}{ As in function \code{pairs}. } \item{diag.panel}{ As in function \code{pairs}. } \item{text.panel}{ As in function \code{pairs}. } \item{label.pos}{ As in function \code{pairs}. } \item{cex.labels}{ As in function \code{pairs}. } \item{font.labels}{ As in function \code{pairs}. } \item{row1attop}{ As in function \code{pairs}. } \item{gap}{ As in function \code{pairs}. } } \value{ A pairs plot displaying the location of missing and nonmissing values. } \references{ Schafer J. L. (1997). Analysis of Incomplete Multivariate Data, Chapman and Hall. } \seealso{ \code{\link{pairs}}, \code{\link{imputeData}} } \examples{ \donttest{ # Note that package 'mix' must be installed data(stlouis, package = "mix") # impute the continuous variables in the stlouis data stlimp <- imputeData(stlouis[,-(1:3)]) # plot imputed values imputePairs(stlouis[,-(1:3)], stlimp) } } \keyword{cluster} mclust/man/decomp2sigma.Rd0000644000175000017500000000413214124774626015344 0ustar nileshnilesh\name{decomp2sigma} \alias{decomp2sigma} \title{ Convert mixture component covariances to matrix form } \description{ Converts covariances from a parameterization by eigenvalue decomposition or Cholesky factorization to representation as a 3-D array. } \usage{ decomp2sigma(d, G, scale, shape, orientation, \dots) } \arguments{ \item{d}{ The dimension of the data. } \item{G}{ The number of components in the mixture model. } \item{scale}{ Either a \emph{G}-vector giving the scale of the covariance (the \emph{d}th root of its determinant) for each component in the mixture model, or a single numeric value if the scale is the same for each component. 
} \item{shape}{ Either a \emph{G} by \emph{d} matrix in which the \emph{k}th column is the shape of the covariance matrix (normalized to have determinant 1) for the \emph{k}th component, or a \emph{d}-vector giving a common shape for all components. } \item{orientation}{ Either a \emph{d} by \emph{d} by \emph{G} array whose \code{[,,k]}th entry is the orthonomal matrix whose columns are the eigenvectors of the covariance matrix of the \emph{k}th component, or a \emph{d} by \emph{d} orthonormal matrix if the mixture components have a common orientation. The \code{orientation} component of \code{decomp} can be omitted in spherical and diagonal models, for which the principal components are parallel to the coordinate axes so that the orientation matrix is the identity. } \item{\dots}{ Catches unused arguments from an indirect or list call via \code{do.call}. } } \value{ A 3-D array whose \code{[,,k]}th component is the covariance matrix of the \emph{k}th component in an MVN mixture model. } \seealso{ \code{\link{sigma2decomp}} } \examples{ meEst <- meVEV(iris[,-5], unmap(iris[,5])) names(meEst) meEst$parameters$variance dec <- meEst$parameters$variance decomp2sigma(d=dec$d, G=dec$G, shape=dec$shape, scale=dec$scale, orientation = dec$orientation) \donttest{ do.call("decomp2sigma", dec) ## alternative call } } \keyword{cluster} mclust/man/map.Rd0000644000175000017500000000225413175052667013551 0ustar nileshnilesh\name{map} \alias{map} \title{Classification given Probabilities} \description{ Converts a matrix in which each row sums to 1 to an integer vector specifying for each row the column index of the maximum. } \usage{ map(z, warn = mclust.options("warn"), \dots) } \arguments{ \item{z}{ A matrix (for example a matrix of conditional probabilities in which each row sums to 1 as produced by the E-step of the EM algorithm). 
} \item{warn}{ A logical variable indicating whether or not a warning should be issued when there are some columns of \code{z} for which no row attains a maximum. } \item{\dots }{ Provided to allow lists with elements other than the arguments can be passed in indirect or list calls with \code{do.call}. } } \value{ A integer vector with one entry for each row of z, in which the \emph{i}-th value is the column index at which the \emph{i}-th row of \code{z} attains a maximum. } \seealso{ \code{\link{unmap}}, \code{\link{estep}}, \code{\link{em}}, \code{\link{me}}. } \examples{ emEst <- me(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5])) map(emEst$z) } \keyword{cluster} % docclass is function mclust/man/GvHD.Rd0000644000175000017500000000434514124774626013570 0ustar nileshnilesh\name{GvHD} \alias{GvHD} \alias{GvHD.pos} \alias{GvHD.control} \docType{data} \title{GvHD Dataset} \description{ GvHD (Graft-versus-Host Disease) data of Brinkman et al. (2007). Two samples of this flow cytometry data, one from a patient with the GvHD, and the other from a control patient. The GvHD positive and control samples consist of 9083 and 6809 observations, respectively. Both samples include four biomarker variables, namely, CD4, CD8b, CD3, and CD8. The objective of the analysis is to identify CD3+ CD4+ CD8b+ cell sub-populations present in the GvHD positive sample. A treatment of this data by combining mixtures is proposed in Baudry et al. (2010). } \usage{data(GvHD)} \format{ GvHD.pos (positive patient) is a data frame with 9083 observations on the following 4 variables, which are biomarker measurements. \describe{ \item{CD4}{} \item{CD8b}{} \item{CD3}{} \item{CD8}{} } GvHD.control (control patient) is a data frame with 6809 observations on the following 4 variables, which are biomarker measurements. \describe{ \item{CD4}{} \item{CD8b}{} \item{CD3}{} \item{CD8}{} } } \references{ R. R. Brinkman, M. Gasparetto, S.-J. J. Lee, A. J. Ribickas, J. Perkins, W. Janssen, R. 
Smiley and C. Smith (2007). High-content flow cytometry and temporal data analysis for defining a cellular signature of Graft-versus-Host Disease. \emph{Biology of Blood and Marrow Transplantation, 13: 691-700.} K. Lo, R. R. Brinkman, R. Gottardo (2008). Automated gating of flow cytometry data via robust model-based clustering. \emph{Cytometry A, 73: 321-332.} J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \examples{ \donttest{ data(GvHD) dat <- GvHD.pos[1:500,] # only a few lines for a quick example output <- clustCombi(data = dat) output # is of class clustCombi # plot the hierarchy of combined solutions plot(output, what = "classification") # plot some "entropy plots" which may help one to select the number of classes plot(output, what = "entropy") # plot the tree structure obtained from combining mixture components plot(output, what = "tree") } } \keyword{datasets} mclust/man/adjustedRandIndex.Rd0000644000175000017500000000266113752165071016371 0ustar nileshnilesh\name{adjustedRandIndex} \alias{adjustedRandIndex} \title{ Adjusted Rand Index } \description{ Computes the adjusted Rand index comparing two classifications. } \usage{ adjustedRandIndex(x, y) } \arguments{ \item{x}{ A numeric or character vector of class labels. } \item{y}{ A numeric or character vector of class labels. The length of \code{y} should be the same as that of \code{x}. } } \value{ The adjusted Rand index comparing the two partitions (a scalar). This index has zero expected value in the case of random partition, and it is bounded above by 1 in the case of perfect agreement between two partitions. } \references{ L. Hubert and P. Arabie (1985) Comparing Partitions, \emph{Journal of the Classification}, 2, pp. 193-218. 
} \seealso{ \code{\link{classError}}, \code{\link{mapClass}}, \code{\link{table}} } \examples{ a <- rep(1:3, 3) a b <- rep(c("A", "B", "C"), 3) b adjustedRandIndex(a, b) a <- sample(1:3, 9, replace = TRUE) a b <- sample(c("A", "B", "C"), 9, replace = TRUE) b adjustedRandIndex(a, b) a <- rep(1:3, 4) a b <- rep(c("A", "B", "C", "D"), 3) b adjustedRandIndex(a, b) irisHCvvv <- hc(modelName = "VVV", data = iris[,-5]) cl3 <- hclass(irisHCvvv, 3) adjustedRandIndex(cl3,iris[,5]) irisBIC <- mclustBIC(iris[,-5]) adjustedRandIndex(summary(irisBIC,iris[,-5])$classification,iris[,5]) adjustedRandIndex(summary(irisBIC,iris[,-5],G=3)$classification,iris[,5]) } \keyword{cluster} mclust/man/simE.Rd0000644000175000017500000000715314124774626013675 0ustar nileshnilesh\name{simE} \alias{simE} \alias{simV} \alias{simEII} \alias{simVII} \alias{simEEI} \alias{simVEI} \alias{simEVI} \alias{simVVI} \alias{simEEV} \alias{simEEE} \alias{simVEV} \alias{simVVV} \alias{simEVE} \alias{simEVV} \alias{simVEE} \alias{simVVE} \title{ Simulate from a Parameterized MVN Mixture Model } \description{ Simulate data from a parameterized MVN mixture model. 
} \usage{ simE(parameters, n, seed = NULL, \dots) simV(parameters, n, seed = NULL, \dots) simEII(parameters, n, seed = NULL, \dots) simVII(parameters, n, seed = NULL, \dots) simEEI(parameters, n, seed = NULL, \dots) simVEI(parameters, n, seed = NULL, \dots) simEVI(parameters, n, seed = NULL, \dots) simVVI(parameters, n, seed = NULL, \dots) simEEE(parameters, n, seed = NULL, \dots) simVEE(parameters, n, seed = NULL, \dots) simEVE(parameters, n, seed = NULL, \dots) simVVE(parameters, n, seed = NULL, \dots) simEEV(parameters, n, seed = NULL, \dots) simVEV(parameters, n, seed = NULL, \dots) simEVV(parameters, n, seed = NULL, \dots) simVVV(parameters, n, seed = NULL, \dots) } \arguments{ \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{n}{ An integer specifying the number of data points to be simulated. } \item{seed}{ An optional integer argument to \code{set.seed()} for reproducible random class assignment. By default the current seed will be used. Reproducibility can also be achieved by calling \code{set.seed} before calling \code{sim}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A matrix in which first column is the classification and the remaining columns are the \code{n} observations simulated from the specified MVN mixture model. 
\item{Attributes:}{ \code{"modelName"} A character string indicating the variance model used for the simulation. } } \details{ This function can be used with an indirect or list call using \code{do.call}, allowing the output of e.g. \code{mstep}, \code{em} \code{me}, \code{Mclust}, to be passed directly without the need to specify individual parameters as arguments. } \seealso{ \code{\link{sim}}, \code{\link{Mclust}}, \code{\link{mstepE}}, \code{\link{mclustVariance}}. } \examples{ \donttest{ d <- 2 G <- 2 scale <- 1 shape <- c(1, 9) O1 <- diag(2) O2 <- diag(2)[,c(2,1)] O <- array(cbind(O1,O2), c(2, 2, 2)) O variance <- list(d= d, G = G, scale = scale, shape = shape, orientation = O) mu <- matrix(0, d, G) ## center at the origin simdat <- simEEV( n = 200, parameters = list(pro=c(1,1),mean=mu,variance=variance), seed = NULL) cl <- simdat[,1] sigma <- array(apply(O, 3, function(x,y) crossprod(x*y), y = sqrt(scale*shape)), c(2,2,2)) paramList <- list(mu = mu, sigma = sigma) coordProj( simdat, paramList = paramList, classification = cl) } } \keyword{cluster} mclust/man/predict.MclustDR.Rd0000644000175000017500000000363413175055104016112 0ustar nileshnilesh\name{predict.MclustDR} \alias{predict.MclustDR} \alias{predict2D.MclustDR} \title{Classify multivariate observations on a dimension reduced subspace by Gaussian finite mixture modeling} \description{Classify multivariate observations on a dimension reduced subspace estimated from a Gaussian finite mixture model.} \usage{ \method{predict}{MclustDR}(object, dim = 1:object$numdir, newdata, eval.points, \dots) } \arguments{ \item{object}{an object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}.}} \item{dim}{the dimensions of the reduced subspace used for prediction.} \item{newdata}{a data frame or matrix giving the data. If missing the data obtained from the call to \code{\link{MclustDR}} are used.} \item{eval.points}{a data frame or matrix giving the data projected on the reduced subspace. 
If provided \code{newdata} is not used.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a list of with the following components: \item{dir}{a matrix containing the data projected onto the \code{dim} dimensions of the reduced subspace.} \item{density}{densities from mixture model for each data point.} \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} \item{uncertainty}{The uncertainty associated with the classification.} \item{classification}{A vector of values giving the MAP classification.} } \references{ Scrucca, L. (2010) Dimension reduction for model-based clustering. \emph{Statistics and Computing}, 20(4), pp. 471-484. } \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustDR}}.} \examples{ mod = Mclust(iris[,1:4]) dr = MclustDR(mod) pred = predict(dr) str(pred) data(banknote) mod = MclustDA(banknote[,2:7], banknote$Status) dr = MclustDR(mod) pred = predict(dr) str(pred) } \keyword{multivariate} mclust/man/combiPlot.Rd0000644000175000017500000000473714124774626014735 0ustar nileshnilesh\name{combiPlot} \alias{combiPlot} \title{ Plot Classifications Corresponding to Successive Combined Solutions } \description{ Plot classifications corresponding to successive combined solutions. } \usage{ combiPlot(data, z, combiM, \dots) } \arguments{ \item{data}{ The data. } \item{z}{ A matrix whose [i,k]th entry is the probability that observation i in the data belongs to the kth class, for the initial solution (ie before any combining). Typically, the one returned by \code{Mclust}/BIC. } \item{combiM}{ A "combining matrix" (as provided by \code{\link{clustCombi}}), ie a matrix whose kth row contains only zeros, but in columns corresponding to the labels of the classes in the initial solution to be merged together to get the combined solution. } \item{\dots}{ Other arguments to be passed to the \code{\link{Mclust}} plot functions. 
} } \value{ Plot the classifications obtained by MAP from the matrix \code{t(combiM \%*\% t(z))}, which is the matrix whose [i,k]th entry is the probability that observation i in the data belongs to the kth class, according to the combined solution obtained by merging (according to \code{combiM}) the initial solution described by \code{z}. } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. Scrucca } \seealso{ \code{\link{clustCombi}}, \code{\link{combMat}}, \code{\link{clustCombi}} } \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) MclustOutput <- Mclust(ex4.1) MclustOutput$G # Mclust/BIC selected 6 classes par(mfrow=c(2,2)) combiM0 <- diag(6) # is the identity matrix # no merging: plot the initial solution, given by z combiPlot(ex4.1, MclustOutput$z, combiM0, cex = 3) title("No combining") combiM1 <- combMat(6, 1, 2) # let's merge classes labeled 1 and 2 combiM1 combiPlot(ex4.1, MclustOutput$z, combiM1) title("Combine 1 and 2") # let's merge classes labeled 1 and 2, and then components labeled (in this # new 5-classes combined solution) 1 and 2 combiM2 <- combMat(5, 1, 2) \%*\% combMat(6, 1, 2) combiM2 combiPlot(ex4.1, MclustOutput$z, combiM2) title("Combine 1, 2 and then 1 and 2 again") plot(0,0,type="n", xlab = "", ylab = "", axes = FALSE) legend("center", legend = 1:6, col = mclust.options("classPlotColors"), pch = mclust.options("classPlotSymbols"), title = "Class labels:")} } \keyword{cluster} mclust/man/estep.Rd0000644000175000017500000000620414124774626014114 0ustar nileshnilesh\name{estep} \alias{estep} \title{ E-step for parameterized Gaussian mixture models. } \description{ Implements the expectation step of EM algorithm for parameterized Gaussian mixture models. 
} \usage{ estep(data, modelName, parameters, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{parameters}{ A named list giving the parameters of the model. The components are as follows: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ An estimate of the reciprocal hypervolume of the data region. If set to NULL or a negative value, the default is determined by applying function \code{hypvol} to the data. Used only when \code{pro} includes an additional mixing proportion for a noise component. } } } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ The input parameters. 
} \item{loglik}{ The log-likelihood for the data in the mixture model. } \item{Attributes}{ \code{"WARNING"}: an appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{estepE}}, \dots, \code{\link{estepVVV}}, \code{\link{em}}, \code{\link{mstep}}, \code{\link{mclust.options}} \code{\link{mclustVariance}} } \examples{ \donttest{ msEst <- mstep(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5])) names(msEst) estep(modelName = msEst$modelName, data = iris[,-5], parameters = msEst$parameters)} } \keyword{cluster} mclust/man/randProj.Rd0000644000175000017500000001460514124774626014557 0ustar nileshnilesh\name{randProj} \alias{randProj} \title{Random projections of multidimensional data modeled by an MVN mixture} \description{ Plots random projections given multidimensional data and parameters of an MVN mixture model for the data. } \usage{ randProj(data, seeds = NULL, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "error", "uncertainty"), quantiles = c(0.75, 0.95), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, scale = FALSE, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, cex = 1, PCH = ".", main = FALSE, \dots) } \arguments{ \item{data}{ A numeric matrix or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{seeds}{ An integer value or a vector of integer values to be used as seed for random number generation. If multiple values are provided, then each seed should produce a different projection. By default, a single seed is drawn randomnly, so each call of \code{randProj()} produces different projections. } \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. 
The relevant components are as follows: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix in which the \code{[i,k]}th entry gives the probability of observation \emph{i} belonging to the \emph{k}th class. Used to compute \code{classification} and \code{uncertainty} if those arguments aren't available. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. If present argument \code{z} will be ignored. } \item{truth}{ A numeric or character vector giving a known classification of each data point. If \code{classification} or \code{z} is also present, this is used for displaying classification errors. } \item{uncertainty}{ A numeric vector of values in \emph{(0,1)} giving the uncertainty of each data point. If present argument \code{z} will be ignored. } \item{what}{ Choose from one of the following three options: \code{"classification"} (default), \code{"error"}, \code{"uncertainty"}. } \item{quantiles}{ A vector of length 2 giving quantiles used in plotting uncertainty. The smallest symbols correspond to the smallest quantile (lowest uncertainty), medium-sized (open) symbols to points falling between the given quantiles, and large (filled) symbols to those in the largest quantile (highest uncertainty). The default is \emph{(0.75,0.95)}. } \item{addEllipses}{ A logical indicating whether or not to add ellipses with axes corresponding to the within-cluster covariances in case of \code{"classification"} or \code{"uncertainty"} plots. 
} \item{fillEllipses}{ A logical specifying whether or not to fill ellipses with transparent colors when \code{addEllipses = TRUE}. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{scale}{ A logical variable indicating whether or not the two chosen dimensions should be plotted on the same scale, and thus preserve the shape of the distribution. Default: \code{scale=FALSE} } \item{xlim, ylim}{ Optional arguments specifying bounds for the ordinate, abscissa of the plot. This may be useful for when comparing plots. } \item{xlab, ylab}{ Optional arguments specifying the labels for, respectively, the horizontal and vertical axis. } \item{cex}{ A numerical value specifying the size of the plotting symbols. The default value is 1. } \item{PCH}{ An argument specifying the symbol to be used when a classification has not been specified for the data. The default value is a small dot ".". } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing a random two-dimensional projection of the data, together with the location of the mixture components, classification, uncertainty, and/or classification errors. 
The function also returns an invisible list with components \code{basis}, the randomnly generated basis of the projection subspace, \code{data}, a matrix of projected data, and \code{mu} and \code{sigma} the component parameters transformed to the projection subspace. } \seealso{ \code{\link{clPairs}}, \code{\link{coordProj}}, \code{\link{mclust2Dplot}}, \code{\link{mclust.options}} } \examples{ \donttest{ est <- meVVV(iris[,-5], unmap(iris[,5])) par(pty = "s", mfrow = c(1,1)) randProj(iris[,-5], seeds=1:3, parameters = est$parameters, z = est$z, what = "classification", main = TRUE) randProj(iris[,-5], seeds=1:3, parameters = est$parameters, z = est$z, truth = iris[,5], what = "error", main = TRUE) randProj(iris[,-5], seeds=1:3, parameters = est$parameters, z = est$z, what = "uncertainty", main = TRUE) } } \keyword{cluster} mclust/man/Mclust.Rd0000644000175000017500000002056413741773130014241 0ustar nileshnilesh\name{Mclust} \alias{Mclust} \alias{print.Mclust} \title{Model-Based Clustering} \description{ Model-based clustering based on parameterized finite Gaussian mixture models. Models are estimated by EM algorithm initialized by hierarchical model-based agglomerative clustering. The optimal model is then selected according to BIC. } \usage{ Mclust(data, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), x = NULL, verbose = interactive(), \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations (\eqn{n}) and columns correspond to variables (\eqn{d}). } \item{G}{ An integer vector specifying the numbers of mixture components (clusters) for which the BIC is to be calculated. The default is \code{G=1:9}. } \item{modelNames}{ A vector of character strings indicating the models to be fitted in the EM phase of clustering. 
The default is: \itemize{ \item for univariate data (\eqn{d = 1}): \code{c("E", "V")} \item for multivariate data (\eqn{n > d}): all the models available in \code{mclust.options("emModelNames")} \item for multivariate data (\eqn{n <= d}): the spherical and diagonal models, i.e. \code{c("EII", "VII", "EEI", "EVI", "VEI", "VVI")} } The help file for \code{\link{mclustModelNames}} describes the available models. } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{\link{priorControl}}. \cr Note that, as described in \code{\link{defaultPrior}}, in the multivariate case only 10 out of 14 models may be used in conjunction with a prior, i.e. those available in \emph{MCLUST} up to version 4.4. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{initialization}{ A list containing zero or more of the following components: \describe{ \item{\code{hcPairs}}{ A matrix of merge pairs for hierarchical clustering such as produced by function \code{\link{hc}}. \cr For multivariate data, the default is to compute a hierarchical agglomerative clustering tree by applying function \code{\link{hc}} with model specified by \code{mclust.options("hcModelName")}, and data transformation set by \code{mclust.options("hcUse")}.\cr All the input or a subset as indicated by the \code{subset} argument is used for initial clustering.\cr The hierarchical clustering results are then used to start the EM algorithm from a given partition.\cr For univariate data, the default is to use quantiles to start the EM algorithm. However, hierarchical clustering could also be used by calling \code{\link{hc}} with model specified as \code{"V"} or \code{"E"}. } \item{\code{subset}}{ A logical or numeric vector specifying a subset of the data to be used in the initial hierarchical clustering phase. 
By default no subset is used unless the number of observations exceeds the value specified by \code{mclust.options("subset")}. Note that to guarantee exact reproducibility of results a seed must be specified (see \code{\link{set.seed}}).} \item{\code{noise}}{ A logical or numeric vector indicating an initial guess as to which observations are noise in the data. If numeric the entries should correspond to row indexes of the data. If supplied, a noise term will be added to the model in the estimation.} } } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued. The default is controlled by \code{\link{mclust.options}}. } \item{x}{ An object of class \code{'mclustBIC'}. If supplied, BIC values for models that have already been computed and are available in \code{x} are not recomputed. All arguments, with the exception of \code{data}, \code{G} and \code{modelName}, are ignored and their values are set as specified in the attributes of \code{x}. Defaults for \code{G} and \code{modelNames} are taken from \code{x}. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the fitting procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ An object of class \code{'Mclust'} providing the optimal (according to BIC) mixture model estimation. The details of the output components are as follows: \item{call}{The matched call} \item{data}{The input data matrix.} \item{modelName}{ A character string denoting the model at which the optimal BIC occurs. } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The optimal number of mixture components. } \item{BIC}{ All BIC values. } \item{loglik}{ The log-likelihood corresponding to the optimal BIC. } \item{df}{ The number of estimated parameters. 
} \item{bic}{ BIC value of the selected model. } \item{icl}{ ICL value of the selected model. } \item{hypvol}{ The hypervolume parameter for the noise component if required, otherwise set to \code{NULL} (see \code{\link{hypvol}}). } \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the test data belongs to the \emph{k}th class. } \item{classification}{ The classification corresponding to \code{z}, i.e. \code{map(z)}. } \item{uncertainty}{ The uncertainty associated with the classification. } } \references{ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. Fraley C., Raftery A. E., Murphy T. B. and Scrucca L. (2012) mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. \emph{Technical Report} No. 597, Department of Statistics, University of Washington. C. Fraley and A. E. Raftery (2007) Bayesian regularization for normal mixture estimation and model-based clustering. \emph{Journal of Classification}, 24, 155-181. 
} \seealso{ \code{\link{summary.Mclust}}, \code{\link{plot.Mclust}}, \code{\link{priorControl}}, \code{\link{emControl}}, \code{\link{hc}}, \code{\link{mclustBIC}}, \code{\link{mclustModelNames}}, \code{\link{mclust.options}} } \examples{ mod1 <- Mclust(iris[,1:4]) summary(mod1) mod2 <- Mclust(iris[,1:4], G = 3) summary(mod2, parameters = TRUE) # Using prior mod3 <- Mclust(iris[,1:4], prior = priorControl()) summary(mod3) mod4 <- Mclust(iris[,1:4], prior = priorControl(functionName="defaultPrior", shrinkage=0.1)) summary(mod4) # Clustering of faithful data with some artificial noise added nNoise <- 100 set.seed(0) # to make it reproducible Noise <- apply(faithful, 2, function(x) runif(nNoise, min = min(x)-.1, max = max(x)+.1)) data <- rbind(faithful, Noise) plot(faithful) points(Noise, pch = 20, cex = 0.5, col = "lightgrey") set.seed(0) NoiseInit <- sample(c(TRUE,FALSE), size = nrow(faithful)+nNoise, replace = TRUE, prob = c(3,1)/4) mod5 <- Mclust(data, initialization = list(noise = NoiseInit)) summary(mod5, parameter = TRUE) plot(mod5, what = "classification") } \keyword{cluster} mclust/man/sim.Rd0000644000175000017500000000702314124774626013564 0ustar nileshnilesh\name{sim} \alias{sim} \title{ Simulate from Parameterized MVN Mixture Models } \description{ Simulate data from parameterized MVN mixture models. } \usage{ sim(modelName, parameters, n, seed = NULL, \dots) } \arguments{ \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. 
} \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{n}{ An integer specifying the number of data points to be simulated. } \item{seed}{ An optional integer argument to \code{set.seed} for reproducible random class assignment. By default the current seed will be used. Reproducibility can also be achieved by calling \code{set.seed} before calling \code{sim}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A matrix in which the first column is the classification and the remaining columns are the \code{n} observations simulated from the specified MVN mixture model. \item{Attributes:}{ \code{"modelName"} A character string indicating the variance model used for the simulation. } } \details{ This function can be used with an indirect or list call using \code{do.call}, allowing the output of e.g. \code{mstep}, \code{em}, \code{me}, \code{Mclust} to be passed directly without the need to specify individual parameters as arguments. 
} \seealso{ \code{\link{simE}}, \dots, \code{\link{simVVV}}, \code{\link{Mclust}}, \code{\link{mstep}}, \code{\link{do.call}} } \examples{ irisBIC <- mclustBIC(iris[,-5]) irisModel <- mclustModel(iris[,-5], irisBIC) names(irisModel) irisSim <- sim(modelName = irisModel$modelName, parameters = irisModel$parameters, n = nrow(iris)) \donttest{ do.call("sim", irisModel) # alternative call } par(pty = "s", mfrow = c(1,2)) dimnames(irisSim) <- list(NULL, c("dummy", (dimnames(iris)[[2]])[-5])) dimens <- c(1,2) lim1 <- apply(iris[,dimens],2,range) lim2 <- apply(irisSim[,dimens+1],2,range) lims <- apply(rbind(lim1,lim2),2,range) xlim <- lims[,1] ylim <- lims[,2] coordProj(iris[,-5], parameters=irisModel$parameters, classification=map(irisModel$z), dimens=dimens, xlim=xlim, ylim=ylim) coordProj(iris[,-5], parameters=irisModel$parameters, classification=map(irisModel$z), truth = irisSim[,-1], dimens=dimens, xlim=xlim, ylim=ylim) irisModel3 <- mclustModel(iris[,-5], irisBIC, G=3) irisSim3 <- sim(modelName = irisModel3$modelName, parameters = irisModel3$parameters, n = 500, seed = 1) \donttest{ irisModel3$n <- NULL irisSim3 <- do.call("sim",c(list(n=500,seed=1),irisModel3)) # alternative call } clPairs(irisSim3[,-1], cl = irisSim3[,1]) } \keyword{cluster} mclust/man/plot.MclustBoostrap.Rd0000644000175000017500000000420014124774626016724 0ustar nileshnilesh\name{plot.MclustBootstrap} \alias{plot.MclustBootstrap} \title{Plot of bootstrap distributions for mixture model parameters} \description{ Plots the bootstrap distribution of parameters as returned by the \code{\link{MclustBootstrap}} function. 
} \usage{ \method{plot}{MclustBootstrap}(x, what = c("pro", "mean", "var"), show.parest = TRUE, show.confint = TRUE, hist.col = "grey", hist.border = "lightgrey", breaks = "Sturges", col = "forestgreen", lwd = 2, lty = 3, xlab = NULL, xlim = NULL, ylim = NULL, \dots) } \arguments{ \item{x}{Object returned by \code{MclustBootstrap}.} \item{what}{Character string specifying if mixing proportions (\code{"pro"}), component means (\code{"mean"}) or component variances (\code{"var"}) should be drawn.} \item{show.parest}{A logical specifying if the parameter estimate should be drawn as vertical line.} \item{show.confint}{A logical specifying if the resampling-based confidence interval should be drawn at the bottom of the graph. Confidence level can be provided as further argument \code{conf.level}; see \code{\link{summary.MclustBootstrap}}.} \item{hist.col}{The color to be used to fill the bars of the histograms.} \item{hist.border}{The color of the border around the bars of the histograms.} \item{breaks}{See the argument in function \code{\link[graphics]{hist}}.} \item{col, lwd, lty}{The color, line width and line type to be used to represent the estimated parameters and confidence intervals.} \item{xlab}{Optional label for the horizontal axis.} \item{xlim, ylim}{A two-values vector of axis range for, respectively, horizontal and vertical axis.} \item{\dots}{Other graphics parameters.} } \value{ A plot for each variable/component of the selected parameters. 
} \seealso{ \code{\link{MclustBootstrap}} } \examples{ \donttest{ data(diabetes) X <- diabetes[,-1] modClust <- Mclust(X, G = 3, modelNames = "VVV") bootClust <- MclustBootstrap(modClust, nboot = 99) par(mfrow = c(1,3), mar = c(4,2,2,0.5)) plot(bootClust, what = "pro") par(mfrow = c(3,3), mar = c(4,2,2,0.5)) plot(bootClust, what = "mean") } } \keyword{cluster} mclust/man/densityMclust.diagnostic.Rd0000644000175000017500000000453614124774626017774 0ustar nileshnilesh\name{densityMclust.diagnostic} \alias{densityMclust.diagnostic} \title{Diagnostic plots for \code{mclustDensity} estimation} \description{ Diagnostic plots for density estimation. Only available for the one-dimensional case. } \usage{ densityMclust.diagnostic(object, type = c("cdf", "qq"), col = c("black", "black"), lwd = c(2,1), lty = c(1,1), legend = TRUE, grid = TRUE, \dots) } \arguments{ \item{object}{An object of class \code{'mclustDensity'} obtained from a call to \code{\link{densityMclust}} function.} \item{type}{The type of graph requested: \describe{ \item{\code{"cdf"} =}{a plot of the estimated CDF versus the empirical distribution function.} \item{\code{"qq"} =}{a Q-Q plot of sample quantiles versus the quantiles obtained from the inverse of the estimated cdf.} } } \item{col}{A pair of values for the color to be used for plotting, respectively, the estimated CDF and the empirical cdf.} \item{lwd}{A pair of values for the line width to be used for plotting, respectively, the estimated CDF and the empirical cdf.} \item{lty}{A pair of values for the line type to be used for plotting, respectively, the estimated CDF and the empirical cdf.} \item{legend}{A logical indicating if a legend must be added to the plot of fitted CDF vs the empirical CDF.} \item{grid}{A logical indicating if a \code{\link{grid}} should be added to the plot.} \item{\dots}{Additional arguments.} } \details{ The two diagnostic plots for density estimation in the one-dimensional case are discussed in Loader (1999, pp- 87-90). 
} % \value{} \references{ Loader C. (1999), Local Regression and Likelihood. New York, Springer. C. Fraley, A. E. Raftery, T. B. Murphy and L. Scrucca (2012). mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. Technical Report No. 597, Department of Statistics, University of Washington. } \author{Luca Scrucca} \seealso{ \code{\link{densityMclust}}, \code{\link{plot.densityMclust}}. } \examples{ \donttest{ x <- faithful$waiting dens <- densityMclust(x) plot(dens, x, what = "diagnostic") # or densityMclust.diagnostic(dens, type = "cdf") densityMclust.diagnostic(dens, type = "qq") } } \keyword{cluster} \keyword{dplot} mclust/man/plot.mclustICL.Rd0000644000175000017500000000126214124774626015607 0ustar nileshnilesh\name{plot.mclustICL} \alias{plot.mclustICL} \title{ICL Plot for Model-Based Clustering} \description{ Plots the ICL values returned by the \code{\link{mclustICL}} function. } \usage{ \method{plot}{mclustICL}(x, ylab = "ICL", \dots) } \arguments{ \item{x}{ Output from \code{\link{mclustICL}}. } \item{ylab}{ Label for the vertical axis of the plot. } \item{\dots}{ Further arguments passed to the \code{\link{plot.mclustBIC}} function. } } \value{ A plot of the ICL values. } \seealso{ \code{\link{mclustICL}} } \examples{ \donttest{ data(faithful) faithful.ICL = mclustICL(faithful) plot(faithful.ICL) } } \keyword{cluster} % docclass is function mclust/man/hdrlevels.Rd0000644000175000017500000000431313314423527014752 0ustar nileshnilesh\name{hdrlevels} \alias{hdrlevels} \title{Highest Density Region (HDR) Levels} \description{ Compute the levels of Highest Density Regions (HDRs) for any density and probability levels. 
} \usage{ hdrlevels(density, prob) } \arguments{ \item{density}{A vector of density values computed on a set of (observed) evaluation points.} \item{prob}{A vector of probability levels in the range \eqn{[0,1]}.} } \value{ The function returns a vector of density values corresponding to HDRs at given probability levels. } \details{ From Hyndman (1996), let \eqn{f(x)} be the density function of a random variable \eqn{X}. Then the \eqn{100(1-\alpha)\%} HDR is the subset \eqn{R(f_\alpha)} of the sample space of \eqn{X} such that \deqn{ R(f_\alpha) = {x : f(x) \ge f_\alpha } } where \eqn{f_\alpha} is the largest constant such that \eqn{ Pr( X \in R(f_\alpha)) \ge 1-\alpha } } \seealso{ \code{\link{plot.densityMclust}} } \references{ Rob J. Hyndman (1996) Computing and Graphing Highest Density Regions. \emph{The American Statistician}, 50(2):120-126. } \author{L. Scrucca} \examples{ # Example: univariate Gaussian x <- rnorm(1000) f <- dnorm(x) a <- c(0.5, 0.25, 0.1) (f_a <- hdrlevels(f, prob = 1-a)) plot(x, f) abline(h = f_a, lty = 2) text(max(x), f_a, labels = paste0("f_", a), pos = 3) mean(f > f_a[1]) range(x[which(f > f_a[1])]) qnorm(1-a[1]/2) mean(f > f_a[2]) range(x[which(f > f_a[2])]) qnorm(1-a[2]/2) mean(f > f_a[3]) range(x[which(f > f_a[3])]) qnorm(1-a[3]/2) # Example 2: univariate Gaussian mixture set.seed(1) cl <- sample(1:2, size = 1000, prob = c(0.7, 0.3), replace = TRUE) x <- ifelse(cl == 1, rnorm(1000, mean = 0, sd = 1), rnorm(1000, mean = 4, sd = 1)) f <- 0.7*dnorm(x, mean = 0, sd = 1) + 0.3*dnorm(x, mean = 4, sd = 1) a <- 0.25 (f_a <- hdrlevels(f, prob = 1-a)) plot(x, f) abline(h = f_a, lty = 2) text(max(x), f_a, labels = paste0("f_", a), pos = 3) mean(f > f_a) # find the regions of HDR ord <- order(x) f <- f[ord] x <- x[ord] x_a <- x[f > f_a] j <- which.max(diff(x_a)) region1 <- x_a[c(1,j)] region2 <- x_a[c(j+1,length(x_a))] plot(x, f, type = "l") abline(h = f_a, lty = 2) abline(v = region1, lty = 3, col = 2) abline(v = region2, lty = 3, col = 3) } 
\keyword{density} mclust/man/majorityVote.Rd0000644000175000017500000000125613107132441015451 0ustar nileshnilesh\name{majorityVote} \alias{majorityVote} \title{Majority vote} \description{ A function to compute the majority vote (some would say plurality) label in a vector of labels, breaking ties at random.} \usage{ majorityVote(x) } \arguments{ \item{x}{A vector of values, either numerical or not.} } \value{A list with the following components: \item{table}{A table of votes for each unique value of \code{x}.} \item{ind}{An integer specifying which unique value of \code{x} corresponds to the majority vote.} \item{majority}{A string specifying the majority vote label.} } %\seealso{} \author{L. Scrucca} \examples{ x <- c("A", "C", "A", "B", "C", "B", "A") majorityVote(x) } mclust/man/entPlot.Rd0000644000175000017500000000540214124774626014420 0ustar nileshnilesh\name{entPlot} \alias{entPlot} \title{ Plot Entropy Plots } \description{ Plot "entropy plots" to help select the number of classes from a hierarchy of combined clusterings. } \usage{ entPlot(z, combiM, abc = c("standard", "normalized"), reg = 2, \dots) } \arguments{ \item{z}{ A matrix whose \code{[i,k]}th entry is the probability that observation \emph{i} in the data belongs to the \emph{k}th class, for the initial solution (ie before any combining). Typically, the one returned by \code{Mclust}/BIC. } \item{combiM}{ A list of "combining matrices" (as provided by \code{clustCombi}), ie \code{combiM[[K]]} is the matrix whose \emph{k}th row contains only zeros, but in columns corresponding to the labels of the classes in the \emph{(K+1)}-classes solution to be merged to get the \emph{K}-classes combined solution. \code{combiM} must contain matrices from \code{K} = number of classes in \code{z} to one. } \item{abc}{ Choose one or more of: "standard", "normalized", to specify whether the number of observations involved in each combining step should be taken into account to scale the plots or not. 
} \item{reg}{ The number of parts of the piecewise linear regression for the entropy plots. Choose one or more of: 2 (for 1 change-point), 3 (for 2 change-points). } \item{\dots}{ Other graphical arguments to be passed to the plot functions. } } \details{ Please see the article cited in the references for more details. A clear elbow in the "entropy plot" should suggest the user to consider the corresponding number(s) of class(es). } \value{ if \code{abc = "standard"}, plots the entropy against the number of clusters and the difference between the entropy of successive combined solutions against the number of clusters. if \code{abc = "normalized"}, plots the entropy against the cumulated number of observations involved in the successive combining steps and the difference between the entropy of successive combined solutions divided by the number of observations involved in the corresponding combining step against the number of clusters. } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. Scrucca } \seealso{ \code{\link{plot.clustCombi}}, \code{\link{combiPlot}}, \code{\link{clustCombi}} } \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) # run Mclust to get the MclustOutput output <- clustCombi(data = ex4.2, modelNames = "VII") entPlot(output$MclustOutput$z, output$combiM, reg = c(2,3)) # legend: in red, the single-change-point piecewise linear regression; # in blue, the two-change-point piecewise linear regression. } } \keyword{ cluster } mclust/man/MclustDA.Rd0000644000175000017500000002073014124774626014450 0ustar nileshnilesh\name{MclustDA} \alias{MclustDA} \alias{print.MclustDA} \title{MclustDA discriminant analysis} \description{ Discriminant analysis based on Gaussian finite mixture modeling. 
} \usage{ MclustDA(data, class, G = NULL, modelNames = NULL, modelType = c("MclustDA", "EDDA"), prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), verbose = interactive(), \dots) } \arguments{ \item{data}{ A data frame or matrix giving the training data. } \item{class}{ A vector giving the known class labels (either a numerical value or a character string) for the observations in the training data.} \item{G}{ An integer vector specifying the numbers of mixture components (clusters) for which the BIC is to be calculated within each class. The default is \code{G = 1:5}.\cr A different set of mixture components for each class can be specified by providing this argument with a list of integers for each class. See the examples below. } \item{modelNames}{ A vector of character strings indicating the models to be fitted by EM within each class (see the description in \code{\link{mclustModelNames}}). A different set of mixture models for each class can be specified by providing this argument with a list of character strings. See the examples below. } \item{modelType}{ A character string specifying whether the models given in \code{modelNames} should fit a different number of mixture components and covariance structures for each class (\code{"MclustDA"}, the default) or should be constrained to have a single component for each class with the same covariance structure among classes (\code{"EDDA"}). See Details section and the examples below. } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{\link{priorControl}}. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{initialization}{ A list containing zero or more of the following components: \describe{ \item{\code{hcPairs}}{ A matrix of merge pairs for hierarchical clustering such as produced by function \code{hc}. 
The default is to compute a hierarchical clustering tree by applying function \code{hc} with \code{modelName = "E"} to univariate data and \code{modelName = "VVV"} to multivariate data or a subset as indicated by the \code{subset} argument. The hierarchical clustering results are used as starting values for EM.} \item{\code{subset}}{ A logical or numeric vector specifying a subset of the data to be used in the initial hierarchical clustering phase.} } } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when estimation fails. The default is controlled by \code{\link{mclust.options}}. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the fitting procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots }{Further arguments passed to or from other methods.} } \value{ An object of class \code{'MclustDA'} providing the optimal (according to BIC) mixture model. The details of the output components are as follows: \item{call}{The matched call.} \item{data}{The input data matrix.} \item{class}{The input class labels.} \item{type}{A character string specifying the \code{modelType} estimated.} \item{models}{A list of \code{\link{Mclust}} objects containing information on fitted model for each class.} \item{n}{The total number of observations in the data.} \item{d}{The dimension of the data.} % \item{BIC}{All BIC values.} \item{bic}{Optimal BIC value.} \item{loglik}{Log-likelihood for the selected model.} \item{df}{Number of estimated parameters.} } \details{ The \code{"EDDA"} method for discriminant analysis is described in Bensmail and Celeux (1996), while \code{"MclustDA"} in Fraley and Raftery (2002). } \references{ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. 
and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. Fraley C., Raftery A. E., Murphy T. B. and Scrucca L. (2012) mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. \emph{Technical Report} No. 597, Department of Statistics, University of Washington. Bensmail, H., and Celeux, G. (1996) Regularized Gaussian Discriminant Analysis Through Eigenvalue Decomposition.\emph{Journal of the American Statistical Association}, 91, 1743-1748. } \author{Luca Scrucca} \seealso{ \code{\link{summary.MclustDA}}, \code{\link{plot.MclustDA}}, \code{\link{predict.MclustDA}}, \code{\link{classError}} } \examples{ odd <- seq(from = 1, to = nrow(iris), by = 2) even <- odd + 1 X.train <- iris[odd,-5] Class.train <- iris[odd,5] X.test <- iris[even,-5] Class.test <- iris[even,5] # common EEE covariance structure (which is essentially equivalent to linear discriminant analysis) irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA", modelNames = "EEE") summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) # common covariance structure selected by BIC irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA") summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) # general covariance structure selected by BIC irisMclustDA <- MclustDA(X.train, Class.train) summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) plot(irisMclustDA) plot(irisMclustDA, dimens = 3:4) plot(irisMclustDA, dimens = 4) plot(irisMclustDA, what = "classification") plot(irisMclustDA, what = "classification", newdata = X.test) plot(irisMclustDA, what = "classification", dimens = 3:4) plot(irisMclustDA, what = "classification", newdata = X.test, dimens = 3:4) plot(irisMclustDA, what 
= "classification", dimens = 4) plot(irisMclustDA, what = "classification", dimens = 4, newdata = X.test) plot(irisMclustDA, what = "train&test", newdata = X.test) plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 3:4) plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 4) plot(irisMclustDA, what = "error") plot(irisMclustDA, what = "error", dimens = 3:4) plot(irisMclustDA, what = "error", dimens = 4) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 3:4) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 4) \donttest{ # simulated 1D data n <- 250 set.seed(1) triModal <- c(rnorm(n,-5), rnorm(n,0), rnorm(n,5)) triClass <- c(rep(1,n), rep(2,n), rep(3,n)) odd <- seq(from = 1, to = length(triModal), by = 2) even <- odd + 1 triMclustDA <- MclustDA(triModal[odd], triClass[odd]) summary(triMclustDA, parameters = TRUE) summary(triMclustDA, newdata = triModal[even], newclass = triClass[even]) plot(triMclustDA, what = "scatterplot") plot(triMclustDA, what = "classification") plot(triMclustDA, what = "classification", newdata = triModal[even]) plot(triMclustDA, what = "train&test", newdata = triModal[even]) plot(triMclustDA, what = "error") plot(triMclustDA, what = "error", newdata = triModal[even], newclass = triClass[even]) # simulated 2D cross data data(cross) odd <- seq(from = 1, to = nrow(cross), by = 2) even <- odd + 1 crossMclustDA <- MclustDA(cross[odd,-1], cross[odd,1]) summary(crossMclustDA, parameters = TRUE) summary(crossMclustDA, newdata = cross[even,-1], newclass = cross[even,1]) plot(crossMclustDA, what = "scatterplot") plot(crossMclustDA, what = "classification") plot(crossMclustDA, what = "classification", newdata = cross[even,-1]) plot(crossMclustDA, what = "train&test", newdata = cross[even,-1]) plot(crossMclustDA, what = "error") plot(crossMclustDA, what = "error", newdata 
=cross[even,-1], newclass = cross[even,1]) } } \keyword{multivariate} mclust/man/predict.densityMclust.Rd0000644000175000017500000000322014124774626017267 0ustar nileshnilesh\name{predict.densityMclust} \alias{predict.densityMclust} \title{Density estimate of multivariate observations by Gaussian finite mixture modeling} \description{Compute density estimation for multivariate observations based on Gaussian finite mixture models estimated by \code{\link{densityMclust}}.} \usage{ \method{predict}{densityMclust}(object, newdata, what = c("dens", "cdens", "z"), logarithm = FALSE, \dots) } \arguments{ \item{object}{an object of class \code{'densityMclust'} resulting from a call to \code{\link{densityMclust}}.} \item{newdata}{a vector, a data frame or matrix giving the data. If missing the density is computed for the input data obtained from the call to \code{\link{densityMclust}}.} \item{what}{a character string specifying what to retrieve: \code{"dens"} returns a vector of values for the mixture density; \code{"cdens"} returns a matrix of component densities for each mixture component (along the columns); \code{"z"} returns a matrix of conditional probabilities of each data point to belong to a mixture component.} \item{logarithm}{A logical value indicating whether or not the logarithm of the density or component densities should be returned.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a vector or a matrix of densities evaluated at \code{newdata} depending on the argument \code{what} (see above). 
} \author{Luca Scrucca} % \note{} \seealso{\code{\link{Mclust}}.} \examples{ \donttest{ x <- faithful$waiting dens <- densityMclust(x) x0 <- seq(50, 100, by = 10) d0 <- predict(dens, x0) plot(dens) points(x0, d0, pch = 20) } } \keyword{multivariate} mclust/man/defaultPrior.Rd0000644000175000017500000000773613752165011015433 0ustar nileshnilesh\name{defaultPrior} \alias{defaultPrior} \title{ Default conjugate prior for Gaussian mixtures } \description{ Default conjugate prior specification for Gaussian mixtures. } \usage{ defaultPrior(data, G, modelName, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{G}{ The number of mixture components. } \item{modelName}{ A character string indicating the model: \cr\cr \code{"E"}: equal variance (univariate) \cr \code{"V"}: variable variance (univariate)\cr \code{"EII"}: spherical, equal volume \cr \code{"VII"}: spherical, unequal volume \cr \code{"EEI"}: diagonal, equal volume and shape\cr \code{"VEI"}: diagonal, varying volume, equal shape\cr \code{"EVI"}: diagonal, equal volume, varying shape \cr \code{"VVI"}: diagonal, varying volume and shape \cr \code{"EEE"}: ellipsoidal, equal volume, shape, and orientation \cr \code{"EEV"}: ellipsoidal, equal volume and equal shape\cr \code{"VEV"}: ellipsoidal, equal shape \cr \code{"VVV"}: ellipsoidal, varying volume, shape, and orientation. \cr\cr A description of the models above is provided in the help of \code{\link{mclustModelNames}}. Note that in the multivariate case only 10 out of 14 models may be used in conjunction with a prior, i.e. those available in \emph{MCLUST} up to version 4.4. } \item{\dots}{ One or more of the following: \describe{ \item{\code{dof}}{ The degrees of freedom for the prior on the variance. The default is \code{d + 2}, where \code{d} is the dimension of the data. 
} \item{\code{scale}}{ The scale parameter for the prior on the variance. The default is \code{var(data)/G^(2/d)}, where \code{d} is the dimension of the data. } \item{\code{shrinkage}}{ The shrinkage parameter for the prior on the mean. The default value is 0.01. If 0 or NA, no prior is assumed for the mean. } \item{\code{mean}}{ The mean parameter for the prior. The default value is \code{colMeans(data)}. } } } } \value{ A list giving the prior degrees of freedom, scale, shrinkage, and mean. } \details{ \code{defaultPrior} is a function whose default is to output the default prior specification for EM within \emph{MCLUST}.\cr Furthermore, \code{defaultPrior} can be used as a template to specify alternative parameters for a conjugate prior. } \references{ C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association} 97:611-631. C. Fraley and A. E. Raftery (2005, revised 2009). Bayesian regularization for normal mixture estimation and model-based clustering. Technical Report, Department of Statistics, University of Washington. C. Fraley and A. E. Raftery (2007). Bayesian regularization for normal mixture estimation and model-based clustering. \emph{Journal of Classification} 24:155-181. 
} \seealso{ \code{\link{mclustBIC}}, \code{\link{me}}, \code{\link{mstep}}, \code{\link{priorControl}} } \examples{ # default prior irisBIC <- mclustBIC(iris[,-5], prior = priorControl()) summary(irisBIC, iris[,-5]) # equivalent to previous example irisBIC <- mclustBIC(iris[,-5], prior = priorControl(functionName = "defaultPrior")) summary(irisBIC, iris[,-5]) # no prior on the mean; default prior on variance irisBIC <- mclustBIC(iris[,-5], prior = priorControl(shrinkage = 0)) summary(irisBIC, iris[,-5]) # equivalent to previous example irisBIC <- mclustBIC(iris[,-5], prior = priorControl(functionName="defaultPrior", shrinkage=0)) summary(irisBIC, iris[,-5]) defaultPrior( iris[-5], G = 3, modelName = "VVV") } \keyword{cluster} mclust/man/logLik.MclustDA.Rd0000644000175000017500000000201014124774626015657 0ustar nileshnilesh\name{logLik.MclustDA} \alias{logLik.MclustDA} \title{Log-Likelihood of a \code{MclustDA} object} \description{ Returns the log-likelihood for a \code{MclustDA} object.} \usage{ \method{logLik}{MclustDA}(object, data, \dots) } \arguments{ \item{object}{an object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} \item{data}{the data for which the log-likelihood must be computed. 
If missing, the observed data from the \code{'MclustDA'} object is used.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{Returns an object of class \code{'logLik'} with an element providing the maximized log-likelihood, and further arguments giving the number of (estimated) parameters in the model (\code{"df"}) and the sample size (\code{"nobs"}).} \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustDA}}.} \examples{ \donttest{ irisMclustDA <- MclustDA(iris[,1:4], iris$Species) summary(irisMclustDA) logLik(irisMclustDA) } } \keyword{multivariate} mclust/man/MclustDR.Rd0000644000175000017500000001157313760217372014471 0ustar nileshnilesh\name{MclustDR} \alias{MclustDR} \alias{print.MclustDR} \title{Dimension reduction for model-based clustering and classification} \description{ A dimension reduction method for visualizing the clustering or classification structure obtained from a finite mixture of Gaussian densities. } \usage{ MclustDR(object, lambda = 1, normalized = TRUE, Sigma, tol = sqrt(.Machine$double.eps)) } \arguments{ \item{object}{An object of class \code{'Mclust'} or \code{'MclustDA'} resulting from a call to, respectively, \code{\link{Mclust}} or \code{\link{MclustDA}}.} \item{lambda}{A tuning parameter in the range [0,1] as described in Scrucca (2014). The directions that mostly separate the estimated clusters or classes are recovered using the default value 1. Users can set this parameter to balance the relative importance of information derived from cluster/class means and covariances. For instance, a value of 0.5 gives equal importance to differences in means and covariances among clusters/classes.} \item{normalized}{Logical. If \code{TRUE} directions are normalized to unit norm.} \item{Sigma}{Marginal covariance matrix of data. 
If not provided is estimated by the MLE of observed data.} \item{tol}{A tolerance value.} } \details{ The method aims at reducing the dimensionality by identifying a set of linear combinations, ordered by importance as quantified by the associated eigenvalues, of the original features which capture most of the clustering or classification structure contained in the data. Information on the dimension reduction subspace is obtained from the variation on group means and, depending on the estimated mixture model, on the variation on group covariances (see Scrucca, 2010). Observations may then be projected onto such a reduced subspace, thus providing summary plots which help to visualize the underlying structure. The method has been extended to the supervised case, i.e. when the true classification is known (see Scrucca, 2014). This implementation doesn't provide a formal procedure for the selection of dimensionality. A future release will include one or more methods. } \value{ An object of class \code{'MclustDR'} with the following components: \item{call}{The matched call} \item{type}{A character string specifying the type of model for which the dimension reduction is computed. Currently, possible values are \code{"Mclust"} for clustering, and \code{"MclustDA"} or \code{"EDDA"} for classification.} \item{x}{The data matrix.} \item{Sigma}{The covariance matrix of the data.} \item{mixcomp}{A numeric vector specifying the mixture component of each data observation.} \item{class}{A factor specifying the classification of each data observation. For model-based clustering this is equivalent to the corresponding mixture component. For model-based classification this is the known classification.} \item{G}{The number of mixture components.} \item{modelName}{The name of the parameterization of the estimated mixture model(s). 
See \code{\link{mclustModelNames}}.} \item{mu}{A matrix of means for each mixture component.} \item{sigma}{An array of covariance matrices for each mixture component.} \item{pro}{The estimated prior for each mixture component.} \item{M}{The kernel matrix.} \item{lambda}{The tuning parameter.} \item{evalues}{The eigenvalues from the generalized eigen-decomposition of the kernel matrix.} \item{raw.evectors}{The raw eigenvectors from the generalized eigen-decomposition of the kernel matrix, ordered according to the eigenvalues.} \item{basis}{The basis of the estimated dimension reduction subspace.} \item{std.basis}{The basis of the estimated dimension reduction subspace standardized to variables having unit standard deviation.} \item{numdir}{The dimension of the projection subspace.} \item{dir}{The estimated directions, i.e. the data projected onto the estimated dimension reduction subspace.} } \references{ Scrucca, L. (2010) Dimension reduction for model-based clustering. \emph{Statistics and Computing}, 20(4), pp. 471-484. Scrucca, L. (2014) Graphical Tools for Model-based Mixture Discriminant Analysis. \emph{Advances in Data Analysis and Classification}, 8(2), pp. 147-165. } \author{Luca Scrucca} %\note{} \seealso{ \code{\link{summary.MclustDR}}, \code{\link{plot.MclustDR}}, \code{\link{Mclust}}, \code{\link{MclustDA}}. 
} \examples{ # clustering data(diabetes) mod <- Mclust(diabetes[,-1]) summary(mod) dr <- MclustDR(mod) summary(dr) plot(dr, what = "scatterplot") plot(dr, what = "evalues") dr <- MclustDR(mod, lambda = 0.5) summary(dr) plot(dr, what = "scatterplot") plot(dr, what = "evalues") # classification data(banknote) da <- MclustDA(banknote[,2:7], banknote$Status, modelType = "EDDA") dr <- MclustDR(da) summary(dr) da <- MclustDA(banknote[,2:7], banknote$Status) dr <- MclustDR(da) summary(dr) } \keyword{multivariate} mclust/man/densityMclust.Rd0000644000175000017500000000621514136474063015640 0ustar nileshnilesh\name{densityMclust} \alias{densityMclust} \title{Density Estimation via Model-Based Clustering} \description{ Produces a density estimate for each data point using a Gaussian finite mixture model from \code{Mclust}. } \usage{ densityMclust(data, \dots, plot = TRUE) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{\dots }{ Additional arguments for the \code{\link{Mclust}} function. In particular, setting the arguments \code{G} and \code{modelNames} allow to specify the number of mixture components and the type of model to be fitted. By default an "optimal" model is selected based on the BIC criterion. } \item{plot}{ A logical value specifying if the estimated density should be plotted. For more contols on the resulting graph see the associated \code{\link{plot.densityMclust}} method. } } \value{ An object of class \code{densityMclust}, which inherits from \code{Mclust}. This contains all the components described in \code{\link{Mclust}} and the additional element: \item{density}{The density evaluated at the input \code{data} computed from the estimated model.} } %\details{} \references{ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. 
(2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. Fraley C., Raftery A. E., Murphy T. B. and Scrucca L. (2012) mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. \emph{Technical Report} No. 597, Department of Statistics, University of Washington. } \author{Revised version by Luca Scrucca based on the original code by C. Fraley and A.E. Raftery.} \seealso{ \code{\link{plot.densityMclust}}, \code{\link{Mclust}}, \code{\link{summary.Mclust}}, \code{\link{predict.densityMclust}}. } \examples{ dens <- densityMclust(faithful$waiting) summary(dens) summary(dens, parameters = TRUE) plot(dens, what = "BIC", legendArgs = list(x = "topright")) plot(dens, what = "density", data = faithful$waiting) dens <- densityMclust(faithful, modelNames = "EEE", G = 3) summary(dens) summary(dens, parameters = TRUE) plot(dens, what = "density", data = faithful, drawlabels = FALSE, points.pch = 20) plot(dens, what = "density", type = "hdr") plot(dens, what = "density", type = "hdr", prob = c(0.1, 0.9)) plot(dens, what = "density", type = "hdr", data = faithful) plot(dens, what = "density", type = "persp") \donttest{ dens <- densityMclust(iris[,1:4], G = 2) summary(dens, parameters = TRUE) plot(dens, what = "density", data = iris[,1:4], col = "slategrey", drawlabels = FALSE, nlevels = 7) plot(dens, what = "density", type = "hdr", data = iris[,1:4]) plot(dens, what = "density", type = "persp", col = grey(0.9)) } } \keyword{cluster} mclust/man/figures/0000755000175000017500000000000013762445702014144 5ustar nileshnileshmclust/man/figures/logo.png0000644000175000017500000020501213376734350015613 0ustar nileshnileshPNG  IHDRX.' iCCPICC Profile8U]hU>sg#$Sl4t? 
% V46nI6"dΘ83OEP|1Ŀ (>/ % (>P苦;3ie|{g蹪X-2s=+WQ+]L6O w[C{_F qb Uvz?Zb1@/zcs>~if,ӈUSjF 1_Mjbuݠpamhmçϙ>a\+5%QKFkm}ۖ?ޚD\!~6,-7SثŜvķ5Z;[rmS5{yDyH}r9|-ăFAJjI.[/]mK 7KRDrYQO-Q||6 (0 MXd(@h2_f<:”_δ*d>e\c?~,7?& ك^2Iq2"y@g|UP`o@IDATxxGӀ$m {m!@4JBIB0Bz#|'BK# 00W;EۨI+$>,novnnwv `L 0&`L 0&`L 0&`L 0&`L 0&44L EI = %0 , !2zn]cP !QHQ4Jbo7O B V6㚅%7=, 0`-XY(p@7$%gQZ$tnV#$װy0 ~IQMcfq|T9! bL  /Y2(R~b4dR0R~]a3;0uq b]9y'`'ǑsL6;IL~lҶ'*<\n0Ԥ|N((G G~Lf Z|`n H&@wk?(U 7׵jbV4PӗflZV”RMUuWbLm0pZh=pRHT9F;>B] a­AZ1Xk30 ⻨iY<YU`@;3> !:fAdMD!I#!vнMQ".si Ph,g2`Ba # a `Vh |}efx]BܒPѦť0_l^3{^r}&'H&@EOcqx`Tݡ.javj3y0 $9oˬ\qY`a` og hLMa Ķ_u{@ATIGqy3RxeAaDir`'FgR3v鐡(jXxMJQ"1c/ ʗ2]=}*BY`aPdtqMiE 2W4{1AXQY[P67)Vrb#`[Xb=8aL6 ]L@+ߏ {{@aZiP+.RJ6Nr=[\^ ϟ-aE+F :f]8e@DIŏ}Q"*g0$:)ЏP`f YqLOa''=)c^n S(,Jeম@! @/yz#+̥Ҹ5m&1$_8x#p^QZt#ѹrs9W!3: )YYEPť aA 8OP:Z5Mh"T4QI\7AXddQU#Y{O k O°!AhcބCS1A=1!0Bk(<*{? N,NŘf_=n 06 |#9c~G#я@.ը` antȢtы,};|>QZRdºö ʂ`OO,wK v$Ci$ "Z!?pm[ !d:_^XQlzg fᅒW6|0A1A0Nl %"Tջ  ׎%үl9pZ?~謰~)0[=c|I> Ӑ1`u0 h*JZqpmOGa Tӽo1r\x&G\#46Jf8\XA&Alx67o]F/h)S0AQΞ_OC/*~_J0GRIS_L||Xw!:;/64:)2x KBЫ}2t5b…BPz0!BAxyƅC fILa{5E!׼^I_C ^ؖyMe=}C?ȡ 1cĩ$ *l6MȜyNQ2Y46 C %cq$c:d(kƖC{CW>|a39Ifʆ,lv~pasc鐥Eu!ޣ g.LB {7+^Y-"\~ z<+B b"(WdH=$p:dQHRa8 7 O,/OF|ǝyl)L2i|&;z DErנu}aErtL`ݮJ}ATilY&dDLD`@I#@JƂG BGC6fYK^@ws7u+:#czgVP\$.2j^98eq%F"@0 [cL"BDe9L 'a|Dw3J&5X:9F! "bjL,@ )a(Q"Y('@靧|67 I N\?x^M*.cɎbҨV5ȸP~.Lrf}O k AeC&1 0!9c~GEDjyh8u("&)%fh_;okS}v_P8.o|,sE%ޮZhq]R6a:.riicÕ:wh> 藀Lbzp6W\zgeyYCoY3_ /%Rhh\'JEJ|MaHYTKyJz-{w艋̥O5.;J?ES0-cQ5.xC>S!h[`:u5bn"靧~2w._>z%-!-6 t{j$CC!Kb7 nmcY'&kvSfoQy:'~&L( {l)Ԍ)t#pA( ȅ8 J,a>Kd1߶A,I0:Ê{{ o6 ݢzd0a3c9L)܇+ŨHAz'~>b4d)z!^΄t;A!KÓZ(;@^ȅ 0;ӿ΂w_Q^9l;k%5ʕqu2e 4i63@jڮ{J`YV6&8zF\zg^-qy̡[m7,6 _1 :$#.ts[Ã>+8s(&PF/ .&aXp4ƣVaBYO`OSFϫe0?#EC6u-:u[Ŝ( 0,u0y%6tA66 8)$V 4C ZNMibDd9L T!?[ǥg̲cK߃%K KilxN(jG¨kBv\Drq~e)IW&_3sod$_!)Azh~ $)rpgYT4(,.U;/YKhzg?T.y|{hP86 |UGEOdFQqWtȎX~&n LrqMPd;_ڎ#ąe!KL @u'!wi> /nE׺~Ƹy2CŔ0QT;>r^H\ KaDLR-6 / t.l- 16П!Br[Pz祫ܟ`xȱS8Ϩc}l=)YC$:Pa1֎CF  ~LqP<"YzhA_ɒE on1 T<P鐯ȅ aȅ 0"@? 
΅%#+M`v= eA'G.sB^@,F"C0_ktȢ&[0LÚB;eN~oRM16 |&|3Cą \_\zg4 2qafo\eW)A#-0ō @n]KJ<{Sҧ%s:dm6`+靿g(E梧!pBXR8Gό4+S24K2 0,NKb@MpA6LECV`uX+0&w~~x-p&G\#e5Ӈ E ;"X TU3b:9g.L 0&p@~wu+|r7`,aE?W=l0~( 'ýbCCct}:tF C73&GN*zTY.D Ι_߸ph8# !%47pjTu`CnUOXegU`>E'.[>l6c3B]EaP~1 {ʘ٥y@й>F0n`L ,J/LL`v= JN@R,hh0_Ïqkb:Pa3N&᝹8@2niրsd;:"GAՠOz {Sr(x-*1"p§Ж+ІA! "c O L,Sq!'Ut.]$@?YˡP L@/ӗfZ4|(䋧[-& c}&u"b;qxQFAҽ>xUL |_wx鞮\#JDRִo#wi4Q+&"o 2!raL 0MHÜ24]j_F(㫴ſu3orU/0A15i:Ir&#Ʈ`8!tYW&2(aN| ~\w@Hzg*iޖͦ2gsoGסtͯQ Z)x1+.p";.58J#"|]4py$  \rN$}ܒ8ݿK(Z_n 3.œzBi.;S`p#ї#M DY/פ6 Q+t}z)>rurkܴp{|3:TBڃAH=bȰ һ% EϦ52 0&O; ,};|NTϮ%.oBq#xُzTaujl1~X<޶GNh$3CnqQV4brEq,Yo8rN ވύF#rpf,>^G 0jad| .0!wtȢpVI|0HkQgmIl8F2ΙӇV$M֮/6Q 3R"s:dAL YJX_2 עsb/Am$A_0H_7gA_|0@?gX<%H.L 0&`9=1/8x2gG)fbx忷ICwo2v%@}+d.3]dHpdph.4e4O`46 HK_p #)鋯4e c} ѡ0!_Ҁ! !BS*ZO(p0?oÇd0?꒱(,k]f6 :/j_GcvAQ!ֽ Ř4ą 0&7oVSդ85]]Vq3,8@zq}&?g6REwER:䉢8ga(|8&_[Fg+a\ĴYa+hdx=Vӭ],I8V7#)1:J߰~EYU@ؼ4LZJ̐£]0+vB \ 3N7uſ%̼wx0t¤Pe!lu,dХ(.L 0&'ryAq_'T; 0s~k:a%  Yh /xn#Du( ^3 dL Qgjr p7vKN*7ߘ`Bxݕ@\$_- #nFi_gYYgLtdicM\q;|V+>c/oEUQFAy Lƹ 6 Ԟ~N }O8!X'Тnu5^(smXl||ih Fϸȿ Q1! T!с~(N/sln*ViB+7 {ObpJȰ SӚХe-!Y&ڝaGY*1>OzձDh9VTOo_("NӦ`s@Ƀvb'&&2jdP/!'(N^rgr nzT wOW3Js||.'‚ :<tTC~LBt5O⫅] ^#z͋|(C/} Q7. 6ks0Pzl&o$]x—vc]tȩė{">CN7L}µS}ZF ֠+qZCSժ2S|yv1i.1Fv؉7L(zo5a:1G?YX1'L܅/qE /gXZMЏJBOԴ]F=j|/mYG+/$ck#7F^сTDn˘Ewa6z]21=eCFqNжa Kehn 5~=#4=iT4O*hI+,YhwԴa/z{.A?vmN ZţJ0VjRVP@+2wB_sC{BW1E?FjGSg aIAƕ8R}ȳ`Z\As3\ ''zF;yiXVhjb56 4TV&`;+.9T)j5c#fYӐtrpT'YM͓3NojvռWۥy4 ꖇ4 nQ YUHh1ta<]!鐫_iKy|XL>BfwyۣA뿿YWɣn}q􃞨xۓMIkPW(K 19SRl ZWotn-qݍf3fb7SEKr遉d$SL 2˦\Mjt^G.oUQKE$Q=0[6v^>h - x#*&dI 6:I<mͩ !7{oT5(xQ$SkǪjnNqUcd {#uR1Q)Ep{ՎP Qa@~42 \U}-٭[CRUXQmN V }gkZRHd jD/.Ob HWo;o rt G,4fJ6t֚`+7|\~Q)"| UX;R0pQ&D^KsdL- ҥ"5zr ?'OY%*Fw[(7 ;*ha -ym{؈ yWEխd$|3&?$HBY#̋:+d֔m#@Mo Q;JZZ. 
c rգPH4BI{\)o=$ΨA9߸{@P '*Zccc KR\ J4'xi5(aPVTQ)kG) `*6ʟ=G+804BJ!g,Jڤ*CLPB/bIJraZhu.oһC֭]6_ p`xŝ~ŏ Aqߎk(FV/P fe&:gzl眆lٍ6?p $KPh⭀Bjz%@Fh %ȒL KaкUB{t)M*x5.֯2HhP-REKӔrlڪԣwj 9e` .LJra`ݚKi+"+N' ӄ!?sW2҂T]9?ŽmqSTiQW[88EC_Ä?:S(٪ަ@2rE&ũ|a|+/zo|e(- wtjT]n8#i }/>M,hML{6ŤZj'9hrdHm[Y!r+  {1E>|j%̲+gNˆnk 8E=YTSCt`,(!yDžBw 4Ey3Ė G KYIZrSj LG0c΀Uw9NA5U퓡 FHT;RTp/Jx__t1hx y.wr-? X#01)32.U 1M[Qv4'gtc~x^kĈ Ym?^Z| r9 E G C>?Te&%S*erR%Vk\lOxx,ujݡ/`ΖV$pa.Ŷoa=y/(Ø WĴ;7qa a rA.h}@+$0Ghi&]T"eIo6//%8 "(:Lp UĹp!@b(q}V\L#0@& "(L;uUahYH0-(r[~2vG`8,}2gQ-jq]&`YѨ> Sq)RՄwkR(.zv?nA>B  ~hZ҃OP6 4x=~jѿ54r1@^x'Yv_6X aD. hɗa%e m>iecڕktڝǁ.0x@+1$:~6/T'" "Ƶcaf]&iZٟ/ OL@6 4KO'僖K~B5-,/,@R6EDaP1\ 6 :`Hf\$\@{gnDϙq6g@+ګVi]kp:%P}D<ma ܦPPbTpOBT&,-{hIYˡn}7[zQ8^M@+ 3=$/Bڵnnr֪{~sU7_R\&a?{"v kyuc\I*3QRb^'8Qɴy)dQ5ga3u7Tw6 -qO~b nw7h$PǕypGOP\lx5s0kb[  ]7I[UnhY2 La?{"@NUK`߮+;<[կ˲<:u@UE2v])j^W@X6 tvBX.ț^mpؼ6 M ~{Qys"8($5 U'>ʑ8]Ywa;5Z/~cF[*%9|JIk؅axEIk$b zȓ^R:H(U8T?g%x0xҹ LS:\ 3frؙ57.JrA:꯭Q}ľnv?C\.%raaKwrY{mNPWkQWF a(Ywj;o>s,Y rN'%IC_w,o4i:|6ͣX0^FG|z딑Ԗ0Ǚ|c@IDATEKE ~‡|{Y9|%y=chM (<3H ^!S߶Qs-}.g='! _k$xux7i* 'E>_+hӠF(O=>^D8<77>[~;UR79Њ*YL9؂ΡZ I̅N٦Yt7Jub/iUyH5C8u_j5UBgjUxM)4s_1:cHø e؄nA˔=k0pZԭ7T/O  Y2b"BJpN9=[+0Sa"X6Wc=aJ^43dd NˉׁpT=zg}TNjO#C jh ٬,t8,tRyӊQ`EGÎ^TPMɺ(gj gWZ@Vd#+9ں,mS; WhlQ-XoQٔ]#QJ Q(>!Rjct̫SXa*ݚ~|=.+I?. eNZOk[pF7ZzU-;ђF1˚$­s$E/_=dW%:ݫ.nPUFYELcXēϔ  1}>kcotsvYS* ]9us.AVǦlwOǺ4HE詟|nUa]dˆ!@sz ywo-N;7r@~> ;ShJaCk iCC 1BtxE*զJǧ9Jt"|3#gްiز\K:'ҁAӚho4szWևioONqΙ'!/iޜja H8)h1D8A ;lFG目%6Y c/zxQzh%]ލk)`|:Q" LCώ t4PC2ijp݅L)M5P(]Înd\Fy(@R3 -ky atL=}ߨR"{>(oD^C 8%A^dzb_stW%u*4.}Ev #Jcr>PFntp=S4LU4d;B 3pdlRHzU-0X1J+P,Q48 T^U",MiI]U74E/.O ? 
@7::z mL/.L@xU `LK0xn 0& adz:1&6 eL 0&GlNL 0&D /f`L z<+`L 0/`KY&`z$ `LK0xn 0& adz:1&6 eL 0&GlNL 0&D /f`L z<+`L 0/`KY&`z$ `LK0xn 0& adz:1&6 eL 0&GlNL 0&D /f`L z<+`L 0/`KY&`z$GX'&@(.M%p4@`4H} aÓ*1&ٹ󸹼Ýa}QA9Q(ܽǡF `L 8OG gGz@wA9b)>v Nᦙ`C 9Փ䧞M>_guL O%.fL 0&cxN?A{?̅VT-j aY`L@<6 3e* l(A{%6hOT>0*My L 0&6 GH82 -U[m>Q` .L 0&<6 gGBJct"p~8jWvVusպw0&6lhŵEd>B&+-[n0h@}ӫ( ȑKK\l= |qaL z:]0n=<c @i<2?=ͺ& ^^Oq*0 n1qx8, T"ptLFD?#oMt /[9bX)V9aroT1xEevлNQU.*rlc\'L J*3aS)i"+oL@'0ɉ85Z cȵŒO kreƮW\ril$)7l;j㎣ϳʼ xO%x87WF@8wBcl '$( L\|EXbVրc_0sqb[߮zBG D뱛->r7dl##FkGՅtd E<2uԺo*]{BR8sF3fzvI,M#  a/ϝ%3[i cŮB3aP[_ΝY 漲'N\Ҋ+S>>͂aOSfhhAJC?B ?n*/-/DCpmå0ln +0(yp>@l_z՘b>H3Ukv2I.鐋Ŏ>+@@/o ʍ1{NaB/ܲỌˍ:]&}]8r"?w a0VVD{X)81BV6&Ԩ?{&ys *7Cll'/u~i߶a` o6 "`n"Wρ)BàqU7w6&|%ݣ&W%rђ[PhmRTE00w0^ 襵\o(ܳG9,s} \(%GO/VAP{GF\mUM-DSaI^e$9?k~jCS\/a;*7&+Qm% ߜ 79k VBC  wqUn$`)YMeQw6 €ښaOЬAqlUG0.RC{[" ҽ}pa JR$ *thv rr `cO@Uq Ѷ-FsZ!_j^)=}\{$?$zk_Dm{D{0WI`m}I/FZ\2I;70B\$#80+3C\|4;od'eu.tX^ 'v-jvk&ˇS߄#߂ݻ@ڜ]Ҏ tg5 @ +l!xW$k..[ )@|g*%~گ]M{ˮ:=?v);1>Z/qLIJFӹs8].v,@H.85Ⱦh֤E yjN r/$e3}7㢜Nz6{ܠ[a, 8ÏZrb⒣._qJ/\SnYٔm}U4 ,͒_$;S~Jl6#{d%ye΄аa砡6k{{t,B׃vm79S? 8b`Ǹ ΖB`6T2=Uh?z釀- .)Tݏ⁧G`v["n:Zi'MЋK`JDjh8޼3^NS!Wo?Wf $phxPz"(ai! P'{|v،YT/`%Efa\PXvQ+Ifd͝Bp S@GI|j].p/S7{#W˩ӮPm5fX?%Ryod$7&wyq(xWb$0!.ʨK6(AS2x@5tS oPkwyK{Gs,nK^M#"OaC2E7z g ]tlwTŎ! 
tnkcǎ"0b͐S$a gP DE0PSK>CϿXޱmڗwCWB޼cjd쪛~^^V (P[aeqzI֬Y֭S^ّPiѢz11 L <[8F鼊ȃÎ;pa؉M$BW+, e\ׅR/jWt޽7nt~7O?\2MVV1c\s50tPCjdr#S wQ)2Ž:ҝaM7_᧏?C1ڞ< N+S 6tBNE+ 淟3.aoEjO);RCbĉ0|a`JLL( U_>^4Pq̄ȰWv`ʔ)D$AtreߙQ;(j'fR ##V>of9lA=@gR)ַTJ(z% V-Jdcv.l?z&rg) є… O?\A.]SNбcGŗ -Q={ʕ+aٲepԩr1s==DFFoI < ›4)w?VvAxsZqƐ a_ѹ:w Ch _2MzCJ 7xjԨQU1z©OحSvŕ7*A]Cpג]^}U:śo飌TY ][V^Z뮻|??PV\qwZ%8'|4Z(}BZZiW&^LJ${cg([MEF( A O4h̚5K104}y5Bї_%iQCp934Z/fϞ]iڀK 0`àZ=0=߻KaEpj1C!.yuŊd<_0>~lι^gYwgL$\.=8h B ̷o4_1Xt'w<u+bnpt"vN_v*E Yu}邩S?PiMr}X  -5)ؘmK`h 2+SdFH Tmۦ'NT#`/ pw/jS7cËqbqCi%#-(m) V+|R˃[o>{n BQZ̿>3CFkin6mT$!!AyСƲ#P)}+wT4 ,4'Ő\2mF)!Cd.]TZCys6pqO}ej[RMX KQ)tzdڑQɻղ`r-[4@\|@7] b$h^D+4E UfVT[v-EW' |D%##3gɓ'!#sr%\!ujCPLwZäK7hX -'ahJqyQ])pbXرc!GyDY:F̙k׆yzk`]{S<2 hR=P%>zpŪF \~([[\~&u:X sG;a8K98λ@E/='! (WAXUVy6Wܰ4Ȕypue܁c^񩌞^zEЪ#Na!@Yd n޼QF2+60 UhZw6\E@_#8ZPQ6tϔU4?B(fUK6\I ~ێJ,aPU{O*6m/5J,]_G_#Y΀S5DDzӾ~tsڐPhޤ|2>CNt)>z O.S(5mzX@L8K'w:64[8_wM Ν g)=zPeXߖ }Z&%rzFe!OY Z{Z+Kpi ZKd4HVBq=PT*S92L3}/+[C`]cS8y:DzŋEv|w͈.Z[ m^=}1It'i!no ~p}[Pka^h{o2]# :\1bpѳDIkׁȵA}4xܚ6׬ު5_e0{=ʥ+C(*89xp bgEҺ? 
lQemAdѶ-*30H%q{q">0*BCy A&{ĵ Wΐ7O DOdmm i up&…JU^))WD&9e0 mᡚtF:lE(/Ɓ _)w<8OEZ@EDH>W?ȬJ?5VT2 KƐC+_:yCWKЪ8u68lOUik@&CBvECOgQ :kpeԪ9#'_B؀5G}dF4j\2 NtF)1m7Kk bbbnXߢP gƨD-؞ $X1xW ;=yqAŌ5}rR|`ZK0J%zf!__OatHK8Wkެ&R5mSiEX[0,M^doZjM9o>v+gmEP 3ժQZs=Ej xa4׺VB y` ۷& tGNx~w|!GY}u17!5`A0tB= bRmS=_!/jtr|垓7?+V @OR Q4ȷ]R9ǁ=ⷸ O>Mߐ/o7gꐺc'|}QoPz`pbȞA&뮼2 hM9fΜ)5qV\ZVE p~QXw[ o!|wJ][$*UR٣d-Yqhzk^i.nqS^t/Oӹw gl>'+nR-dD17g(ݧXƋ 5豑]/`G{9.+cQ"ysdא\K^K@ER HX1@b0 @23dO[`([ V#?NdIY$A (T~9h_zk`"#S&oii-NF% AI(;-y I6y8AOS~>eU=-нwl|cjczmԫ=GL>JCdwHSkrl0RHiKCڂ;U `6cCQQd fϞ=-9{Fצ1 :;ѾaqYe(~0:aT75F:f kUp}ynґi *A'*3A9WҪ֏F?SuL2rvʔT)'cO;y -mBZ.vHPA|025hGKʱ/r.Rxue&R{j52xmU&0gc6lAD}7~zs7k\r0˛DF-E C?W<2A#9?PJPȣجIVzQM/Բ.L.B{>X<{us&WqlC?ޞ{ðՙyUp.ߛ]H}َªKp0ً| жV-OsyP7?lٗfAn spYT MxI MXlaCc2?A|Ԣۙ5of;S[AMD+~4\8RN+fS6sqtr8a~b,Q&hk.])sw@a`Ƥ)h`mAk6%+/KtaԩŋhHqQv⨣f/!jlܐEovRzKxa1zk1TFs)' 4%_Z^׳tBd B=\֑ Y;&VB;B>mԤ{}Y4=ӪdĹ jQUͅzR}o(!)O+`6+Րv pPt޳KY8f (}T樂~FFl+dhH釚6Mu8Xz6`-#ּ:nhͺϱ^ys[TΩ&CɵhN!#vrC19!)l*4؍&#;Tiz+0RNgAAX`C(r)\< >}h'黰D R'o---!bdNiVbcI-mzqgz@ O?}G(aQpޠ':VIОZ6G.y^I&qc>9zP)AJ吐e q56>JqX"-c ]B._\B!zje07%9; OAj@d7GA( kvd< 4 4X8 /DBI8g`="S>Ea4S>9'3 5$VMsg 68 ] 9֭[6MYu?*m9t?2 "%8qBJ1;vX),'zXnƓ^W&¨EJ^֖4l?‚չtmyJU+iM=zN4UiAܥ̙uRCLR6}w l[ Eh9'嵡P}H,J8vdLS4 ~ʿQ`{ + M#ܚxJdJd_U}`P?ZBӏ :FqL,˷}v#CX0]9n C̔GZ;vɓѺuk?Tߚ=˯?.v')JqhQBO[Li$k/@ynC!HR-RZ} w7ڷoݻKGW^z_?|#f諌 rțwTɷ%rFKM y|)#b|C槃ND}E@+W4I`E&ΨNlt-2qqq &wqRV1m4I0`!)d#p yIs0W%@g&Hjåi(,pgkJ۳j9TLYa:n~Ӵ|ȤWed(bQ|+Vtes۶m  R,$,\Pʮ|v3?Ld#%wS|4#0ZqpD,d/k. c)JݼhWP5f+C 3;<Эx(`U'nFQu~-@at3u+,S&#=$WZ];wb8 =nN.v@'4dOjmq(~J4}o >)^yzE0Ȥ9˓8#kJKҎQ1iFA(lw2ׯrIFdh1fgϚϜi,@WoKq&|JvX8K`|JLa $ N>`;OFjIvY  ?ápFQ E.!TN,ǎ)ݺ9VmsGn5ۀ81]LtG<PG[ T˦ik`YN.X` Tȟ{,u5.u3ݧ1iH{q?JSo#sS6HR"T2߃npd3 Gq06ڵk)Б g493IF#-^7v_s/R <' A!_J}? 
w;-?RhMρԤ[zԩc2EYouaڐ['+oAC,}gUAc  (9,HMËGVr7H`@&aXHQ?ul0`<4?jksyLN!ّԦbԐQC]6;jEdQ9xp0אf *T6¡CyUfy` đ'СCxѣҏ3^ OEݨ\hx'&wF2\t=ܣ+6b p#{Q7 >$'>˴=$%3kL[m|EA\He0l7)|_e&qߖX`MA@mc -[r/pS KzSl |q09s*c_~E>ħÑ6 5E@EEᝲOfQN `qM2*#l2ejJAd ~h[0rE)fyqX~ Ūj1]dT|˂v]aGc&''e˖;e?]/,+ŚV[`Y,ZĞl*fbU#<% sd~:D$T~qkᛸUl웸 p>11۴U`C!lccg|x05ħZ>Zx?`_Ko硚s ONkr-=VA&lUIڴSۣSVa5v깭ȽE=9 ?)R/ ^|Ag+.E&_JV u>Xw4qs+gptf@,r˘Gy4tԶ;:J3S}AJ  'P=DΜɴMp)mS O>x 0BG@#7.a2}qHġP}ST`Nӑ~iɂ\ƎQ.Mѫku!NQ^fH "iũ$P&ąsɒ_:l&٧[l)} 3)dvAgiW(97{?̙3G:{2;a;]DU A@5䃬ͷ eR7r](tz MAT\[Gk`@$oOǥ:-jR?jUE{}5~D]!L Ȁ ;.BTT)d9t$춡gqik)_̡ K.!{Ū!(j$ ToA0WSѣ١J*{ E#Mk6-;=knWtח{&&Ϋ6O!į9K26Rj%ـKmJېAp``ݹ*dё _qggGBA 4 ] Il!i>Ok۶T@(^xL:Q8dbbԪ%w8EuMD@h jZeʳrK%wANdMluETvx1_W6C˹Z'hkSħO-p`+ "0&Eg CԶ툽By 7qDFW F/4q~o⚍ $R@"uz9JmkI/toB[^uOyhbQK]$7mY@IDATŊI c09lpA7Ç k [wPr8lƠf?Z-9Z:!uIGmy'%Sr/cCxIxZ"nBiQ},Omuc73WHٴh;@ᓫCPe1q{ ix֗LlrJL0lmAQ)fQ1|FR٤p)NH+7mtSk \ȟ(r_ޤ?3*a6 ĉ-UyM7T4 !Љ޸`@c )9QqG=0/7e'm G#xyyMZBo%q~?,CC!;發c9g~2wU~?_}i۷IXLGo߾J*,ݓ'{ܥ85h`P4F#Рi}>(/= Ɛg@ Ccp!Hgt7  ʝ5%hl~bmA0!5 mz :V-En _~%o.0@Ce$P3% Q!]f0sr5v++!EB30uC!L4h04/.+ȥ`-4h-`Ȍ\~Z*~z7r72\}gѿIpIǻvR :dSi[kp0}]6zj0CyCm@~s+@ A4؎M.QY08~Ϟ޲r.\*r~bh-CZ\JSÔ)S$c.VqU&Zmޗj;kkb0\zѱD r(;eIi@{lyP31`ȑz{H mA 24s֯9FB&u_cR0YSl:4YM?͵;2]h*RhB tS~ӖCslϋc=(YM+u NquO+ 7Jҥ:*\5m2xTerXe?!wHEkER"BL)1Htbm_}1/[v4Bzl 36?U(\6o!{LQF{x饗Xη)˛xkNjVAmouqqHݱa? u7ځL@7ICSWjohv"D5tl[0Wod±B0=ɆW^qd5lU[`}E 6c.},ZO4x:qQv, m1Z4l0Őq } o,9k3XOaO\ Z :;;v(B V#KU~zA@򰯶3{ﭔ8{hh:mj^^4o*hԨtʕR wНv N%%\>3~7H>Xu  1 dmPY7@ɿI76]`|I-o1\hyJrlj'G Ykɇ¶դ1Gȵ0IoW-B=`4*؏B__)^ ̟qͦ[) 2[:W] .^FWN_KQG G66mn/C1iٚznqm@HH"mfTft,"Crwɗ>_8~ϗzCvQ|!(!l.r N[M#9@E'3otq)'*KnEvɤPZ ڬ1 Bo#7FkX8 H.Q#Hq.\ɑeJ۹ [I-^;vί{LQdN9'ztTӜ+/=wDJ"ТE ޽[ZY)+j섿'`~SVNow֛軽pBíZf‹2(TK`P-?谇#y2:+\h1o.oE4Q-PdRº@trlt%uRq/Fl/:uzeSzڒP@|o@ɘ Q>eۯd>^Wn@w-RJBd y݅ gAq~-Û eoc?Pvm/[Z@񾅵qG BG=WrdSnxz}犆YhGdMk=>\O.vz YqH>܌^Ѿ- %IHx"3[whH#PM'|{'o'e,+NWQyPVKafrz? 
͐Bu9sF$$`` 7m\,.O|80T?9saiȳ};^r8[LJ1Uɀ3( G,G\51Xv<;-J7|)VL똂4u{ES1m1x,e2qvM5d1LNQm۶a͚5w 0wQ~ewח.CϿ X~9+ m~ZbVҊ'pfCʡb=ƣ0583 ` z]GdCN G.~nSSNևY4(W ŜHΕK7b\Vəg6h޼ب\&V[+ oEm[akLٲ",# {Y5kWSꟑgo}~4 @x9o ٢%QaC--So%''K.;"N(Tw}WqMӧOr'\bꍀ wëϣlCUhߴMHO\hz36df"f<}<yZ.s>l,\*챳He"?c֣@)$D6Qř9-,:^#a Aeo'~ŊҀVrpCNd|Ȼ}Cڂ+CbwC}8rAk3uD J8etDzcm3|zYi{uDn: XxHXZj8|Dтg ]2bxogKxclp"6c`leBva}liF 9NQ!UsJuͷYWdE¨W|Ƈ"0|p)`& `^l8^@&xivgu&vpxpv1ج䟯̦={`ƌ`5Z>6l2R}{ˇg B0~YX懍 I"Ek5n}vJv),Xu{ADYri?x2BsHs̙3^9HoQO ÔEO~_y(Zlފn{)GCA1t9 U={ v9ɱv c<AS R=0޶}&J0T@}Q޼[/ח,u(8C,mO <81S'7r.]p XH ![ s& m3Wl:'M> fUKa!Ro8x/_B>gPS~ qLBAUD* i=+o:'fJ}΍|FUدS׺JW$MH$%9<62ԓfZto^XI>v4qDxא]RG'# td\LYaZ@ɾq q%E pd!pIvѪZ~5б1|tRVe/3v%+AJM Z0M63$&$$QK/;B~p HF^!BGG&_e9?P,,JC|J@@3^8;i0>wl=<ٮu\:k0nwpTD2l(! ɻCWe^+ ,ڣΤɸlE襵 vu 1]rҟ?Eɋ=1ʢ_MH7o_u S0x74#.phЃ@dDi%r* -_ͩٳ4Ԥ>h]xyiHxZ_:xX-d[s=`Reuvr{N <,MޢtQjX5Yj>k|n->wG;w+Aa |5%E Rw: 3p*빪xHE`sC; qNˠγ%!iGy9WQ:. Q[;n/^۩K' CZ :&YnwrGfݾFH4Et$S H 7N`*5ifӧUV%eʄEB#uW/BוZ Ùh+Ga5N8A5umꄞ6-)O\ж^ǧP|po'L#ʒbR8vR\染: ,[U}AJ0*T%IY}FYL̎O}wsT8I(KOxIyZ q,,H@|17WJdv~w !WHٴY{fHd9aԛo?tLh(/@_ I%fígHUww[%j j]7;\~B'V|.JQ 9T@?,a57V;,˳cG4庲 \7ޔᦈE\0\ :UqB0J ZsP r[w#쯿W%,/!XB..C j]]^}aB0R5^cY(wBA1ɉ2A{h==)DI^:*sָٌލkQM² /Q4vp'fsw[U9ךYy튟Hݲ ֑6lGFȗezX Y'Oȉr/x -ڄ``Gp#QoU`%# JQtfЃH۹ЩLyM}P(aR花22 5i_3@@ PA/&d_9R(e!5n~j~[buR5F dH1`XɃr~KlG( J J V;L9BaIzF(utGo@؈Gm8YtIlg3%@@l%,ZXE[YGZܹa)1UC @W%Xk7ȭ, hfDOB΅Ea?K` \4(u%`pպP@@ P@S A݁J7~/+Ά9W]މx yL+U}%Z_\$zOhyH2W3Bf&KT f " PtJӭr1?v?@2tWgB%* 3>Qx}ԢLYGG\O!TCM\~Dݚx7ٵQ \UNu8c?H^#MaLǜi;a|ޏl5K1:p:zvNO!56d@dGp5z bi{ŧ磎P@AE#.Vhz9e$&qȼK2>jq`ate+O'@25p"ݬlG9ԴL= )p}\p\ J(<*r~d$GLd1;z]8e_UJGIm}/7 tsQWIe7z-ì-8sK2%tHq1"|]am#**@( h:u F> }ȇZ7Whoݭ_jJUl$b0+]gܵyj{[DX hGjz-ea'JDMJr^7o/oUSr !'AZ0O hIڀm W >iC򑞓u46zފGd0TV(9,S᏶XsU.w;+z ;=%;QRb{>k#6R؉}#7@t7:/TYfEMA1 g%Ubrc6UbqeGorM-,5"ޤX>m$bJB@h * x1@*"p4 &RITw||(|&HKZmNUƺJ2g,-n%NƵ˞#:0 GGzx+ ΂c(FL,($㥨o7Qi#HG{Z6N [`T7W/ʚEk*J Pb86/6Qް*6/}fQW/( ҤUY^+{k6X H PAUDzδG4dP쁩v̪ʠr}(5Xq8\ ;.`EY3+>~=A*[+@AP+?vqӵ 3Y10v]qo 
M&:`m춗Ge>_δ;tn~fF/CmJn8/Te{s1l)kGZRA-mn8q_lc0Qh[x=ݐh]$6J˶YKhA9VCM!Rl[2omt$ä+ᯒC J~,bQC_iʲHYLx;Ɔ<>;OzKdJ||)reSqLȰZ59J$sD@5sw%5Tj8Jbm\m=:)G^Ecf6rgV 9"WժsNNE9;RТs94$'n6quD@lU ՜+ ;gȍ+dgiV_}ZBb1G8YGDcʟ!,sԆJ']8[3*#ȸqvA#$O28|ƠK㐰 MME,J8M x4`6qC1{3v`SR{̟L&·-V۰|1_?|KZ\Ql|^]TSŬU| ZW\$Oe3AN\٪_].:_~R>z F[SXHۜ{ҷ+}{ ?J߂)עPu{W|0aO oTJƠUmX0m%Ȥq%U@6LInU\^Wk倫 콠,[FFm# ݯdA4AMY\4o^( &` Bq \48R6o]B=:jA? A.Rc_N 9g"zH;Ѣ/$-L+6mB |VBb*gU-{lw `>y6ռcg!!dwe@ j񠑷&}oH2)֑{;urG\ӅcA!I |8".s,JB@z ppSGZlٳ7h ^ O}7eV ;>dŋ4899R\8n2Z}3HXB96s<8 U!TB,j"1ej-lMR:xts-l'N`ӦM8z(^zaȐ!رd `KRwdN.rC@:"qq )).%(v C:DOaPdh[:ׯڵkS%ddd`W^y ({$N+#C@ mZo1oo.Yp#u0>im#b ̍7bٲeHHH`gY)f={Jo]tA``ͺX8|0f͚.4m4L8>\eq9q d:6 ƾ!;b@e:c.CV.NKW}M^؎`ʕXd @M.#F :` Z(7oq뭷UYΙ`QIgObe8޺I\  *t1#pvO5z}2}~<ݸ;m+|4?K^!#{߾}Jn?H@iehh(._]Rnj3ߣaÆ 'dJ+'Q0:r. ]+ע BwW~؊ #s%Il(؍)këÃ<9Lx xo]` Og?~|taU丬9 o&Ezs,nf`C{OWń69xB&CXY[d!ae/k#5 Ν;ѯ_? MRysE~fΧFCZLu},E]|7M\e=b FutYgvm^"K݅NfHgM>'OɝƎӧ; on $9b宒@-3vI].Ye[5M|F($}fʧaMR6yfg#*E@eM PnN GxC|E.$V\8uT3aTBf7e >#av4Զt2?sZenO?O]pǎS^lii7Ҽ:vmdmLlz̽LC,@ 2Q !y94loo^~3k'/j ܕRHᇒj^?uQo@Od%iNdQ&*_Jg]ld18 zy*~ܡ{(O:83kDd6+_JYRqȗ,Ơ @CT.1 *r.\ŔϪ}5Vu×HpBAk/nt1]ѻwo52٫27]ED.{Op1Z HsM]Vʅؼj-c%1+qT {Dzb3UfƑĭJ JUxNZM,H'56Xh}G8e|č๼ oF.9~)'4J&`p{єȡ\ 3cۑ6NFh ZpoZ(]ӧ#+#ͅq#:/RSq~؞TIiaiϠ#@AU ĝOL,tE N.BZD#o"{[S`m$uD!|ݎTdo۟6PRiqv튶ųk3hmV y݌n ϰu&hڱqQe6n rwi;"8qYRc_N'W@@ PJ`PJDC#YE)A݅8QTF^V*Gnגx`qqc^0q+_T(5,a!/㲯Rp}¥u`ٜ ON E0kemR H'cnG6ӆнp8ٴ @X *-7@߷XSK=,甤Y\%@)^ MtT$^N(HMMm 5k&4< /Y[18;$g):Rֺ&IqJqŬ2{[!l G@h J)2|gR:d^'CX訴o߬E"vpIoh(o_L~nTr!K nL/ c`"l޼ &yuI 3Jx^&Ɏ#_Q[r͹"P73`JJJB4m|!bd@#oFG:tr]GRHl+PϹx.u]|hIܰ E'<7dg|j]%^6B"{), dMW x`Pe6NI1K9JkW9 Nglkx$`ymEġ:w9_lC;Nrrh2HrYfP0B@+/dŸrF̙3ذZիWY i$l0'zTJ+t,|2JzPwd:$إ3E,ɓ@" S&0ke 6L'+w:G_R$u\2 8"o#XZ{at}: #YHaA(ᤰqD@@ P:`P:D/@"Ɔ1_gNTӗq1"s/x <q=2 qMޘk̵tu& _}JLq޺ ),̏e,rF@ ^ Pذ(2 /}s5"r`H6$R%(j)8H&(Mj\ͽE_sﯛ l)ڜ!WJ<55l-,_+9531ō @B@UKNۊnaet~m%f=#rrq'Ҩ*M߁[q{J" .8h^L|H‚=Ay8reLI-1nq8s?LM B$8H$g@l^ eM P*zLzQAsR}7uO4#:fsϡshߔ7IDNZ%Uf;pmxaAUK1΃spp!#9\j4P1pӺKҶ`kF :nwM!=bD@0s¤~? 
6m rs-|Ь7waz98<ؖwֹO*љ8#$ĉ .th=_klo Y$M/7x ,q2(Gm}Y#~a$eJ(+$8R"O')ۘYSKU0Ü'QSekT-9$Bk>{P_(Ws/Ղޟ;6 V#5wg6$?B. B,LAduɫ1&u\'N9qbˆ0xk֕u\Oy Wr/J[O{Y {<1I zPxi,(M s6G P y&8Օ4oG _+eƊ.?xdߒZ@( ?ƟCܨ $$= #||nwyS NQXS\',5 ,oI WڴAu 9x˹>Q*1@#y:1ގ` [͖12%"M*7 ^&MBC i7OLB8|0x tYKƆs/(Z^x[:̣nL;vWng@ # *ሥl xaM&=3$E ɞ@[G}z"8Wjm.![|ի8q"z!}$h#)\L妔,@N\"s<{o}0Ƣ $mZ>ͩB}[CI Fq!T$0@IDAT8B0pńN^x6g LKAߍ1RƷ/u8øjW{r:H^ϢwGE@p&?'A@"&do`pZ"S=!e=B @4-]9@);;"S=dV?+5: UE0(`HH5GW[T!&D7z*U"rq <3?*bq0c ]VsTX< (?bwg׶6cW|>}TI999s]O?œAafW{ìz9uT *ԣla8۹(F=nMUI`w.nYWW|65Xfذa0k,1bZPbb"ܹsj5s(,xꩧJG%80, QLJ,_$nR-/.5~=s 6$%$03 X~X>sF]m)↑l@\\4iDoܸ*OV|Tí`ԏT9  Lu'\)B!{Z 38艳NWҦbAߊIVU$((HC;XYU4T<>|XV GW>dddgC@Vr'ea HytN@K`o0mq`kxlJ{NMMܻK ٻQG$R xT`Z[k 8nNйe#Du5%FCi =$ c־Ĵ;w^Uz:88? %qMU6ZđLJ~(N!(z6^#D⿵CwСCШQ#:(J/͸y:Ûj,=8'8Ha< D:Fop,|,:xRHt ^ ݐr#YK > ,_-ьtm~+k,DD$&“APοS^O,?$ | |gX0 X:e vт?\s#!nk ǠC-G_$op)VxFxD (u=/C3kn%}镚)PVa;gB]p:qirWCmto NPb.Q ,`o4 2'|y|w:J/Rh?n-̺ RE'qgruSZZ*pppŋUjC6JrQQZlq+MƋ~q ,Aw 0S^fGׄeD M߂(X{pQshAꛚ;,Yvat0k5eS0{1ifeMѪpJcÍzKz-` _7J &?W͢k"`~SyWŁ'NT~7IUOBr4AY %jAkpixs*ގnNX zK # Rj:3㺈75-V5MD d^cSn\:+o%fb˗㲰s}w=9/3dH\,Y֜; ;eW!8̈{FUdC[[-j \ &Q$,*n5ݻUNs|۰O QEw2>u!A(~/\F 6?h _cתynQw~GzD(0yԗnzA5<-[JA2!ɛP@T*p5KU~L  mVjKNNNvy'<6y*P5j>]s& !G$B !gt9C¹bham pט9.y\sΆ_|wi&.PҪQ}P"~ܩr XcnGO<[ߌ꼰@<ǿ%?{܏xs`U'5J*~sZ-Rt 3GGGU>"PA *!^;ܐ}?@] PiuIybSMJ&a?{C]=U wxuH *4 B&NZ&<<"J ȣbR2~Qw ģ?uQ"A{Zo>ؿr… 5%h N =\sxun,+uaēp"Zf }LouW?Nv}OWuU5cRJ1 mٲEb^ 6>!G㮾w+@/1P49t߷~.Bx@=^W\T tl-aO|s( Hy1)UkcΜ9۶mX;vlekHNȲMpuƠb`i-\B#D \&ng 80~~ros ؘ~!8w}v~q7ѿyI  *nAدo}W^ݓTfΝ;:zF TltA#,MBPcf-iio;;;qD-:X;Mg`S<-D/S<0Hv6y0&@ӍSҼrE/XšG.&QP) "&u99XyERZ^ML:աbWHIIׯU>}(?~T̅r\dj Š5:L-|hE 2x L\y1?|ENC/NB#נ3{3 |kzjS^2كqwсx:(thfټp jRL`č!芪PSqOŋUy9ӦMS9C %Khja8x1I,fsZ}r BFvϡCeY`rG.ú?" @@u0z^!Q [[z:75u=)l_lRن6Ns1r{{caI9B֮Дzf!sp 9=Q{f'#KClMcfۦf'ŶtBC,{x9\`%Ѐ3CU1Lw Dv1)tᅪ𽭵5 iE3]c}aAMӕ@p)*#UKa0՗C\~P"a"JNœ^ 5+׋-AR$Gs3ψEhgq(|4fUƩcT7]oh|H~rIyş?:NoMs"`R [ P@dUQq &(z_M1/B^U"aꝩlq ۸Lj3,*36 0٢AQ˳$N}Plk 4V$/s'YV,s.QրpwwSNi`,| n_Gm̀HJf i_"N[ +ph ? 
f=3:1Wlُ'n:q?\#pO:7Ϝpst:enR!<أNeؙ ߙS N߼`ycwp"OdX_w,_|7n81'gCPz[0i(Jb13 r-ϯtjݺcǎu#!C#~pU.yakgZ2iǀByg]`W=9\\ة%;gk}!KQc_: EנX&kkLw̜9*d2㧏h0e)bRW;8V鱵+`%+rƇځ]柋|we%(f%'bmLw fmi)^%q!5~WO+8kgĥX_k>{~z9xZeDGGUYĩW4qgIwnKF`^L X4}t1bF:]s#[ [Dk8l.,W: )._u/hT"].y(<>0ܻ?KZI4j 3Z ={CΏZN!n,((P&@@Q(Ȇ w.\ NE`b!\@Q_UYr*xUr&!SPTZ9Ҧww¨eD1Ps>0wUB+=?L_ST(wNQuF#M:˨g&''ĉ麜3)@I|r|ե:%&C_ S2՜i}[/pЂ@5e1`N uت0(#Z}MSU2W &Eԅ۞8p9)ZvNCB.uضʦ52CA"/5zEKMj. 1P8-l5~L;>wg2i1zc۽`i`L62ıb>c !.Pqkc=wޢ2 1ij!C 吃03YhP ]aZbǤ@emà_sڤ0/MXqt{Egg;XCvhkN6) ]~]g&6v ;65GNS'P('ԲdhmvRgy[;ٵm'}>p_ΞnGˉZF@CIoC|Z`C|-7ȇ|I^9 iy[9dám<.[ H=,%O+#zؗԓ4λI` D {(CښjZ5q6Lʔ@LC>Q9+БB. rܐuΘ(&@ХV}_`4:\t<۩)' 4X@H=V?w;VM2YpxA(|q]`Ǡ[]E孿 >VC5l{6_ԔU (=l{ADĥa ܕObPdd 7Aмgok"IA: w0pna_&?ܜj+%=Hb^ƨke_rZ q'p!dRԈݮ8Ho†Ntv>jk;x6j m:YX[9J'@Zv!|* XA>ƨt`چ1 J@Dtr7Bt"g߉dn~`?_l7GtBNf-"PR&=nBб((-cwU&W]\{ACOlߎ ,Gŵ*&[jU 3@; #){QxFxLyYue&㪥JWvWܶ5Xa=/dDC2C``L`>ѩ1X0s >Cѓ zOQwܽdk-,6[c Xam5)ˡT('3(%)E\xU@0e2OޅPͨxº*N Ĥ%ۃah(,)@, \Qȕ[̉6t>NV w/u\PG$1_|,>;tg \N#<:Z:eTEU8~q?DD 21fT>#$9B97 "~ɪEftr6o\1 UKt ^I޹"5O(*RM`^[ctc=1%UoxUp!Pk~;2eRżMʥfCK IxNA8p8[lsZ ;CƎAF`F,^\YYKasXJ0)j;˕`f;1p2끬1b`.7N*Hz,e]_OEKw8p!Vk|K>"晌 19:I?Ƒ9t^'i/MoUl2@m$b0V6b[}oЂb>lO-W| 4dkܧ[D #ӺZ%8{[|APȐd0c#:fFE10Qf /2k!_(̶:aFxآESзZ6Z0 ]7^4Sa_O[ 8{FЂsCW 2:*ܰuôhс9:ҭRY | t[{i8ФBcR6'X͆pv:ؖ7jt'AՠB5} .CN;$'X0n+?dI->T; m9lLfBAЛwzn1z߷!5<5&ygK/KYqU4ph`ހԲd۳i!P'r+1qv)A(@s+ˋ ƄL9ӈg4s{w++i <][]e09G]pHM>p5ȒM;q7GXVjnR 35BC u&`anGT׎f JZ&+?/Hs-\Dz׾׽Ȏ߀7CѶnBy0\[ [/1wltEr]028 4iK憭wS1h1pk|Zc h<ԣ=:79~3툫Ucg{#e5kQRk7} >kmA I +rCbkYqaHf#>;[ͫpwRz Z;rᖬ{ uy>7v吷rA1J) Y?fQz*$@ ˮRdaxM Ei6=#ϜOHI^hR!';bjvn{b}NO}c(LpmQ9,H䊹k'mau ǠN<&9f̻ըY8C,s碼s+FeIiU^Dw^4:i8oA#M\PdAP, ݊=wtg} c/A#9i`6\ ^asao냀EtT?v ;TOL %6'"qZsMaӈ &K E"o)^Ay{+u+Zʋڙ {m u4{Pbu%!tg9dal^woW+*r Lݧ޳w$_vZgtcΞhZסEe3NGz\[me(&?r178O!Q:@ߓ܌!!@< aV6 po?pu]?WBμp,wVM,C@9Kp?9dtq'1HYgd10'ñ]}g5~\EޙLPlOpu_"zH)*(ːZey(ώ y]:50gӭҷA@GUK';k_LHL3(9!ceB94Lr L!nߎ7RX ݾq{ ^& Z8}B952.O i *f&10k7ڎھMP2m 8Z=*ۡ@ %p+9[C[H!e҂oPPs) '@?"6`FO|םd!(dpe"W$( 8mJxAOc`r0 a 0آD&$ *ٱhLyb~?)8( [7E΋17:`SqKҧ8R=CnT?׹>f.haL޹Oq`QhO :A tts3_ E 
3M{7tse'k\% L-"qk!jH4$}#@]moΫPw#yg^XɎ/*ǣy {p;MLalr rΧ1*ҷ/ì}IEJ &|6l8tr J+3>rе0k9wiD*rbա (Ltbac8! A|8n33`O?`5Ǝķ6gy(lǻd=QGN\) %ɗ|n6F'r hսfo%Yogx1IOя3/dǸ .CK+y9 [?.Qd)1'm~9G'QIҕ-Z7q?To|';bJvC>~G9\BP qZcJ10em5fS ʹBеsFFsjН@TB:$Ihur^vɎe 2Az3`.-&]`Ł[HdF¡u5l,,,~\< SsDV#(0ͅ>J' &BO߂cנy8+M2L!@<(Slߎ,n :4wO"ygS|KX\D&C:u~h _0/d @s'0p+[E=quG{ mS'^&F!@]pKu8C,/5ȟ|īdLc`&ػ1pZP{+[*t^V+wvGygp#yg^\N 09#P18E1ƈDrPQ⽈&տuThG8Q)N|f=*IL;3/dGGLXQׁg k}/Ivcp?BoK>sMEßqʞЧSS^&/PCfQ!$((4pl%"r D @D",@:߈">ݿ8&D@xcmM$R LE\Zޝ`ʰ^ؙ䝍=` UP :(AQOnls"& '@f~LOj;O|'`k U O'our QZ.̿V{50˓vIo@*]ou! ;hC-*jсu(|/ߦTYT wQL9 -]64^}^ yP7& [X Khu09Q=)Bi՘priyPM"cXӝ88m{++[W80 _FyI8LΜi-F9|Xhq䦩AZI*c@%5kT&YC:N[8rpg`K\ǢyeN4_z3s&@|t-o9;K@X#mxuN+q̫id*䐓3!uN84L#@A$䝭m>ocHYn{n36$q5TCF] ͈ c`@dx9D*Vc$O^hs{?^\ i!M02(:sC'7d6hR_iRS،McXe^7Yx yW6}7[#Q0, Q2D9F@9 FVKlo hӞ $̋(;(-+B`UyI i)Y!I|.*@wTeI:3yw'yg^Pi!} :ׯBNMmH޹D\ĥcKp32.,L`)0Xr ,ySoM윾0&\/psRJ-2-BX8_9d|`YbѲ c`̟m}J%CyYwFBC%eru2C9F M` A&e}x;O}7 DUrgoנ2: ¼ T%]"F5tJ cT*YZQNέ??еN̵Q),z!bAx7$W^60gӥq'7ѾCZU޹O5/jmI 3`rL߀!@>(Z Yn[ 0JcF;oNRte^mecOx/Fm'吷>C+,$)4$(/޻7ŨP㈀  5t >TЗWءsxEUޙ9ca_#A>|P^64X'0flp Ք <ڹܗYY",U8%W | c'Y#!qAo^7h3S;39!qrȨw#sq W !@aU";5d?AFnZ=79(\\Z/Cl $+k!C D;r #%D(>d&ީ6(_+rE Tc!@< jБX%Xՠ!+ z#2NAc`d<8^A#*cZN8OB֌;Q=R0ʓv:Ʊe`1 Q{:Aqpo@x1xfD|~B`$pw--]zi:Q &DzXT"-@ fwGK9A$rutGe0~3z4V"~ۈZEtn an?C-N0#23 u*]nprtB cKj n ,./)x7W ]"`Fh&uEkvR+G?ryLu 6?nBz"`10J"T fSpJ.^W9#Dc`^ϓzC$Tu\ Eq8L~=tNe 23H` 2/: A!v/GP#.J$D"@ D"@ D"@ D"@ D?@2,|IENDB`mclust/man/covw.Rd0000644000175000017500000000210513465001006013724 0ustar nileshnilesh\name{covw} \alias{covw} \title{Weighted means, covariance and scattering matrices conditioning on a weighted matrix} \description{ Compute efficiently (via Fortran code) the means, covariance and scattering matrices conditioning on a weighted or indicator matrix } \usage{ covw(X, Z, normalize = TRUE) } \arguments{ \item{X}{A \eqn{(n x p)} data matrix, with \eqn{n} observations on \eqn{p} variables.} \item{Z}{A \eqn{(n x G)} matrix of weights, with \eqn{G} number of groups.} 
\item{normalize}{A logical indicating if rows of \code{Z} should be normalized to sum to one.} } \value{A list with the following components: \item{mean}{A \eqn{(p x G)} matrix of weighted means.} \item{S}{A \eqn{(p x p x G)} array of weighted covariance matrices.} \item{W}{A \eqn{(p x p x G)} array of weighted scattering matrices.} } %\seealso{} \author{M. Fop and L. Scrucca} \examples{ # Z as an indicator matrix X <- iris[,1:4] Z <- unmap(iris$Species) str(covw(X, Z)) # Z as a matrix of weights mod <- Mclust(X, G = 3, modelNames = "VVV") str(covw(X, mod$z)) } \keyword{multivariate} mclust/man/bic.Rd0000644000175000017500000000355414124774626013536 0ustar nileshnilesh\name{bic} \alias{bic} \title{ BIC for Parameterized Gaussian Mixture Models } \description{ Computes the BIC (Bayesian Information Criterion) for parameterized mixture models given the loglikelihood, the dimension of the data, and number of mixture components in the model. } \usage{ bic(modelName, loglik, n, d, G, noise=FALSE, equalPro=FALSE, \dots) } \arguments{ \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{loglik}{ The log-likelihood for a data set with respect to the Gaussian mixture model specified in the \code{modelName} argument. } \item{n}{ The number of observations in the data used to compute \code{loglik}. } \item{d}{ The dimension of the data used to compute \code{loglik}. } \item{G}{ The number of components in the Gaussian mixture model used to compute \code{loglik}. } \item{noise}{ A logical variable indicating whether or not the model includes an optional Poisson noise component. The default is to assume no noise component. } \item{equalPro}{ A logical variable indicating whether or not the components in the model are assumed to be present in equal proportion. The default is to assume unequal mixing proportions. 
} \item{\dots}{ Catches unused arguments in an indirect or list call via \code{do.call}. } } \value{ The BIC or Bayesian Information Criterion for the given input arguments. } \seealso{ \code{\link{mclustBIC}}, \code{\link{nVarParams}}, \code{\link{mclustModelNames}}. } \examples{ \donttest{ n <- nrow(iris) d <- ncol(iris)-1 G <- 3 emEst <- me(modelName="VVI", data=iris[,-5], unmap(iris[,5])) names(emEst) args(bic) bic(modelName="VVI", loglik=emEst$loglik, n=n, d=d, G=G) # do.call("bic", emEst) ## alternative call } } \keyword{cluster} mclust/man/combiTree.Rd0000644000175000017500000000306214124774626014704 0ustar nileshnilesh\name{combiTree} \alias{combiTree} \title{Tree structure obtained from combining mixture components} \description{The method implemented in \code{\link{clustCombi}} can be used for combining Gaussian mixture components for clustering. This provides a hierarchical structure which can be graphically represented as a tree.} \usage{ combiTree(object, type = c("triangle", "rectangle"), yaxis = c("entropy", "step"), edgePar = list(col = "darkgray", lwd = 2), \dots) } \arguments{ \item{object}{ An object of class \code{'clustCombi'} resulting from a call to \code{\link{clustCombi}}. } \item{type}{ A string specifying the dendrogram's type. Possible values are \code{"triangle"} (default), and \code{"rectangle"}. } \item{yaxis}{ A string specifying the quantity used to draw the vertical axis. Possible values are \code{"entropy"} (default), and \code{"step"}. } \item{edgePar}{ A list of plotting parameters. See \code{\link[stats]{dendrogram}}. } \item{\dots}{Further arguments passed to or from other methods.} } %\details{} \value{ The function always draw a tree and invisibly returns an object of class \code{'dendrogram'} for fine tuning. } %\references{} \author{L. 
Scrucca} %\note{} \seealso{\code{\link{clustCombi}}} \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) output <- clustCombi(data = ex4.1) combiTree(output) combiTree(output, type = "rectangle") combiTree(output, yaxis = "step") combiTree(output, type = "rectangle", yaxis = "step") } } \keyword{cluster} \keyword{hplot} mclust/man/mstep.Rd0000644000175000017500000000671314124774626014131 0ustar nileshnilesh\name{mstep} \alias{mstep} \title{M-step for parameterized Gaussian mixture models} \description{ Maximization step in the EM algorithm for parameterized Gaussian mixture models. } \usage{ mstep(data, modelName, z, prior = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. In analyses involving noise, this should not include the conditional probabilities for the noise component. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is given by \code{mclust.options("warn")}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. 
If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{Attributes:}{ \code{"info"} For those models with iterative M-steps (\code{"VEI"} and \code{"VEV"}), information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \note{ This function computes the M-step only for MVN mixtures, so in analyses involving noise, the conditional probabilities input should exclude those for the noise component. \cr In contrast to \code{me} for the EM algorithm, computations in \code{mstep} are carried out unless failure due to overflow would occur. To impose stricter tolerances on a single \code{mstep}, use \code{me} with the \emph{itmax} component of the \code{control} argument set to 1. } \seealso{ \code{\link{mstepE}}, \dots, \code{\link{mstepVVV}}, \code{\link{emControl}}, \code{\link{me}}, \code{\link{estep}}, \code{\link{mclust.options}}. } \examples{ \donttest{ mstep(modelName = "VII", data = iris[,-5], z = unmap(iris[,5]))} } \keyword{cluster} mclust/man/wreath.Rd0000644000175000017500000000103113175055360014247 0ustar nileshnilesh\name{wreath} \alias{wreath} \title{Data Simulated from a 14-Component Mixture} \usage{data(wreath)} \description{ A dataset consisting of 1000 observations drawn from a 14-component normal mixture in which the covariances of the components have the same size and shape but differ in orientation. } \references{ C. Fraley, A. E. Raftery and R. Wehrens (2005). 
Incremental model-based clustering for large datasets with small clusters. \emph{Journal of Computational and Graphical Statistics 14:1:18}. } \keyword{datasets} mclust/src/0000755000175000017500000000000014157117042012504 5ustar nileshnileshmclust/src/mclust.f0000644000175000017500000151702513726112623014176 0ustar nileshnileshc modified to avoid printing for calls from Fortran within R double precision function dgam (x) c jan 1984 edition. w. fullerton, c3, los alamos scientific lab. c jan 1994 wpp@ips.id.ethz.ch, ehg@research.att.com declare xsml c jun 2019 renamed function from dgamma to avoid warning with intrinsic c function already named dgamma double precision x, gamcs(42), dxrel, pi, sinpiy, sq2pil, xmax, 1 xmin, y, d9lgmc, dcsevl, d1mach, xsml C external d1mach, d9lgmc, dcsevl, dexp, dint, dlog, dsin, dsqrt, C 1 initds external d1mach, d9lgmc, dcsevl c c series for gam on the interval 0. to 1.00000e+00 c with weighted error 5.79e-32 c log weighted error 31.24 c significant figures required 30.00 c decimal places required 32.05 c data gam cs( 1) / +.8571195590 9893314219 2006239994 2 d-2 / data gam cs( 2) / +.4415381324 8410067571 9131577165 2 d-2 / data gam cs( 3) / +.5685043681 5993633786 3266458878 9 d-1 / data gam cs( 4) / -.4219835396 4185605010 1250018662 4 d-2 / data gam cs( 5) / +.1326808181 2124602205 8400679635 2 d-2 / data gam cs( 6) / -.1893024529 7988804325 2394702388 6 d-3 / data gam cs( 7) / +.3606925327 4412452565 7808221722 5 d-4 / data gam cs( 8) / -.6056761904 4608642184 8554829036 5 d-5 / data gam cs( 9) / +.1055829546 3022833447 3182350909 3 d-5 / data gam cs( 10) / -.1811967365 5423840482 9185589116 6 d-6 / data gam cs( 11) / +.3117724964 7153222777 9025459316 9 d-7 / data gam cs( 12) / -.5354219639 0196871408 7408102434 7 d-8 / data gam cs( 13) / +.9193275519 8595889468 8778682594 0 d-9 / data gam cs( 14) / -.1577941280 2883397617 6742327395 3 d-9 / data gam cs( 15) / +.2707980622 9349545432 6654043308 9 d-10 / data gam cs( 16) / 
-.4646818653 8257301440 8166105893 3 d-11 / data gam cs( 17) / +.7973350192 0074196564 6076717535 9 d-12 / data gam cs( 18) / -.1368078209 8309160257 9949917230 9 d-12 / data gam cs( 19) / +.2347319486 5638006572 3347177168 8 d-13 / data gam cs( 20) / -.4027432614 9490669327 6657053469 9 d-14 / data gam cs( 21) / +.6910051747 3721009121 3833697525 7 d-15 / data gam cs( 22) / -.1185584500 2219929070 5238712619 2 d-15 / data gam cs( 23) / +.2034148542 4963739552 0102605193 2 d-16 / data gam cs( 24) / -.3490054341 7174058492 7401294910 8 d-17 / data gam cs( 25) / +.5987993856 4853055671 3505106602 6 d-18 / data gam cs( 26) / -.1027378057 8722280744 9006977843 1 d-18 / data gam cs( 27) / +.1762702816 0605298249 4275966074 8 d-19 / data gam cs( 28) / -.3024320653 7353062609 5877211204 2 d-20 / data gam cs( 29) / +.5188914660 2183978397 1783355050 6 d-21 / data gam cs( 30) / -.8902770842 4565766924 4925160106 6 d-22 / data gam cs( 31) / +.1527474068 4933426022 7459689130 6 d-22 / data gam cs( 32) / -.2620731256 1873629002 5732833279 9 d-23 / data gam cs( 33) / +.4496464047 8305386703 3104657066 6 d-24 / data gam cs( 34) / -.7714712731 3368779117 0390152533 3 d-25 / data gam cs( 35) / +.1323635453 1260440364 8657271466 6 d-25 / data gam cs( 36) / -.2270999412 9429288167 0231381333 3 d-26 / data gam cs( 37) / +.3896418998 0039914493 2081663999 9 d-27 / data gam cs( 38) / -.6685198115 1259533277 9212799999 9 d-28 / data gam cs( 39) / +.1146998663 1400243843 4761386666 6 d-28 / data gam cs( 40) / -.1967938586 3451346772 9510399999 9 d-29 / data gam cs( 41) / +.3376448816 5853380903 3489066666 6 d-30 / data gam cs( 42) / -.5793070335 7821357846 2549333333 3 d-31 / c data pi / 3.1415926535 8979323846 2643383279 50 d0 / c sq2pil is 0.5*alog(2*pi) = alog(sqrt(2*pi)) data sq2pil / 0.9189385332 0467274178 0329736405 62 d0 / data ngam, xmin, xmax, xsml, dxrel / 0, 4*0.d0 / c if (ngam.ne.0) go to 10 ngam = initds (gamcs, 42, 0.1*sngl(d1mach(3)) ) c call d9gaml (xmin, xmax) xsml = 
exp (max (log(d1mach(1)), -log(d1mach(2)))+0.01d0) dxrel = sqrt (d1mach(4)) c 10 y = abs(x) if (y.gt.10.d0) go to 50 c c compute gamma(x) for -xbnd .le. x .le. xbnd. reduce interval and find c gamma(1+y) for 0.0 .le. y .lt. 1.0 first of all. c n = int(x) if (x.lt.0.d0) n = n - 1 y = x - dble(float(n)) n = n - 1 dgam = 0.9375d0 + dcsevl (2.d0*y-1.d0, gamcs, ngam) if (n.eq.0) return c if (n.gt.0) go to 30 c c compute gamma(x) for x .lt. 1.0 c n = -n if (x.eq.0.d0) then dgam = d1mach(2) return endif if (x.lt.0.0d0 .and. x+dble(float(n-2)).eq.0.d0) then dgam = -d1mach(2) return endif if (y.lt.xsml) then dgam = d1mach(2) return endif c do 20 i=1,n dgam = dgam/(x+dble(float(i-1)) ) 20 continue return c c gamma(x) for x .ge. 2.0 and x .le. 10.0 c 30 do 40 i=1,n dgam = (y+dble(float(i))) * dgam 40 continue return c c gamma(x) for dabs(x) .gt. 10.0. recall y = dabs(x). c 50 if (x.gt.xmax) then dgam = d1mach(2) return endif c dgam = 0.d0 if (x.lt.xmin) return c dgam = exp ((y-0.5d0)*log(y) - y + sq2pil + d9lgmc(y) ) if (x.gt.0.d0) return c sinpiy = sin (pi*y) c if (sinpiy.eq.0.d0) then dgam = -d1mach(2) return endif c dgam = -pi/(y*sinpiy*dgam) c return end C modified to omit priniting for calls from Fortran within R subroutine d9gaml (xmin, xmax) c june 1977 edition. w. fullerton, c3, los alamos scientific lab. c c calculate the minimum and maximum legal bounds for x in gamma(x). c xmin and xmax are not the only bounds, but they are the only non- c trivial ones to calculate. c c output arguments -- c xmin dble prec minimum legal value of x in gamma(x). any smaller c value of x might result in underflow. c xmax dble prec maximum legal value of x in gamma(x). any larger c value of x might cause overflow. 
c double precision xmin, xmax, alnbig, alnsml, xln, xold, d1mach C external d1mach, dlog external d1mach c alnsml = log(d1mach(1)) xmin = -alnsml do 10 i=1,10 xold = xmin xln = log(xmin) xmin = xmin - xmin*((xmin+0.5d0)*xln - xmin - 0.2258d0 + alnsml) 1 / (xmin*xln+0.5d0) if (abs(xmin-xold).lt.0.005d0) go to 20 10 continue C call seteru (27hd9gaml unable to find xmin, 27, 1, 2) xmin = d1mach(2) xmax = -d1mach(2) return c 20 xmin = -xmin + 0.01d0 c alnbig = log (d1mach(2)) xmax = alnbig do 30 i=1,10 xold = xmax xln = log(xmax) xmax = xmax - xmax*((xmax-0.5d0)*xln - xmax + 0.9189d0 - alnbig) 1 / (xmax*xln-0.5d0) if (abs(xmax-xold).lt.0.005d0) go to 40 30 continue C call seteru (27hd9gaml unable to find xmax, 27, 2, 2) xmin = d1mach(2) xmax = -d1mach(2) return c 40 xmax = xmax - 0.01d0 xmin = dmax1 (xmin, -xmax+1.d0) c return end double precision function dcsevl (x, a, n) double precision a(n), x, twox, b0, b1, b2 double precision d1mach external d1mach c if (n.lt.1) then dcsevl = -d1mach(2) return endif if (n.gt.1000) then dcsevl = d1mach(2) return endif if (x.lt.(-1.1d0) .or. 
x.gt.1.1d0) then dcsevl = d1mach(2) return endif C added by CF to avoid uninitialized warnings b2 = 0 c twox = 2.0d0*x b1 = 0.d0 b0 = 0.d0 do 10 i=1,n b2 = b1 b1 = b0 ni = n - i + 1 b0 = twox*b1 - b2 + a(ni) 10 continue c dcsevl = 0.5d0 * (b0-b2) c return end double precision function d9lgmc (x) double precision x, algmcs(15), xbig, xmax, dcsevl, d1mach external d1mach, dcsevl, initds c data algmcs( 1) / +.1666389480 4518632472 0572965082 2 d+0 / data algmcs( 2) / -.1384948176 0675638407 3298605913 5 d-4 / data algmcs( 3) / +.9810825646 9247294261 5717154748 7 d-8 / data algmcs( 4) / -.1809129475 5724941942 6330626671 9 d-10 / data algmcs( 5) / +.6221098041 8926052271 2601554341 6 d-13 / data algmcs( 6) / -.3399615005 4177219443 0333059966 6 d-15 / data algmcs( 7) / +.2683181998 4826987489 5753884666 6 d-17 / data algmcs( 8) / -.2868042435 3346432841 4462239999 9 d-19 / data algmcs( 9) / +.3962837061 0464348036 7930666666 6 d-21 / data algmcs( 10) / -.6831888753 9857668701 1199999999 9 d-23 / data algmcs( 11) / +.1429227355 9424981475 7333333333 3 d-24 / data algmcs( 12) / -.3547598158 1010705471 9999999999 9 d-26 / data algmcs( 13) / +.1025680058 0104709120 0000000000 0 d-27 / data algmcs( 14) / -.3401102254 3167487999 9999999999 9 d-29 / data algmcs( 15) / +.1276642195 6300629333 3333333333 3 d-30 / c data nalgm, xbig, xmax / 0, 2*0.d0 / c if (nalgm.ne.0) go to 10 nalgm = initds (algmcs, 15, sngl(d1mach(3)) ) xbig = 1.0d0/sqrt(d1mach(3)) xmax = exp (dmin1(log(d1mach(2)/12.d0), -log(12.d0*d1mach(1)))) c 10 if (x.lt.10.d0) then d9lgmc = d1mach(2) return endif c if (x.ge.xmax) go to 20 c d9lgmc = 1.d0/(12.d0*x) if (x.lt.xbig) d9lgmc = dcsevl (2.0d0*(10.d0/x)**2-1.d0, algmcs, 1 nalgm) / x return c 20 d9lgmc = 0.d0 C call seteru (34hd9lgmc x so big d9lgmc underflows, 34, 2, 0) return c end double precision function dlngam (x) double precision x, y, xmax, dxrel, pi double precision sinpiy, sqpi2l, sq2pil double precision d1mach, d9lgmc external d1mach, d9lgmc double 
precision dgam c external dgamma c data sq2pil / 0.9189385332 0467274178 0329736405 62 d0 / c sq2pil = alog (sqrt(2*pi)), sqpi2l = alog(sqrt(pi/2)) data sqpi2l / +.2257913526 4472743236 3097614947 441 d+0 / data pi / 3.1415926535 8979323846 2643383279 50 d0 / c data xmax, dxrel / 2*0.d0 / c C added by CF to avoid uninitialized warnings dlngam = 0.d0 if (xmax.ne.0.d0) go to 10 xmax = d1mach(2)/dlog(d1mach(2)) dxrel = dsqrt (d1mach(4)) c 10 y = abs (x) if (y.gt.10.d0) go to 20 c c dlog (dabs (dgam(x)) ) for dabs(x) .le. 10.0 c dlngam = log (abs (dgam(x)) ) return c c dlog ( dabs (dgam(x)) ) for dabs(x) .gt. 10.0 c C20 if (y.gt.xmax) call seteru ( C 1 39hdlngam dabs(x) so big dlngam overflows, 39, 2, 2) 20 if (y.gt.xmax) dlngam = d1mach(2) if (y.gt.xmax) return c if (x.gt.0.d0) dlngam = sq2pil + (x-0.5d0)*log(x) - x + d9lgmc(y) if (x.gt.0.d0) return c sinpiy = abs (sin(pi*y)) C if (sinpiy.eq.0.d0) call seteru ( C 1 31hdlngam x is a negative integer, 31, 3, 2) if (sinpiy.eq.0.d0) dlngam = -d1mach(2) if (sinpiy.eq.0.d0) return c dlngam = sqpi2l + (x-0.5d0)*log(y) - x - log(sinpiy) - d9lgmc(y) c C if (dabs((x-dint(x-0.5d0))*dlngam/x).lt.dxrel) call seteru ( C 1 68hdlngam answer lt half precision because x too near negative C 2integer, 68, 1, 1) return c end function initds (dos, nos, eta) double precision dos(nos) integer i1mach external i1mach c C if (nos.lt.1) call seteru ( C 1 35hinitds number of coefficients lt 1, 35, 2, 2) if (nos.lt.1) initds = i1mach(9) c C added by CF to avoid uninitialized warnings i = 0 err = 0. 
do 10 ii=1,nos i = nos + 1 - ii err = err + abs(sngl(dos(i))) if (err.gt.eta) go to 20 10 continue c C20 if (i.eq.nos) call seteru (28hinitds eta may be too small, 28, C 1 1, 2) 20 continue initds = i c return end subroutine absrng( l, v, i, vmin, vmax) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE double precision v(*) integer i, j, k, l double precision temp, vmin, vmax c---------------------------------------------------------------------------- temp = abs(v(1)) vmin = temp vmax = temp if (l .eq. 1) return if (i .eq. 1) then do j = 2, l temp = abs(v(j)) vmin = min(vmin,temp) vmax = max(vmax,temp) end do else k = 1 + i do j = 2, l temp = abs(v(k)) vmin = min(vmin,temp) vmax = max(vmax,temp) k = k + i end do end if return end SUBROUTINE D2NORM ( N, X, INCX, VALUE ) * .. Scalar Arguments .. INTEGER INCX, N * .. Array Arguments .. DOUBLE PRECISION X( * ), VALUE * .. * * DNRM2 returns the euclidean norm of a vector via the function * name, so that * * DNRM2 := sqrt( x'*x ) * * THIS FUNCTION MODELLED AFTER DNRM2 BUT WRITTEN AS A SUBROUTINE * * -- This version written on 25-October-1982. * Modified on 14-October-1993 to inline the call to DLASSQ. * Sven Hammarling, Nag Ltd. * * * .. Parameters .. DOUBLE PRECISION ONE , ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. Local Scalars .. INTEGER IX DOUBLE PRECISION ABSXI, NORM, SCALE, SSQ * .. Intrinsic Functions .. INTRINSIC ABS, SQRT * .. * .. Executable Statements .. IF( N.LT.1 .OR. 
INCX.LT.1 )THEN NORM = ZERO ELSE IF( N.EQ.1 )THEN NORM = ABS( X( 1 ) ) ELSE SCALE = ZERO SSQ = ONE * The following loop is equivalent to this call to the LAPACK * auxiliary routine: * CALL DLASSQ( N, X, INCX, SCALE, SSQ ) * DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX IF( X( IX ).NE.ZERO )THEN ABSXI = ABS( X( IX ) ) IF( SCALE.LT.ABSXI )THEN SSQ = ONE + SSQ*( SCALE/ABSXI )**2 SCALE = ABSXI ELSE SSQ = SSQ + ( ABSXI/SCALE )**2 END IF END IF 10 CONTINUE NORM = SCALE * SQRT( SSQ ) END IF * VALUE = NORM RETURN * * End of D2NORM. * END subroutine mclrup( l, n, v, r, lr) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer l, n, lr double precision cs, sn c double precision v(n), r(lr,n) double precision v(*), r(lr,*) integer i, j, k, m if (l .eq. 1) return k = l - 1 if (k .le. n) then call dcopy( n, v, 1, r(k,1), lr) if (k .eq. 1) return if (n .gt. 1) then i = 1 m = n do j = 2, k call drotg( r(i,i), r(k,i), cs, sn) m = m - 1 call drot( m, r(i,j), lr, r(k,j), lr, cs, sn) i = j end do else call drotg( r(1,1), r(k,1), cs, sn) end if else if (n .gt. 
1) then i = 1 m = n do j = 2, n call drotg( r(i,i), v(i), cs, sn) m = m - 1 call drot( m, r(i,j), lr, v(j), 1, cs, sn) i = j end do end if call drotg( r(n,n), v(n), cs, sn) end if return end subroutine mcltrw( x, n, p, u, ss) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p double precision ss c double precision x(n,p), u(p) double precision x(n,*), u(*) double precision ddot external ddot integer i, j double precision fac double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision dummy(1) c------------------------------------------------------------------------------ c form mean fac = one / sqrt(dble(n)) c call dcopy( p, zero, 0, u, 1) dummy(1) = zero call dcopy( p, dummy, 0, u, 1) do i = 1, n call daxpy( p, fac, x(i,1), n, u, 1) end do c subtract mean and form sum of squares ss = zero do j = 1, p call daxpy( n, (-fac), u(j), 0, x(1,j), 1) ss = ss + ddot(n, x(1,j), 1, x(1,j), 1) end do return end subroutine mclvol( x, n, p, u, v, w, * work, lwork, iwork, liwork, * info) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, lwork, liwork, info c integer iwork(liwork) integer iwork(*) c double precision x(n,p), u(p), v(p,p), w(p,p), work(lwork), double precision x(n,*), u(*), v(p,*), w(p,p), work(*) integer i, j double precision temp, cmin, cmax double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision EPSMAX parameter (EPSMAX = 2.2204460492503131d-16) double precision FLMAX parameter (FLMAX = 1.7976931348623157D+308) double precision dummy(1) 
c------------------------------------------------------------------------------ c form mean temp = one / dble(n) c call dcopy( p, zero, 0, u, 1) dummy(1) = zero call dcopy( p, dummy, 0, u, 1) do i = 1, n call daxpy( p, temp, x(i,1), n, u, 1) end do c subtract mean do j = 1, p call daxpy( n, (-one), u(j), 0, x(1,j), 1) end do c if (.false.) then c this gets the eigenvectors but x is overwritten c get right singular vectors c call dgesvd( 'N', 'A', n, p, x, n, u, c * dummy, 1, w, p, work, lwork, info) c if (info .lt. 0) return c if (info .eq. 0) then c lwork = int(work(1)) c do i = 1, p c v(i,i) = w(i,i) c if (i .gt. 1) then c do j = 1, (i-1) c v(i,j) = w(j,i) c v(j,i) = w(i,j) c end do c end if c end do c goto 100 c end if c end if c form crossproduct call dsyrk( 'U', 'T', p, n, one, x, n, zero, w, p) c get eigenvectors do j = 1, p do i = 1, j v(i,j) = w(i,j) end do end do call dsyevd( 'V', 'U', p, v, p, u, * work, lwork, iwork, liwork, info) if (info .lt. 0) return if (info .eq. 0) then lwork = int(work(1)) liwork = iwork(1) goto 100 end if c EPSMAX = d1mach(4) call dsyevx( 'V', 'A', 'U', p, w, p, dummy, dummy, i, i, * sqrt(EPSMAX), j, u, v, p, * work, lwork, iwork(p+1), iwork, info) if (info .ne. 0) return lwork = int(work(1)) liwork = -1 100 continue c FLMAX = d1mach(2) c form xv c vol = one do j = 1, p call dgemv( 'N', n, p, one, x, n, v(1,j), 1, zero, work, 1) cmax = -FLMAX cmin = FLMAX do i = 1, n temp = work(i) if (temp .gt. cmax) cmax = temp if (temp .lt. 
cmin) cmin = temp end do u(j) = cmax - cmin c vol = vol * (cmax - cmin) end do return end subroutine sgnrng( l, v, i, vmin, vmax) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE double precision v(*) integer i, j, k, l double precision temp, vmin, vmax c---------------------------------------------------------------------------- temp = v(1) vmin = temp vmax = temp if (l .eq. 1) return if (i .eq. 1) then do j = 2, l temp = v(j) vmin = min(vmin,temp) vmax = max(vmax,temp) end do else k = 1 + i do j = 2, l temp = v(k) vmin = min(vmin,temp) vmax = max(vmax,temp) k = k + i end do end if return end subroutine shapeo( TRANSP, s, O, l, m, w, info) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical TRANSP integer l, m, info c double precision s(l), O(l,l,m), w(l,l) double precision s(*), O(l,l,*), w(l,*) integer j, k double precision temp double precision zero, one parameter (zero = 0.d0, one = 1.d0) c------------------------------------------------------------------------------ if (TRANSP) then do j = 1, l temp = sqrt(s(j)) do k = 1, m call dscal( l, temp, O(j,1,k), l) end do end do do k = 1, m call dsyrk( 'U', 'T', l, l, one, O(1,1,k), l, zero, w, l) do j = 1, l call dcopy( j, w(1,j), 1, O(1,j,k), 1) end do do j = 2, l call dcopy( j-1, w(1,j), 1, O(j,1,k), l) end do end do info = 0 return end if if (.not. 
TRANSP) then do j = 1, l temp = sqrt(s(j)) do k = 1, m call dscal( l, temp, O(1,j,k), 1) end do end do do k = 1, m call dsyrk( 'U', 'N', l, l, one, O(1,1,k), l, zero, w, l) do j = 1, l call dcopy( j, w(1,j), 1, O(1,j,k), 1) end do do j = 2, l call dcopy( j-1, w(1,j), 1, O(j,1,k), l) end do end do info = 0 return end if info = -1 return end subroutine uncholf ( UPPER, T, l, n, info) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical UPPER integer l, n, info c double precision T(abs(n), abs(n)) double precision T( l , * ) integer i, j, k double precision ddot external ddot c------------------------------------------------------------------------------ if (UPPER) then do i = 2, n do j = 1, (i-1) T(i,j) = ddot( j, T(1,i), 1, T(1,j), 1) end do end do do k = 1, n T(k,k) = ddot( k, T(1,k), 1, T(1,k), 1) end do do k = 1, n-1 call dcopy( n-k, T(k+1,k), 1, T(k,k+1), l) end do info = 0 return end if if (.not. UPPER) then do i = 2, n do j = 1, (i-1) T(j,i) = ddot( j, T(i,1), l, T(j,1), l) end do end do do k = 1, n T(k,k) = ddot( k, T(k,1), l, T(k,1), l) end do do k = 2, n call dcopy( k-1, T(1,k), 1, T(k,1), l) end do return end if info = -1 return end subroutine wardsw( i, n, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer i, n double precision d(*) integer i1, n1, ii, nn, k double precision temp double precision FLMAX parameter (FLMAX = 1.7976931348623157D+308) *----------------------------------------------------------------------------- i1 = i - 1 ii = (i1*(i1-1))/2 + 1 n1 = n - 1 nn = (n1*(n1-1))/2 + 1 c if (i .gt. 
1) then call dswap( i1, d(nn), 1, d(ii), 1) c call dcopy( i1, FLMAX, 0, d(nn), 1) ii = ii + i1 + i1 nn = nn + i c end if if (n1 .eq. i) return k = i 100 continue temp = d(ii) d(ii) = d(nn) d(nn) = temp c d(nn) = FLMAX ii = ii + k nn = nn + 1 k = k + 1 if (k .lt. n1) goto 100 c d(nn) = FLMAX return end subroutine es1e ( x, mu, sigsq, pro, n, G, Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G double precision sigsq, hood, Vinv c double precision x(n), mu(G), pro(G[+1]), z(n,G[+1]) double precision x(*), mu(*), pro( * ), z(n, * ) integer i, k, nz double precision temp, const, muk, prok, tmin, tmax, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (sigsq .le. zero) then hood = FLMAX return end if const = pi2log + log(sigsq) do k = 1, G muk = mu(k) c prok = pro(k) do i = 1, n temp = x(i) - muk c z(i,k) = prok*exp(-(const+(temp*temp)/sigsq)/two) if (sigsq .lt. one .and. * abs(temp) .ge. sqrt(sigsq)*RTMAX) then hood = FLMAX return end if z(i,k) = -(const+(temp*temp)/sigsq)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. 
zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c temp = zero c do k = 1, nz c temp = temp + z(i,k) c end do c hood = hood + log(temp) c call dscal( nz, (one/temp), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine hc1e ( x, n, ic, ng, ns, nd, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, ic(n), ng, ns, nd c double precision x(n), d(ng*(ng-1)/2) double precision x(*), d(*) integer lg, ld, ll, lo, ls integer i, j, k, m integer ni, nj, nij, iopt, jopt, iold, jold integer ij, ici, icj, ii, ik, jk double precision ri, rj, rij, si, sj, sij double precision temp, dij, dopt, dold external wardsw double precision one parameter (one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) c------------------------------------------------------------------------------ iopt = 0 jopt = 0 lg = ng ld = (ng*(ng-1))/2 ll = nd-ng lo = nd c call intpr( 'ic', -1, ic, n) c call intpr( 'no. of groups', -1, lg, 1) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. lg) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. 
k) then k = k + 1 c call dswap( p, x(k,1), n, x(j,1), n) temp = x(k) x(k) = x(j) x(j) = temp ic(j) = ic(k) ic(k) = icj end if end do 3 continue c call intpr( 'ic', -1, ic, n) do j = 1, n i = ic(j) if (i .ne. j) then ic(j) = 0 ni = ic(i) nij = ni + 1 ic(i) = nij ri = dble(ni) rij = dble(nij) sj = sqrt(one/rij) si = sqrt(ri)*sj c update column sum in kth row c call dscal( p, si, x(i,1), n) c call daxpy( p, sj, x(j,1), n, x(i,1), n) x(i) = si*x(i) + sj*x(j) else ic(j) = 1 end if end do c call intpr( 'ic', -1, ic, n) dopt = FLMAX ij = 0 do j = 2, lg nj = ic(j) rj = dble(nj) do i = 1, (j-1) ni = ic(i) ri = dble(ni) nij = ni + nj rij = dble(nij) si = sqrt(ri/rij) sj = sqrt(rj/rij) c call dcopy( p, x(i,1), n, v, 1) c call dscal( p, sj, v, 1) c call daxpy( p, (-si), x(j,1), n, v, 1) c dij = ddot(p, v, 1, v, 1) temp = sj*x(i) - si*x(j) dij = temp*temp ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij iopt = i jopt = j end if end do end do c if (.false.) then c i = 1 c ij = 1 c do j = 2, ng c call dblepr( 'dij', -1, d(ij), i) c ij = ij + i c i = j c end do c end if if (ns .eq. 1) then if (iopt .lt. jopt) then x(1) = dble(iopt) ic(1) = jopt else x(1) = dble(jopt) ic(1) = iopt end if d(1) = dopt return end if ls = 1 100 continue ni = ic(iopt) nj = ic(jopt) nij = ni + nj ic(iopt) = nij ic(jopt) = -iopt if (jopt .ne. lg) then call wardsw( jopt, lg, d) m = ic(jopt) ic(jopt) = ic(lg) ic(lg) = m end if si = dble(ni) sj = dble(nj) sij = dble(nij) dold = dopt iold = iopt jold = jopt iopt = -1 jopt = -1 dopt = FLMAX lg = lg - 1 ld = ld - lg ii = (iold*(iold-1))/2 if (iold .gt. 1) then ik = ii - iold + 1 do j = 1, (iold - 1) nj = ic(j) rj = dble(nj) ik = ik + 1 jk = ld + j dij = (rj+si)*d(ik)+(rj+sj)*d(jk) dij = (dij-rj*dold)/(rj+sij) d(ik) = dij end do end if if (iold .lt. 
lg) then ik = ii + iold i = iold do j = (iold + 1), lg nj = ic(j) rj = dble(nj) jk = ld + j dij = (rj+si)*d(ik)+(rj+sj)*d(jk) dij = (dij-rj*dold)/(rj+sij) d(ik) = dij ik = ik + i i = j end do end if d(lo) = dold lo = lo - 1 d(lo) = dble(iold) lo = lo - 1 d(lo) = dble(jold) lo = lo - 1 c update d and find max jopt = 2 iopt = 1 dopt = d(1) if (lg .eq. 2) goto 900 ij = 1 do i = 2, ld si = d(i) if (si .le. dopt) then ij = i dopt = si end if end do if (ij .gt. 1) then do i = 2, ij iopt = iopt + 1 if (iopt .ge. jopt) then jopt = jopt + 1 iopt = 1 end if end do end if ls = ls + 1 if (ls .eq. ns) goto 900 goto 100 900 continue d(lo) = dopt lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) do i = 1, ng ic(i) = i end do lo = nd - 1 ld = lo si = d(lo) lo = lo - 1 sj = d(lo) ic(int(sj)) = ng if (si .lt. sj) then x(1) = si d(ld) = sj else x(1) = sj d(ld) = si end if ld = ld - 1 lg = ng + 1 do k = 2, ns lo = lo - 1 d(ld) = d(lo) ld = ld - 1 lo = lo - 1 i = int(d(lo)) ici = ic(i) lo = lo - 1 j = int(d(lo)) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(lg-k) if (ici .lt. 
icj) then x(k) = dble(ici) d(ld) = dble(icj) else x(k) = dble(icj) d(ld) = dble(ici) end if ld = ld - 1 end do ld = nd lo = nd - 1 do k = 1, ns ic(k) = int(d(lo)) lo = lo - 1 ld = ld - 1 d(ld) = d(lo) lo = lo - 1 end do ld = nd lo = 1 do k = 1, ns si = d(lo) d(lo) = d(ld) d(ld) = si ld = ld - 1 lo = lo + 1 end do return end subroutine me1e ( EQPRO, x, n, G, Vinv, z, maxi, tol, eps, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, G, maxi double precision Vinv, eps, tol c double precision x(n), z(n,G[+1]), mu(G), sigsq, pro(G[+1]) double precision x(*), z(n, * ), mu(*), sigsq, pro( * ) integer nz, iter, k, i double precision hold, hood, err, prok, tmin, tmax, rteps double precision const, sum, sumz, smu, temp, term, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( nz, one/dble(nz), 0, pro, 1) dummy(1) = one/dble(nz) call dcopy( nz, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 sumz = zero sigsq = zero zsum = one do k = 1, G sum = zero smu = zero do i = 1, n temp = z(i,k) sum = sum + temp smu = smu + temp*x(i) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum / dble(n) zsum = min(sum,zsum) if (sum .gt. 
rteps) then smu = smu / sum mu(k) = smu do i = 1, n temp = x(i) - smu temp = temp*temp sigsq = sigsq + z(i,k)*temp z(i,k) = temp end do end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if if (Vinv .le. zero) then sigsq = sigsq / dble(n) else sigsq = sigsq / sumz end if if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (sigsq .le. eps) then tol = err eps = FLMAX maxi = iter return end if const = pi2log + log(sigsq) do k = 1, G c temp = pro(k) do i = 1, n c z(i,k) = temp*exp(-(const+(z(i,k)/sigsq))/two) z(i,k) = -(const+(z(i,k)/sigsq))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine me1ep ( EQPRO, x, n, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, G, maxi double precision pshrnk, pmu, pscale, pdof double precision Vinv, eps, tol c double precision x(n), z(n,G[+1]), mu(G), sigsq, pro(G[+1]) double precision x(*), z(n, * ), mu(*), sigsq, pro( * ) integer nz, iter, k, i double precision hold, hood, err, prok, tmin, tmax double precision const, sum, sumz, smu, temp, term, zsum double precision pmupmu, cgam, cmu, rmu, rgam, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision dlngam external dlngam double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( nz, one/dble(nz), 0, pro, 1) dummy(1) = one/dble(nz) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX pmupmu = pmu*pmu iter = 0 100 continue iter = iter + 1 sigsq = zero zsum = one do k = 1, G sumz = zero smu = zero do i = 1, n temp = z(i,k) sumz = sumz + temp smu = smu + temp*x(i) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(zsum,sumz) if (sumz .gt. 
rteps) then smu = smu/sumz sum = zero do i = 1, n term = x(i) - smu term = term*term sum = sum + z(i,k)*term end do term = (pshrnk*sumz)/(pshrnk+sumz) temp = (pmupmu + smu*smu) - two*pmu*smu sigsq = sigsq + sum + term*temp term = sumz/(pshrnk+sumz) temp = pshrnk/(pshrnk+sumz) mu(k) = term*smu + temp*pmu end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if sigsq = (pscale + sigsq)/(pdof + dble(n+G) + two) c if (Vinv .le. zero) then c sigsq = sigsq / dble(n) c else c sigsq = sigsq / sumz c end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (sigsq .le. eps) then tol = err eps = FLMAX maxi = iter return end if const = pi2log + log(sigsq) do k = 1, G c temp = pro(k) do i = 1, n term = x(i) - mu(k) z(i,k) = -(const+((term*term)/sigsq))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter cmu = dble(G)*(pi2log-log(pshrnk))/two sum = zero do k = 1, G temp = pmu - mu(k) temp = temp*temp sum = sum - (pshrnk/sigsq)*temp end do term = log(sigsq) rmu = (sum - dble(G)*term)/two temp = pdof/two cgam = temp*log(pscale/two) - dlngam(temp) rgam = -(temp+one)*term - (pscale/sigsq)/two pdof = (cmu+cgam) + (rmu+rgam) return end subroutine ms1e ( x, z, n, G, mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G c double precision x(n), z(n,G), mu(G), sigsq, pro(G) double precision x(*), z(n,*), mu(*), sigsq, pro(*) integer i, k double precision sum, smu, sumz, temp double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) c------------------------------------------------------------------------------ sumz = zero sigsq = zero do k = 1, G sum = zero smu = zero do i = 1, n temp = z(i,k) sum = sum + temp smu = smu + temp*x(i) end do sumz = sumz + sum pro(k) = sum / dble(n) if (sigsq .gt. one .or. smu .le. sum*FLMAX) then smu = smu / sum mu(k) = smu if (sigsq .ne. FLMAX) then do i = 1, n temp = abs(x(i) - smu) sigsq = sigsq + z(i,k)*(temp*temp) end do end if else mu(k) = FLMAX sigsq = FLMAX end if end do c sumz .eq. n when no noise if (sigsq .ne. 
FLMAX) sigsq = sigsq / sumz return end subroutine ms1ep ( x, z, n, G, * pshrnk, pmu, pscale, pdof, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G double precision pshrnk, pmu, pscale, pdof c double precision x(n), z(n,G), mu(G), sigsq, pro(G) double precision x(*), z(n,*), mu(*), sigsq, pro(*) integer k, i double precision pmupmu double precision sum, sumz, smu, temp, term double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero pmupmu = pmu*pmu sigsq = zero do k = 1, G sumz = zero smu = zero do i = 1, n temp = z(i,k) sumz = sumz + temp smu = smu + temp*x(i) end do pro(k) = sumz / dble(n) if (sumz .gt. one .or. smu .lt. sumz*FLMAX) then smu = smu/sumz sum = zero term = sumz/(pshrnk+sumz) temp = pshrnk/(pshrnk+sumz) mu(k) = term*smu + temp*pmu if (sigsq .ne. FLMAX) then do i = 1, n term = abs(x(i) - smu) sum = sum + z(i,k)*(term*term) end do term = (pshrnk*sumz)/(pshrnk+sumz) temp = (pmupmu + smu*smu) - two*pmu*smu sigsq = sigsq + sum + term*temp end if else mu(k) = FLMAX sigsq = FLMAX end if end do if (sigsq .ne. FLMAX) then temp = pdof + dble(n) + two if (pshrnk .gt. 
zero) temp = temp + dble(G) sigsq = (pscale + sigsq)/temp end if return end subroutine eseee ( CHOL, x, mu, Sigma, pro, n, p, G, Vinv, * w, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c character CHOL logical CHOL c integer n, p, G integer n, p, G double precision hood, Vinv c double precision x(n,p), w(p), z(n,G[+1]) double precision x(n,*), w(*), z(n, * ) c double precision mu(p,G), Sigma(p,p), pro(G[+1]) double precision mu(p,*), Sigma(p,*), pro( * ) integer info, i, j, k, nz double precision detlog, prok, tmin, tmax double precision umin, umax, const, temp, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ c if (CHOL .eq. 'N') then if (.not. CHOL) then c Cholesky factorization call dpotrf( 'U', p, Sigma, p, info) if (info .ne. 0) then w(1) = dble(info) hood = FLMAX return end if end if call absrng( p, Sigma, (p+1), umin, umax) c rc = umin/(one+umax) if (umax .le. one .and. umax .ge. umin*RTMAX) then w(1) = zero hood = FLMAX return end if if (umax .ge. one .and. umin .le. 
umax*RTMIN) then w(1) = zero hood = FLMAX return end if detlog = zero do j = 1, p detlog = detlog + log(abs(Sigma(j,j))) end do const = dble(p)*pi2log/two + detlog do k = 1, G c prok = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, Sigma, p, w, 1) temp = ddot( p, w, 1, w, 1)/two c z(i,k) = prok*exp(-(const+temp)) z(i,k) = -(const+temp) end do end do w(1) = zero if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then w(1) = zero hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do w(1) = zero return end double precision function detmc2( n, u) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer k, n double precision q double precision u(n,*) double precision zero, two parameter (zero = 0.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) detmc2 = zero do k = 1, n q = u(k,k) if (q .eq. 
zero) then detmc2 = -FLMAX return end if detmc2 = detmc2 + log(abs(q)) end do detmc2 = two*detmc2 return end subroutine meeee ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, U, pro, w) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p), pro(G) double precision mu(p,*), U(p,*), pro(*) integer nz, p1, iter, i, j, k, j1 double precision piterm, sclfac, sumz, sum, zsum double precision cs, sn, umin, umax, rc, detlog, rteps double precision const, hold, hood, err, temp, term double precision prok, tmin, tmax double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if piterm = dble(p)*pi2log/two p1 = p + 1 eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX c zero out the lower triangle i = 1 dummy(1) = zero do j = 2, p c call dcopy( p-i, zero, 0, U(j,i), 1) call dcopy( p-i, dummy, 0, U(j,i), 1) i = j end do iter = 0 100 continue iter = iter + 1 dummy(1) = zero do j = 1, p c call dcopy( j, zero, 0, U(1,j), 1) call dcopy( j, dummy, 0, U(1,j), 1) end do sumz = zero zsum = one do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum / dble(n) zsum = min(zsum,sum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX hood = eps maxi = iter return end if if (Vinv .le. zero) then sclfac = one/sqrt(dble(n)) else sclfac = one/sqrt(sumz) end if do j = 1, p call dscal( j, sclfac, U(1,j), 1) end do c condition number call absrng( p, U, p1, umin, umax) rc = umin/(one+umax) if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (rc .le. 
rteps) then tol = err eps = FLMAX hood = eps maxi = iter return end if detlog = zero do j = 1, p detlog = detlog + log(abs(U(j,j))) end do const = piterm + detlog do k = 1, G c temp = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, U, p, w, 1) sum = ddot( p, w, 1, w, 1)/two c z(i,k) = temp * exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine meeeep( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, U, pro, w) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p), pro(G) double precision mu(p,*), U(p,*), pro(*) integer nz, p1, iter, i, j, k, j1 double precision piterm, sclfac, sumz, sum, zsum double precision cs, sn, umin, umax, rc, detlog, rteps double precision const, hold, hood, err, temp, term double precision prok, tmin, tmax double precision cmu, cgam, rmu, rgam double precision zero, 
one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision twolog parameter (twolog = 0.6931471805599453d0) double precision pilog parameter (pilog = 1.144729885849400d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot, dlngam external ddot, dlngam double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if piterm = dble(p)*pi2log/two p1 = p + 1 sclfac = one/sqrt(dble(n)) eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 c copy pscale to U do j = 1, p call dcopy( p, pscale(1,j), 1, U(1,j), 1) end do sumz = zero zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum / dble(n) zsum = min(zsum,sum) if (sum .gt. 
rteps) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) const = sum + pshrnk temp = (sum*pshrnk)/const call dscal( p, sqrt(temp), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) call dscal( p, sum/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = pdof + dble(p) + one if (pshrnk .gt. zero) term = term + dble(G) if (Vinv .le. zero) then sclfac = one/sqrt(term+dble(n)) else sclfac = one/sqrt(term+dble(sumz)) end if do j = 1, p call dscal( j, sclfac, U(1,j), 1) end do if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if c condition number call absrng( p, U, p1, umin, umax) rc = umin/(one+umax) if (rc .le. rteps) then tol = err eps = FLMAX maxi = iter return end if detlog = zero do j = 1, p detlog = detlog + log(abs(U(j,j))) end do const = piterm + detlog do k = 1, G c temp = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, U, p, w, 1) sum = ddot( p, w, 1, w, 1)/two c z(i,k) = temp * exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. 
zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter if (pshrnk .gt. zero) then cmu = dble(p)*(log(pshrnk) - pi2log)/two rmu = zero do k = 1, G call daxpy( p, (-one), mu(1,k), 1, pmu, 1) call dtrsv('U','T','N',p,U,p,pmu,1) rmu = rmu + ddot( p, pmu, 1, pmu, 1) end do sum = zero term = zero temp = zero do j = 1, p call dcopy( p, pscale(j,1), p, pmu, 1) c call dtrsv('U','T','N', p, U, p, pmu, 1) i = p-j+1 c call dtrsv('U','T','N', i, U(j,j),i,pmu(j),1) call dtrsv('U','T','N', i, U(j,j),p,pmu(j),1) sum = sum + ddot(i, pmu(j), 1, pmu(j), 1) temp = temp + log(abs(pscale(j,j))) term = term + dlngam((pdof+one-dble(j))/two) end do rmu = -(detlog+pshrnk*rmu/two) const = -dble(p)*(pdof*twolog+(dble(p)-one)*pilog/two) cgam = (const/two-pdof*temp) - term rgam = -((pdof+dble(p)+one)*detlog + sum/two) pdof = (dble(G)*cmu+rmu) + (cgam+rgam) else pdof = FLMAX end if return end subroutine mseee ( x, z, n, p, G, w, mu, U, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p), pro(G) double precision mu(p,*), U(p,*), pro(*) c------------------------------------------------------------------------------ c c x double (input) (n,p) matrix of observations. c z double (input) (n,G) conditional probabilities. 
c n integer (input) number of observations. c p integer (input) dimension of the data. c G integer (input) number of Gaussian clusters in the mixture. c w double (scratch) (p) c mu double (output) (p,G) mean for each group. c U double (output) (p,p) upper triangular Cholesky factor of the c common covariance matrix for the groups: transpose(U) * U = Sigma. c pro double (output) (G) mixing proportions (ignore result if equal). integer i, j, k, j1 double precision sum, sumz, zsum, temp, cs, sn double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision dummy(1) c----------------------------------------------------------------------------- dummy(1) = zero do j = 1, p c call dcopy p, zero, 0, U(1,j), 1) call dcopy( p, dummy, 0, U(1,j), 1) end do sumz = zero zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum pro(k) = sum / dble(n) if (sum .gt. one .or. one .gt. sum*FLMAX) then zsum = min(zsum,sum) call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do else zsum = zero c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (zsum .eq. zero) return c sumz .eq. 
n when no noise do j = 1, p call dscal( j, one/sqrt(sumz), U(1,j), 1) end do return end subroutine mseeep( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * w, mu, U, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof c double precision mu(p,G), U(p,p), pro(G) double precision mu(p,*), U(p,*), pro(*) integer i, j, k, j1 double precision sclfac, const, temp double precision sum, sumz, zsum, cs, sn double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .le. zero) pshrnk = zero do j = 1, p call dcopy( p, pscale(1,j), 1, U(1,j), 1) end do sumz = zero zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum pro(k) = sum / dble(n) if (sum .ge. one .or. one .gt. 
sum*FLMAX) then zsum = min(zsum,sum) call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) const = sum + pshrnk temp = (sum*pshrnk)/const call dscal( p, sqrt(temp), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) call dscal( p, sum/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else zsum = zero c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (zsum .eq. zero) return temp = pdof+dble(n+p+1) if (pshrnk .gt. zero) temp = temp + dble(G) sclfac = one/sqrt(temp) do j = 1, p call dscal( j, sclfac, U(1,j), 1) end do return end subroutine eseei ( x, mu, scale, shape, pro, n, p, G, * Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G double precision scale, hood, Vinv c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p), pro(G[+1]) double precision mu(p,*), shape(*), pro( * ) integer i, j, k, nz double precision sum, temp, const, tmin, tmax double precision smin, smax, prok double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double 
precision dummy(1) c------------------------------------------------------------------------------ if (scale .le. zero) then hood = FLMAX return end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if temp = sqrt(scale) do j = 1, p shape(j) = temp*sqrt(shape(j)) end do const = dble(p)*(pi2log+log(scale)) do k = 1, G c prok = pro(k) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) if (abs(temp) .ge. shape(j)*FLMAX) then hood = FLMAX return end if temp = temp/shape(j) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. 
sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine meeei ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol, scale c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p), pro(G[+1]) double precision mu(p,*), shape(*), pro( * ) integer nz, iter, i, j, k double precision sum, sumz, temp, term, zsum double precision const, hold, hood, err, smin, smax double precision prok, tmin, tmax, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) sumz = zero zsum = one do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum/dble(n) zsum = min(zsum,sum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j) = shape(j) + sum end do end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) then scale = zero tol = err eps = FLMAX maxi = iter return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale = FLMAX tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (Vinv .le. zero) then scale = temp/dble(n) else scale = temp/sumz end if if (temp .le. eps) then tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape, 1) call sgnrng(p, shape, 1, smin, smax) if (smin .le. eps .or. scale .le. eps) then tol = err eps = FLMAX maxi = iter return end if if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if const = dble(p)*(pi2log+log(scale)) do k = 1, G c prok = pro(k) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j) end do c z(i,k) = prok*exp(-(const+(sum/scale))/two) z(i,k) = -(const+(sum/scale))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine meeeip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision Vinv, eps, tol, scale c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p), pro(G[+1]) double precision mu(p,*), shape(*), pro( * ) integer nz, iter, i, j, k double precision sum, sumz, temp, term, zsum double precision const, hold, hood, err, smin, smax double precision prok, tmin, tmax, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 c call dcopy( p, pscale, 0, shape, 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape, 1) sumz = zero zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum/dble(n) zsum = min(zsum,sum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) term = pshrnk + sum const = (pshrnk*sum)/term do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + (temp*temp) end do shape(j) = shape(j) + sum temp = pmu(j) - mu(j,k) shape(j) = shape(j) + const*(temp*temp) end do end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) then scale = zero tol = err eps = FLMAX maxi = iter return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale = FLMAX tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if term = pdof + one if (pshrnk .gt. zero) term = term + one if (Vinv .le. zero) then scale = temp/(term + dble(n)) else scale = temp/(term + sumz) end if if (temp .le. eps) then tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape, 1) call sgnrng(p, shape, 1, smin, smax) if (smin .le. eps .or. scale .le. eps) then tol = err eps = FLMAX maxi = iter return end if if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if const = dble(p)*(pi2log+log(scale)) do k = 1, G c prok = pro(k) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j) end do c z(i,k) = prok*exp(-(const+(sum/scale))/two) z(i,k) = -(const+(sum/scale))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine mseei ( x, z, n, p, G, mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale, shape(p), pro(G) double precision mu(p,*), scale, shape(*), pro(*) integer i, j, k double precision sum, sumz, temp, smin, smax double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ sumz = zero do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum pro(k) = sum/dble(n) if (sum .gt. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do j = 1, p sum = zero do i = 1, n do k = 1, G if (mu(1,k) .eq. FLMAX) then scale = FLMAX return end if temp = z(i,k)*(x(i,j) - mu(j,k)) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do end do shape(j) = shape(j) + sum end do call sgnrng(p, shape, 1, smin, smax) if (smin .eq. zero) then scale = zero return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. 
BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (sumz .lt. one .and. temp .ge. sumz*FLMAX) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if scale = temp/sumz if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if call dscal( p, one/temp, shape, 1) return end subroutine mseeip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale, shape(p), pro(G[+1]) double precision mu(p,*), scale, shape(*), pro( * ) integer i, j, k double precision sum, sumz, temp, term double precision const, smin, smax double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. 
zero) pshrnk = zero c call dcopy( p, pscale, 0, shape, 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape, 1) sumz = zero do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum pro(k) = sum/dble(n) if (sum .gt. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) term = pshrnk + sum const = (pshrnk*sum)/term do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do shape(j) = shape(j) + sum temp = pmu(j) - mu(j,k) shape(j) = shape(j) + const*(temp*temp) end do else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do call sgnrng(p, shape, 1, smin, smax) if (smin .eq. zero) then scale = zero return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .ge. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if if (temp .gt. SMALOG) then smin = exp(temp) else smin = zero end if term = pdof + sumz + two if (pshrnk .gt. zero) term = term + dble(G) scale = smin/term if (smin .lt. one .and. one .ge. 
smin*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if call dscal( p, one/smin, shape, 1) return end subroutine eseev ( x, mu, scale, shape, O, pro, n, p, G, * Vinv, v, w, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p, G integer n, p, G double precision scale, Vinv, hood c double precision x(n,p), v(p), w(p), z(n,G[+1]) double precision x(n,*), v(*), w(*), z(n, * ) c double precision mu(p,G), shape(p), O(p,p,G), pro(G[+1]) double precision mu(p,*), shape(*), O(p,p,*), pro( * ) integer i, j, k, nz double precision const, temp, tmin, tmax double precision smin, smax, prok, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ if (scale .le. zero) then hood = FLMAX return end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if temp = sqrt(scale) do j = 1, p shape(j) = temp*sqrt(shape(j)) end do const = dble(p)*(pi2log + log(scale)) do k = 1, G c prok = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dgemv( 'N', p, p, one, O(1,1,k), p, * w, 1, zero, v, 1) do j = 1, p if (shape(j) .lt. one .and. * abs(v(j)) .ge. shape(j)*FLMAX) then hood = FLMAX return end if v(j) = v(j)/shape(j) end do temp = ddot( p, v, 1, v, 1) c z(i,k) = prok*exp(-(const+temp)/two) z(i,k) = -(const+temp)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. 
zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine meeev ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * lwork, mu, scale, shape, O, pro, w, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi, lwork double precision Vinv, eps, tol, scale double precision x(n,*), z(n, * ), w( * ), s(*) double precision mu(p,*), shape(*), O(p,p,*), pro( * ) integer nz, p1, iter, i, j, k, l, j1, info double precision dnp, temp, term, rteps double precision sumz, sum, smin, smax, cs, sn double precision const, rc, hood, hold, err double precision prok, tmin, tmax, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) 
c------------------------------------------------------------------------------
c meeev body: alternate M-step (means, orientation via Givens-rotation
c QR of weighted residuals + dgesvd, common scale/shape from singular
c values) and E-step (log-sum-exp posterior) until the relative change
c in log-likelihood is <= tol or maxi iterations.  Error states are
c signalled through eps/tol/lwork on return.
      if (maxi .le. 0) return
      if (Vinv .gt. zero) then
        nz = G + 1
c       ViLog = log(Vinv)
      else
        nz = G
c       if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1)
        if (EQPRO) then
          dummy(1) = one/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if
      p1 = p + 1
      dnp = dble(n*p)
      eps = max(eps,zero)
      rteps = sqrt(eps)
      tol = max(tol,zero)
c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err = FLMAX
      iter = 0
 100  continue
      iter = iter + 1
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      sumz = zero
      zsum = one
      l = 0
      do k = 1, G
        dummy(1) = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do j = 1, p
c         call dcopy( p, zero, 0, O(1,j,k), 1)
          call dcopy( p, dummy, 0, O(1,j,k), 1)
        end do
c weighted mean of group k; sum = total membership weight
        sum = zero
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        sumz = sumz + sum
        zsum = min(zsum,sum)
        if (.not. EQPRO) pro(k) = sum / dble(n)
        if (sum .ge. rteps) then
          call dscal( p, (one/sum), mu(1,k), 1)
c accumulate R factor of the sqrt(z)-weighted residuals by Givens
c rotations, then SVD to get singular values s and orientation in O
          do i = 1, n
            call dcopy( p, x(i,1), n, w, 1)
            call daxpy( p, (-one), mu(1,k), 1, w, 1)
            call dscal( p, sqrt(z(i,k)), w, 1)
            j = 1
            do j1 = 2, p
              call drotg( O(j,j,k), w(j), cs, sn)
              call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
              j = j1
            end do
            call drotg( O(p,p,k), w(p), cs, sn)
          end do
          call dgesvd( 'N', 'O', p, p, O(1,1,k), p, s,
     *                 dummy, 1, dummy, 1, w, lwork, info)
          if (info .ne. 0) then
            l = info
          else
            do j = 1, p
              temp = s(j)
              shape(j) = shape(j) + temp*temp
            end do
          end if
        end if
      end do
c SVD failure or an (almost) empty group: abort with coded status
      if (l .ne. 0 .or. zsum .lt. rteps) then
        lwork = l
c       w(1) = FLMAX
        tol = err
        if (l .ne. 0) then
          eps = FLMAX
        else
          eps = -FLMAX
        end if
        maxi = iter
        return
      end if
      term = zero
      if (Vinv .gt. zero) then
        do i = 1, n
          term = term + z(i,nz)
        end do
        temp = term / dble(n)
        pro(nz) = temp
c       call dcopy( n, ViLog, 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
        if (EQPRO) then
c         temp = (one - pro(nz))/dble(G)
c         call dcopy( G, temp, 0, pro, 1)
          dummy(1) = (one - pro(nz))/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .le. zero) then
        lwork = 0
c       w(1) = smin
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
c normalise shape to unit geometric mean; that mean becomes scale
      sum = zero
      do j = 1, p
        sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .gt. BIGLOG) then
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      if (temp .gt. SMALOG) then
        temp = exp(temp)
      else
        temp = zero
      end if
      if (Vinv .le. zero) then
        scale = temp/dble(n)
      else
        scale = temp/sumz
      end if
      if (temp .le. eps) then
        lwork = 0
c       w(1) = temp
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      call dscal( p, one/temp, shape, 1)
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .le. eps) then
        lwork = 0
c       w(1) = -smin
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      if (scale .le. eps) then
c       w(1) = -scale
        lwork = 0
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
c w(1:p) holds per-coordinate standard deviations sqrt(scale*shape)
      temp = sqrt(scale)
      do j = 1, p
        w(j) = temp*sqrt(shape(j))
      end do
      call absrng( p, w, 1, smin, smax)
      rc = smin / (one + smax)
      if (smin .le. rteps) then
c       w(1) = -smin
        lwork = 0
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
c E-step: log-densities into z, then log-sum-exp posterior + loglik
      const = dble(p)*(pi2log + log(scale))/two
      do k = 1, G
c       temp = pro(k)
        do i = 1, n
          call dcopy( p, x(i,1), n, w(p1), 1)
          call daxpy( p, (-one), mu(1,k), 1, w(p1), 1)
          call dgemv( 'N', p, p, one, O(1,1,k), p, w(p1), 1, zero, s, 1)
          do j = 1, p
            s(j) = s(j) / w(j)
          end do
          sum = ddot( p, s, 1, s, 1)/two
c         z(i,k) = temp*exp(-(const+sum))
          z(i,k) = -(const+sum)
        end do
      end do
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      err = abs(hold-hood)/(one+abs(hood))
      hold = hood
      if (err .gt. tol .and. iter .lt. maxi) goto 100
      lwork = 0
c     w(1) = rc
      tol = err
      eps = hood
      maxi = iter
      return
      end

c meeevp: as meeev, but the M-step incorporates a conjugate prior with
c hyperparameters pshrnk (mean shrinkage), pmu (prior mean), pscale
c (prior scale matrix) and pdof (prior degrees of freedom) --
c presumably a normal-inverse-Wishart-style prior; verify against the
c mclust priorControl documentation.
      subroutine meeevp( EQPRO, x, n, p, G, Vinv,
     *                   pshrnk, pmu, pscale, pdof,
     *                   z, maxi, tol, eps,
     *                   lwork, mu, scale, shape, O, pro, w, s)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      logical            EQPRO
      integer            n, p, G, maxi, lwork
c     double precision   pshrnk, pmu(p), pscale(p,p), pdof
      double precision   pshrnk, pmu(*), pscale(p,*), pdof
      double precision   Vinv, eps, tol, scale
c     double precision   x(n,p), z(n,G[+1]), w(lwork), s(p)
      double precision   x(n,*), z(n,  *  ), w(  *  ), s(*)
c     double precision   mu(p,G), shape(p), O(p,p,G), pro(G[+1])
      double precision   mu(p,*), shape(*), O(p,p,*), pro(  *  )
      integer            nz, p1, iter, i, j, k, l, j1, info
      double precision   dnp, temp, term, rteps
      double precision   sumz, sum, smin, smax, cs, sn
      double precision   const, rc, hood, hold, err
      double precision   prok, tmin, tmax, zsum
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   SMALOG, BIGLOG
      parameter         (SMALOG = -708.d0, BIGLOG = 709.d0)
      double precision   ddot
      external           ddot
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (pshrnk .lt. zero) pshrnk = zero
      if (maxi .le. 0) return
      if (Vinv .gt. zero) then
        nz = G + 1
c       ViLog = log(Vinv)
      else
        nz = G
        if (EQPRO) then
c         call dcopy( G, one/dble(G), 0, pro, 1)
          dummy(1) = one/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if
      p1 = p + 1
      dnp = dble(n*p)
      eps = max(eps,zero)
      rteps = sqrt(eps)
      tol = max(tol,zero)
      hold = FLMAX/two
      hood = FLMAX
      err = FLMAX
      iter = 0
 100  continue
      iter = iter + 1
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      zsum = one
      sumz = zero
      l = 0
      do k = 1, G
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
c start the R factor from the prior scale matrix pscale
        do j = 1, p
          call dcopy( p, pscale(1,j), 1, O(1,j,k), 1)
        end do
        sum = zero
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        sumz = sumz + sum
        if (.not. EQPRO) pro(k) = sum / dble(n)
        zsum = min(zsum,sum)
        if (sum .gt. rteps) then
          call dscal( p, (one/sum), mu(1,k), 1)
          do i = 1, n
            call dcopy( p, x(i,1), n, w, 1)
            call daxpy( p, (-one), mu(1,k), 1, w, 1)
            call dscal( p, sqrt(z(i,k)), w, 1)
            j = 1
            do j1 = 2, p
              call drotg( O(j,j,k), w(j), cs, sn)
              call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
              j = j1
            end do
            call drotg( O(p,p,k), w(p), cs, sn)
          end do
c fold the prior mean deviation into the R factor with weight
c sum*pshrnk/(sum+pshrnk), then shrink mu(,k) toward pmu
          call dcopy( p, pmu, 1, w, 1)
          call daxpy( p, (-one), mu(1,k), 1, w, 1)
          term = sum+pshrnk
          const = (sum*pshrnk)/term
          call dscal( p, sqrt(const), w, 1)
          j = 1
          do j1 = 2, p
            call drotg( O(j,j,k), w(j), cs, sn)
            call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
            j = j1
          end do
          call drotg( O(p,p,k), w(p), cs, sn)
          call dscal( p, sum/term, mu(1,k), 1)
          call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1)
          call dgesvd( 'N', 'O', p, p, O(1,1,k), p, s,
     *                 dummy, 1, dummy, 1, w, lwork, info)
          if (info .ne. 0) then
            l = info
          else
            do j = 1, p
              temp = s(j)
              shape(j) = shape(j) + temp*temp
            end do
          end if
        end if
      end do
      if (l .ne. 0 .or. zsum .le. rteps) then
        lwork = l
c       w(1) = FLMAX
        tol = err
        if (l .ne. 0) then
          eps = FLMAX
        else
          eps = -FLMAX
        end if
        maxi = iter
        return
      end if
      if (Vinv .gt. zero) then
        term = zero
        do i = 1, n
          term = term + z(i,nz)
        end do
        temp = term / dble(n)
        pro(nz) = temp
c       call dcopy( n, ViLog, 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
        if (EQPRO) then
c         temp = (one - pro(nz))/dble(G)
c         call dcopy( G, temp, 0, pro, 1)
          dummy(1) = (one - pro(nz))/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .eq. zero) then
c       w(1) = smin
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      sum = zero
      do j = 1, p
        sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .gt. BIGLOG) then
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      if (temp .gt. SMALOG) then
        temp = exp(temp)
      else
        temp = zero
      end if
c scale divisor includes the prior degrees of freedom
      term = pdof + dble(p) + one
      if (pshrnk .gt. zero) term = term + one
      if (Vinv .le. zero) then
        scale = temp/(term + dble(n))
      else
        scale = temp/(term + sumz)
      end if
      if (temp .le. eps) then
c       w(1) = temp
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      call dscal( p, one/temp, shape, 1)
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .le. eps) then
c       w(1) = -smin
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      if (scale .le. eps) then
c       w(1) = -scale
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      temp = sqrt(scale)
      do j = 1, p
        w(j) = temp*sqrt(shape(j))
      end do
      call sgnrng( p, w, 1, smin, smax)
      rc = smin / (one + smax)
      if (smin .le. rteps) then
c       w(1) = -smin
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
c E-step identical in form to meeev
      const = dble(p)*(pi2log + log(scale))/two
      do k = 1, G
c       temp = pro(k)
        do i = 1, n
          call dcopy( p, x(i,1), n, w(p1), 1)
          call daxpy( p, (-one), mu(1,k), 1, w(p1), 1)
          call dgemv( 'N', p, p, one, O(1,1,k), p, w(p1), 1, zero, s, 1)
          do j = 1, p
            s(j) = s(j) / w(j)
          end do
          sum = ddot( p, s, 1, s, 1)/two
c         z(i,k) = temp*exp(-(const+sum))
          z(i,k) = -(const+sum)
        end do
      end do
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      err = abs(hold-hood)/(one+abs(hood))
      hold = hood
      if (err .gt. tol .and. iter .lt. maxi) goto 100
      lwork = 0
c     w(1) = rc
      tol = err
      eps = hood
      maxi = iter
      return
      end

c mseev: single M-step for the "EEV" model.  From memberships z it
c computes mixing proportions pro, means mu, orientations O, common
c normalised shape and scale.  NOTE: z is overwritten (columns reused
c as singular-value workspace for dgesvd).  Failure is signalled by
c scale = +/-FLMAX and/or shape filled with FLMAX.
      subroutine mseev ( x, z, n, p, G, w, lwork,
     *                   mu, scale, shape, O, pro)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G, lwork
      double precision   scale
c     double precision   x(n,p), z(n,G), w(lwork)
      double precision   x(n,*), z(n,*), w(  *  )
c     double precision   shape(p), O(p,p,G), mu(p,G), pro(G)
      double precision   shape(*), O(p,p,*), mu(p,*), pro(*)
      integer            i, j, k, j1, l, info
      double precision   sum, sumz, temp
      double precision   cs, sn, smin, smax
      double precision   zero, one
      parameter         (zero = 0.d0, one = 1.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   BIGLOG
      parameter         (BIGLOG = 709.d0)
      double precision   SMALOG
      parameter         (SMALOG = -708.d0)
      double precision   dummy(1)
c------------------------------------------------------------------------------
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      l = 0
      sumz = zero
      scale = zero
      do k = 1, G
        dummy(1) = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do j = 1, p
c         call dcopy( p, zero, 0, O(1,j,k), 1)
          call dcopy( p, dummy, 0, O(1,j,k), 1)
        end do
        sum = zero
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        sumz = sumz + sum
        pro(k) = sum / dble(n)
c proceed only when one/sum cannot overflow
        if (sum .ge. one .or. one .lt. sum*FLMAX) then
          call dscal( p, (one/sum), mu(1,k), 1)
          do i = 1, n
            call dcopy( p, x(i,1), n, w, 1)
            call daxpy( p, (-one), mu(1,k), 1, w, 1)
            call dscal( p, sqrt(z(i,k)), w, 1)
            j = 1
            do j1 = 2, p
              call drotg( O(j,j,k), w(j), cs, sn)
              call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
              j = j1
            end do
            call drotg( O(p,p,k), w(p), cs, sn)
          end do
c singular values land in z(1:p,k) -- z is destroyed here
          call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k),
     *                 dummy, 1, dummy, 1, w, lwork, info)
          if (info .ne. 0) then
            l = info
          else if (scale .ne. FLMAX) then
            do j = 1, p
              temp = z(j,k)
              shape(j) = shape(j) + temp*temp
            end do
          end if
        else
          scale = FLMAX
c         call dcopy( p, FLMAX, 0, mu(1,k), 1)
          dummy(1) = FLMAX
          call dcopy( p, dummy, 0, mu(1,k), 1)
        end if
      end do
      if (scale .eq. FLMAX .or. l .ne. 0) then
        lwork = l
        if (l .ne. 0) then
          scale = FLMAX
        else
          scale = -FLMAX
        end if
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        return
      end if
      lwork = 0
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .eq. zero) then
        scale = FLMAX
        return
      end if
      sum = zero
      do j = 1, p
        sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .gt. BIGLOG) then
        scale = FLMAX
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        return
      end if
      if (temp .ge. SMALOG) then
        temp = exp(temp)
      else
        temp = zero
      end if
      if (temp .ge. sumz*FLMAX) then
        scale = FLMAX
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        return
      end if
      scale = temp/sumz
      if (temp .lt. one .and. one .ge. temp*FLMAX) then
c       call dcopy( p, FLMAX, 0, shape, 1)
c NOTE(review): this failure branch fills shape with ZERO, while the
c commented-out original and the sibling mseevp use FLMAX here --
c possibly an unintended divergence; confirm before changing.
        dummy(1) = zero
        call dcopy( p, dummy, 0, shape, 1)
        return
      end if
      call dscal( p, one/temp, shape, 1)
      return
      end

c mseevp: M-step for "EEV" with conjugate prior (pshrnk, pmu, pscale,
c pdof), structured like mseev with the prior folded in as in meeevp.
c z is destroyed (reused for singular values).
      subroutine mseevp( x, z, n, p, G,
     *                   pshrnk, pmu, pscale, pdof,
     *                   w, lwork, mu, scale, shape, O, pro)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G, lwork
c     double precision   pshrnk, pmu(p), pscale(p,p), pdof
      double precision   pshrnk, pmu(*), pscale(p,*), pdof
      double precision   scale
c     double precision   x(n,p), z(n,G), w(lwork)
      double precision   x(n,*), z(n,*), w(  *  )
c     double precision   mu(p,G), shape(p), O(p,p,G), pro(G)
      double precision   mu(p,*), shape(*), O(p,p,*), pro(*)
      integer            p1, i, j, k, l, j1, info
      double precision   temp, term, const
      double precision   sumz, sum, smin, smax, cs, sn
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   BIGLOG
      parameter         (BIGLOG = 709.d0)
      double precision   SMALOG
      parameter         (SMALOG = -708.d0)
      double precision   dummy(1)
c------------------------------------------------------------------------------
c NOTE(review): this clamp zeroes any POSITIVE pshrnk, disabling the
c prior shrinkage entirely; the siblings meeevp and mseiip use
c ".lt. zero" here (clamping only negative values).  Looks like an
c inverted comparison -- confirm against the mclust sources upstream.
      if (pshrnk .gt. zero) pshrnk = zero
      p1 = p + 1
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      l = 0
      sumz = zero
      scale = zero
      do k = 1, G
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do j = 1, p
          call dcopy( p, pscale(1,j), 1, O(1,j,k), 1)
        end do
        sum = zero
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        sumz = sumz + sum
        pro(k) = sum / dble(n)
        if (sum .ge. one .or. one .lt. sum*FLMAX) then
          call dscal( p, (one/sum), mu(1,k), 1)
          do i = 1, n
            call dcopy( p, x(i,1), n, w, 1)
            call daxpy( p, (-one), mu(1,k), 1, w, 1)
            call dscal( p, sqrt(z(i,k)), w, 1)
            j = 1
            do j1 = 2, p
              call drotg( O(j,j,k), w(j), cs, sn)
              call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
              j = j1
            end do
            call drotg( O(p,p,k), w(p), cs, sn)
          end do
          call dcopy( p, pmu, 1, w, 1)
          call daxpy( p, (-one), mu(1,k), 1, w, 1)
          term = sum+pshrnk
          const = (sum*pshrnk)/term
          call dscal( p, sqrt(const), w, 1)
          j = 1
          do j1 = 2, p
            call drotg( O(j,j,k), w(j), cs, sn)
            call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
            j = j1
          end do
          call drotg( O(p,p,k), w(p), cs, sn)
          call dscal( p, sum/term, mu(1,k), 1)
          call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1)
          call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k),
     *                 dummy, 1, dummy, 1, w, lwork, info)
          if (info .ne. 0) then
            l = info
          else if (scale .ne. FLMAX) then
            do j = 1, p
              temp = z(j,k)
              shape(j) = shape(j) + temp*temp
            end do
          end if
        else
          scale = FLMAX
c         call dcopy( p, FLMAX, 0, mu(1,k), 1)
          dummy(1) = FLMAX
          call dcopy( p, dummy, 0, mu(1,k), 1)
        end if
      end do
      if (scale .eq. FLMAX .or. l .ne. 0) then
        lwork = l
        if (l .ne. 0) then
          scale = FLMAX
        else
          scale = -FLMAX
        end if
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        return
      end if
      lwork = 0
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .eq. zero) then
        scale = FLMAX
        return
      end if
      sum = zero
      do j = 1, p
        sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .gt. BIGLOG) then
        scale = FLMAX
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        return
      end if
      if (temp .ge. SMALOG) then
        temp = exp(temp)
      else
        temp = zero
      end if
      term = pdof + dble(p) + one
      if (pshrnk .gt. zero) term = term + one
      scale = temp/(term + sumz)
      if (temp .lt. one .and. one .ge. temp*FLMAX) then
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        return
      end if
      call dscal( p, one/temp, shape, 1)
      return
      end

c eseii: E-step for the spherical common-variance model ("EII" per
c mclust naming): all components share the scalar variance sigsq.
c Fills z with posteriors and returns the log-likelihood in hood,
c with the same noise-column and pro(1) < 0 conventions as eseev.
      subroutine eseii ( x, mu, sigsq, pro, n, p, G, Vinv, hood, z)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G
      double precision   sigsq, hood, Vinv
c     double precision   x(n,p), mu(p,G), pro(G[+1]), z(n,G[+1])
      double precision   x(n,*), mu(p,*), pro(  *  ), z(n,  *  )
      integer            i, j, k, nz
      double precision   sum, temp, const, prok, tmin, tmax
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   SMALOG
      parameter         (SMALOG = -708.d0)
      double precision   dummy(1)
c-----------------------------------------------------------------------------
      if (sigsq .le. zero) then
        hood = FLMAX
        return
      end if
      const = dble(p)*(pi2log+log(sigsq))
      do k = 1, G
c       prok = pro(k)
        do i = 1, n
c sum = squared Euclidean distance of x(i,) from mu(,k)
          sum = zero
          do j = 1, p
            temp = x(i,j) - mu(j,k)
            sum = sum + temp*temp
          end do
c         z(i,k) = prok*exp(-(const+sum/sigsq)/two)
c guard against overflow in sum/sigsq
          if (sigsq .lt. one .and. sum .ge. sigsq*FLMAX) then
            hood = FLMAX
            return
          end if
          z(i,k) = -(const+sum/sigsq)/two
        end do
      end do
      if (pro(1) .lt. zero) return
      nz = G
      if (Vinv .gt. zero) then
        nz = nz + 1
c       call dcopy( n, log(Vinv), 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
      end if
c     hood = zero
c     do i = 1, n
c       sum = zero
c       do k = 1, nz
c         sum = sum + z(i,k)
c       end do
c       hood = hood + log(sum)
c       call dscal( nz, (one/sum), z(i,1), n)
c     end do
c log-sum-exp posterior + log-likelihood, as in eseev
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        if (sum .lt. one .and. one .ge. sum*FLMAX) then
          hood = FLMAX
          return
        end if
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      return
      end

c hceii: agglomerative hierarchical clustering for the spherical
c ("EII") criterion.  ic holds the initial partition (and is used as
c scratch); ng initial groups; ns merge stages requested; d is a
c packed lower-triangular array of pairwise merge costs plus a trace
c of the merges; the merged pair indices are returned in x(,1:2).
c Uses the sibling routine wardsw to swap rows/columns of d.
      subroutine hceii ( x, n, p, ic, ng, ns, v, nd, d)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, ic(n), ng, ns, nd
c     double precision   x(n,p), v(p), d(ng*(ng-1)/2)
      double precision   x(n,*), v(*), d(*)
      integer            lg, ld, ll, lo, ls
      integer            i, j, k, m
      integer            ni, nj, nij, iopt, jopt, iold, jold
      integer            ij, ici, icj, ii, ik, jk
      double precision   ri, rj, rij, si, sj, sij
      double precision   dij, dopt, dold
      external           wardsw
      double precision   one
      parameter         (one = 1.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   ddot
      external           ddot
c------------------------------------------------------------------------------
      iopt = 0
      lg = ng
      ld = (ng*(ng-1))/2
      ll = nd-ng
      lo = nd
c     call intpr( 'ic', -1, ic, n)
c     call intpr( 'no. of groups', -1, lg, 1)
c group heads should be first among rows of x
      i = 1
      j = 2
    1 continue
      icj = ic(j)
      if (icj .ne. j) goto 2
      if (j .eq. lg) goto 3
      i = j
      j = j + 1
      goto 1
    2 continue
      k = i
      m = j + 1
      do j = m, n
        icj = ic(j)
        if (icj .gt. k) then
          k = k + 1
          call dswap( p, x(k,1), n, x(j,1), n)
          ic(j) = ic(k)
          ic(k) = icj
        end if
      end do
    3 continue
c     call intpr( 'ic', -1, ic, n)
c fold each non-head row into its group-head row as a scaled sum;
c afterwards ic(i) holds the size of group i
      do j = 1, n
        i = ic(j)
        if (i .ne. j) then
          ic(j) = 0
          ni = ic(i)
          nij = ni + 1
          ic(i) = nij
          ri = dble(ni)
          rij = dble(nij)
          sj = sqrt(one/rij)
          si = sqrt(ri)*sj
c update column sum in kth row
          call dscal( p, si, x(i,1), n)
          call daxpy( p, sj, x(j,1), n, x(i,1), n)
        else
          ic(j) = 1
        end if
      end do
c     call intpr( 'ic', -1, ic, n)
c initial pairwise merge costs, tracking the minimum (iopt,jopt)
      dopt = FLMAX
      ij = 0
      do j = 2, lg
        nj = ic(j)
        rj = dble(nj)
        do i = 1, (j-1)
          ni = ic(i)
          ri = dble(ni)
          nij = ni + nj
          rij = dble(nij)
          si = sqrt(ri/rij)
          sj = sqrt(rj/rij)
          call dcopy( p, x(i,1), n, v, 1)
          call dscal( p, sj, v, 1)
          call daxpy( p, (-si), x(j,1), n, v, 1)
          dij = ddot(p, v, 1, v, 1)
          ij = ij + 1
          d(ij) = dij
          if (dij .le. dopt) then
            dopt = dij
            iopt = i
            jopt = j
          end if
        end do
      end do
c     if (.false.) then
c       i = 1
c       ij = 1
c       do j = 2, ng
c         call dblepr( 'dij', -1, d(ij), i)
c         ij = ij + i
c         i = j
c       end do
c     end if
      if (ns .eq. 1) then
        if (iopt .lt. jopt) then
          x(1,1) = iopt
          x(1,2) = jopt
        else
          x(1,1) = jopt
          x(1,2) = iopt
        end if
        d(1) = dopt
        return
      end if
c main merge loop: merge (iopt,jopt), move the last group into the
c vacated slot via wardsw, update the affected costs in packed d
      ls = 1
 100  continue
      ni = ic(iopt)
      nj = ic(jopt)
      nij = ni + nj
      ic(iopt) = nij
      ic(jopt) = -iopt
      if (jopt .ne. lg) then
        call wardsw( jopt, lg, d)
        m = ic(jopt)
        ic(jopt) = ic(lg)
        ic(lg) = m
      end if
      si = dble(ni)
      sj = dble(nj)
      sij = dble(nij)
      dold = dopt
      iold = iopt
      jold = jopt
      iopt = -1
      jopt = -1
      dopt = FLMAX
      lg = lg - 1
      ld = ld - lg
      ii = (iold*(iold-1))/2
      if (iold .gt. 1) then
        ik = ii - iold + 1
        do j = 1, (iold - 1)
          nj = ic(j)
          rj = dble(nj)
          ik = ik + 1
          jk = ld + j
          dij = (rj+si)*d(ik)+(rj+sj)*d(jk)
          dij = (dij-rj*dold)/(rj+sij)
          d(ik) = dij
        end do
      end if
      if (iold .lt. lg) then
        ik = ii + iold
        i = iold
        do j = (iold + 1), lg
          nj = ic(j)
          rj = dble(nj)
          jk = ld + j
          dij = (rj+si)*d(ik)+(rj+sj)*d(jk)
          dij = (dij-rj*dold)/(rj+sij)
          d(ik) = dij
          ik = ik + i
          i = j
        end do
      end if
c record the merge (cost, i, j) at the top end of d
      d(lo) = dold
      lo = lo - 1
      d(lo) = dble(iold)
      lo = lo - 1
      d(lo) = dble(jold)
      lo = lo - 1
c update d and find max
      jopt = 2
      iopt = 1
      dopt = d(1)
      if (lg .eq. 2) goto 900
      ij = 1
      do i = 2, ld
        si = d(i)
        if (si .le. dopt) then
          ij = i
          dopt = si
        end if
      end do
c convert packed index ij back to (iopt,jopt)
      if (ij .gt. 1) then
        do i = 2, ij
          iopt = iopt + 1
          if (iopt .ge. jopt) then
            jopt = jopt + 1
            iopt = 1
          end if
        end do
      end if
      ls = ls + 1
      if (ls .eq. ns) goto 900
      goto 100
 900  continue
      d(lo) = dopt
      lo = lo - 1
      d(lo) = dble(iopt)
      lo = lo - 1
      d(lo) = dble(jopt)
c relabel the recorded merges into output order in x(,1:2)
      do i = 1, ng
        ic(i) = i
      end do
      lo = nd - 1
      ld = lo
      si = d(lo)
      lo = lo - 1
      sj = d(lo)
      ic(int(sj)) = ng
      if (si .lt. sj) then
        x(1,1) = si
        x(1,2) = sj
      else
        x(1,1) = sj
        x(1,2) = si
      end if
      lg = ng + 1
      do k = 2, ns
        lo = lo - 1
        d(ld) = d(lo)
        ld = ld - 1
        lo = lo - 1
        i = int(d(lo))
        ici = ic(i)
        lo = lo - 1
        j = int(d(lo))
        icj = ic(j)
        if (ici .gt. icj) ic(i) = icj
        ic(j) = ic(lg-k)
        if (ici .lt. icj) then
          x(k,1) = dble(ici)
          x(k,2) = dble(icj)
        else
          x(k,1) = dble(icj)
          x(k,2) = dble(ici)
        end if
      end do
c reverse the merge costs stored at the tail of d
      ld = nd
      lo = 1
      do k = 1, ns
        si = d(lo)
        d(lo) = d(ld)
        d(ld) = si
        ld = ld - 1
        lo = lo + 1
      end do
      return
      end

c meeii: EM iteration for the spherical common-variance model ("EII").
c Same input/output conventions as meeev: z in/out, maxi/tol/eps
c overwritten with iteration count, final error, final loglik.
      subroutine meeii ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps,
     *                   mu, sigsq, pro)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      logical            EQPRO
      integer            n, p, G, maxi
      double precision   Vinv, eps, tol, sigsq
c     double precision   x(n,p), z(n,G[+1]), mu(p,G), pro(G[+1])
      double precision   x(n,*), z(n,  *  ), mu(p,*), pro(  *  )
      integer            nz, iter, i, j, k
      double precision   sum, sumz, temp, term, prok, tmax, tmin, rteps
      double precision   const, hold, hood, err, dnp, zsum
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)
      double precision   SMALOG
      parameter         (SMALOG = -708.d0)
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (maxi .le. 0) return
      dnp = dble(n*p)
      if (Vinv .gt. zero) then
        nz = G + 1
c       ViLog = log(Vinv)
      else
        nz = G
        if (EQPRO) then
c         call dcopy( G, one/dble(G), 0, pro, 1)
          dummy(1) = one/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if
      eps = max(eps,zero)
      tol = max(tol,zero)
      rteps = sqrt(eps)
c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err = FLMAX
      iter = 0
 100  continue
      iter = iter + 1
      sigsq = zero
      sumz = zero
      zsum = one
      do k = 1, G
        sum = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        sumz = sumz + sum
        if (.not. EQPRO) pro(k) = sum/dble(n)
        zsum = min(sum,zsum)
        if (sum .gt. rteps) then
          call dscal( p, (one/sum), mu(1,k), 1)
c accumulate weighted squared distances; z(i,k) temporarily reused
c to hold the squared distance of x(i,) from mu(,k)
          do i = 1, n
            sum = zero
            do j = 1, p
              temp = abs(x(i,j) - mu(j,k))
              if (temp .gt. RTMIN) sum = sum + temp*temp
            end do
            if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN)
     *          sigsq = sigsq + z(i,k)*sum
            z(i,k) = sum
          end do
        else
          sigsq = FLMAX
c         call dcopy( p, FLMAX, 0, mu(1,k), 1)
          dummy(1) = FLMAX
          call dcopy( p, dummy, 0, mu(1,k), 1)
        end if
      end do
      if (zsum .le. rteps) then
        tol = zsum
        eps = -FLMAX
        maxi = iter
        return
      end if
      if (Vinv .le. zero) then
        sigsq = sigsq / dnp
      else
        sigsq = sigsq / (dble(p)*sumz)
      end if
      term = zero
      if (Vinv .gt. zero) then
        do i = 1, n
          term = term + z(i,nz)
        end do
        temp = term / dble(n)
        pro(nz) = temp
c       call dcopy( n, ViLog, 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
        if (EQPRO) then
c         temp = (one - pro(nz))/dble(G)
c         call dcopy( G, temp, 0, pro, 1)
          dummy(1) = (one - pro(nz))/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if
      if (sigsq .le. eps) then
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
c E-step: z currently holds squared distances; convert to log-density
      const = dble(p)*(pi2log+log(sigsq))
      do k = 1, G
c       temp = pro(k)
        do i = 1, n
c         z(i,k) = temp*exp(-(const+(z(i,k)/sigsq))/two)
          z(i,k) = -(const+(z(i,k)/sigsq))/two
        end do
      end do
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      err = abs(hold-hood)/(one+abs(hood))
      hold = hood
      if (err .gt. tol .and. iter .lt. maxi) goto 100
      tol = err
      eps = hood
      maxi = iter
      return
      end

c meeiip: EM for the spherical model with conjugate prior (pshrnk,
c pmu, pscale, pdof).  After convergence pdof is overwritten with a
c log prior-density term built from dlngam -- presumably the log
c normal-inverse-gamma prior evaluated at the estimate; verify
c against the mclust priorControl documentation.
      subroutine meeiip( EQPRO, x, n, p, G, Vinv,
     *                   pshrnk, pmu, pscale, pdof,
     *                   z, maxi, tol, eps,
     *                   mu, sigsq, pro)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      logical            EQPRO
      integer            n, p, G, maxi
c     double precision   pshrnk, pmu(p), pscale, pdof
      double precision   pshrnk, pmu(*), pscale, pdof
      double precision   Vinv, eps, tol, sigsq
c     double precision   x(n,p), z(n,G[+1]), mu(p,G), pro(G[+1])
      double precision   x(n,*), z(n,  *  ), mu(p,*), pro(  *  )
      integer            nz, iter, i, j, k
      double precision   sum, sumk, sumz, temp, term, tmax, tmin
      double precision   const, hold, hood, err, dnp, prok
      double precision   pmupmu, cmu, cgam, rmu, rgam, zsum, rteps
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   SMLOG
      parameter         (SMLOG = -708.d0)
      double precision   ddot, dlngam
      external           ddot, dlngam
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (maxi .le. 0) return
      dnp = dble(n*p)
      if (Vinv .gt. zero) then
        nz = G + 1
c       ViLog = log(Vinv)
      else
        nz = G
        if (EQPRO) then
c         call dcopy( G, one/dble(G), 0, pro, 1)
          dummy(1) = one/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if
      eps = max(eps,zero)
      tol = max(tol,zero)
      rteps = sqrt(eps)
c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err = FLMAX
      iter = 0
      pmupmu = ddot( p, pmu, 1, pmu, 1)
 100  continue
      iter = iter + 1
      sigsq = zero
      sumz = zero
      zsum = one
      do k = 1, G
        sumk = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do i = 1, n
          temp = z(i,k)
          sumk = sumk + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        sumz = sumz + sumk
        if (.not. EQPRO) pro(k) = sumk/dble(n)
        zsum = min(zsum,sumk)
        if (sumk .gt. rteps) then
          call dscal( p, (one/sumk), mu(1,k), 1)
          do i = 1, n
            sum = zero
            do j = 1, p
              temp = x(i,j) - mu(j,k)
              sum = sum + temp*temp
            end do
            sigsq = sigsq + z(i,k)*sum
          end do
c temp = ||mu(,k) - pmu||^2 expanded via dot products; fold the
c shrinkage term into sigsq and pull mu(,k) toward the prior mean
          temp = pmupmu + ddot( p, mu(1,k), 1, mu(1,k), 1)
          temp = temp - two*ddot( p, mu(1,k), 1, pmu, 1)
          const = sumk+pshrnk
          sigsq = sigsq + ((pshrnk*sumk)/const)*temp
          call dscal( p, (sumk/const), mu(1,k), 1)
          call daxpy(p, (pshrnk/const), pmu, 1, mu(1,k), 1)
        end if
      end do
      if (zsum .le. rteps) then
        tol = zsum
        eps = -FLMAX
        maxi = iter
        return
      end if
      term = zero
      if (Vinv .le. zero) then
        sigsq = sigsq / (pdof + dble((n+G)*p) + two)
      else
        sigsq = sigsq / (pdof + (sumz+dble(G))*dble(p) + two)
        do i = 1, n
          term = term + z(i,nz)
        end do
        temp = term / dble(n)
        pro(nz) = temp
c       call dcopy( n, ViLog, 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
        if (EQPRO) then
c         temp = (one - pro(nz))/dble(G)
c         call dcopy( G, temp, 0, pro, 1)
          dummy(1) = (one - pro(nz))/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if
      if (sigsq .le. eps) then
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if
      const = dble(p)*(pi2log+log(sigsq))
      do i = 1, n
        do k = 1, G
          sum = zero
          do j = 1, p
            temp = x(i,j) - mu(j,k)
            sum = sum + temp*temp
          end do
          z(i,k) = -(const+(sum/sigsq))/two
        end do
      end do
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMLOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      err = abs(hold-hood)/(one+abs(hood))
      hold = hood
      if (err .gt. tol .and. iter .lt. maxi) goto 100
      tol = err
      eps = hood
      maxi = iter
c post-convergence: overwrite pdof with a log prior-density value
      cmu = dble(p)*(log(pshrnk)-pi2log)/two
      const = pdof/two
      cgam = const*log(pscale/two)-dlngam(const)
      rmu = zero
      do k = 1, G
        temp = pmupmu + ddot( p, mu(1,k), 1, mu(1,k), 1)
        temp = temp - two*ddot( p, mu(1,k), 1, pmu, 1)
        rmu = rmu + (pshrnk*temp)/sigsq
      end do
      term = log(sigsq)
      rmu = -(rmu + dble(p)*term)/two
      rgam = -(const+one)*term - (pscale/sigsq)/two
      pdof = (dble(G)*cmu+cgam) + (rmu+rgam)
      return
      end

c mseii: single M-step for the spherical common-variance model
c (body continues past this span's end in the file).
      subroutine mseii ( x, z, n, p, G, mu, sigsq, pro)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G
c     double precision   x(n,p), z(n,G), mu(p,G), sigsq, pro(G)
      double precision   x(n,*), z(n,*), mu(p,*), sigsq, pro(*)
      integer            i, j, k
      double precision   sum, sumz, temp
      double precision   zero, one
      parameter         (zero = 0.d0, one = 1.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)
      double precision   dummy(1)
c------------------------------------------------------------------------------
c mseii body: from memberships z compute mixing proportions pro,
c means mu, and the common spherical variance sigsq as the weighted
c mean squared deviation over all groups and coordinates.  A group
c with (near-)zero weight signals failure via sigsq = FLMAX and
c mu(,k) filled with FLMAX.
      sumz = zero
      sigsq = zero
      do k = 1, G
        sum = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        sumz = sumz + sum
        pro(k) = sum/dble(n)
c proceed only when one/sum cannot overflow
        if (sum .ge. one .or. one .lt. sum*FLMAX) then
          call dscal( p, (one/sum), mu(1,k), 1)
          if (sigsq .ne. FLMAX) then
            do i = 1, n
              sum = zero
              do j = 1, p
                temp = abs(x(i,j) - mu(j,k))
                if (temp .gt. RTMIN) sum = sum + temp*temp
              end do
              if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN)
     *            sigsq = sigsq + z(i,k)*sum
            end do
          end if
        else
          sigsq = FLMAX
c         call dcopy( p, FLMAX, 0, mu(1,k), 1)
          dummy(1) = FLMAX
          call dcopy( p, dummy, 0, mu(1,k), 1)
        end if
      end do
c sumz .eq. n when no noise
      if (sigsq .ne. FLMAX) sigsq = sigsq / (sumz*dble(p))
      return
      end

c mseiip: M-step for the spherical model with conjugate prior
c (pshrnk, pmu, pscale, pdof): means are shrunk toward pmu with
c weight pshrnk/(sum+pshrnk) and sigsq's divisor includes pdof.
      subroutine mseiip( x, z, n, p, G,
     *                   pshrnk, pmu, pscale, pdof,
     *                   mu, sigsq, pro)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G
c     double precision   pshrnk, pmu(p), pscale, pdof
      double precision   pshrnk, pmu(*), pscale, pdof
c     double precision   x(n,p), z(n,G), mu(p,G), sigsq, pro(G)
      double precision   x(n,*), z(n,*), mu(p,*), sigsq, pro(*)
      integer            i, j, k
      double precision   sum, sumz, pmupmu
      double precision   const, temp, dnp
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)
      double precision   ddot
      external           ddot
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (pshrnk .lt. zero) pshrnk = zero
c no-op; presumably retained to silence an unused-argument warning
      pscale = pscale*1.d0
      dnp = dble(n*p)
      pmupmu = ddot( p, pmu, 1, pmu, 1)
      sumz = zero
      sigsq = zero
      do k = 1, G
        sum = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        sumz = sumz + sum
        pro(k) = sum/dble(n)
        if (sum .gt. one .or. one .le. sum*FLMAX) then
          call dscal( p, (one/sum), mu(1,k), 1)
c temp = ||mu(,k) - pmu||^2 via expanded dot products
          temp = pmupmu + ddot( p, mu(1,k), 1, mu(1,k), 1)
          temp = temp - two*ddot( p, mu(1,k), 1, pmu, 1)
          const = sum+pshrnk
          call dscal( p, (sum/const), mu(1,k), 1)
          call daxpy(p, (pshrnk/const), pmu, 1, mu(1,k), 1)
          if (sigsq .ne. FLMAX) then
            sigsq = sigsq + ((pshrnk*sum)/const)*temp
            do i = 1, n
              sum = zero
              do j = 1, p
                temp = abs(x(i,j) - mu(j,k))
                if (temp .gt. RTMIN) sum = sum + temp*temp
              end do
              if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN)
     *            sigsq = sigsq + z(i,k)*sum
            end do
          end if
        else
          sigsq = FLMAX
c         call dcopy( p, FLMAX, 0, mu(1,k), 1)
          dummy(1) = FLMAX
          call dcopy( p, dummy, 0, mu(1,k), 1)
        end if
      end do
      if (sigsq .eq. FLMAX) return
      temp = pdof + sumz*dble(p) + two
      if (pshrnk .gt. zero) temp = temp + dble(G*p)
      sigsq = sigsq / temp
      return
      end

c esevi: E-step for the model with common scale and per-group shape
c ("EVI" per mclust naming -- diagonal covariance; confirm).
c NOTE: shape is overwritten in place with sqrt(scale*shape).
c The routine continues beyond the end of this span.
      subroutine esevi ( x, mu, scale, shape, pro, n, p, G,
     *                   Vinv, hood, z)
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G
      double precision   scale, hood, Vinv
c     double precision   x(n,p), z(n,G[+1])
      double precision   x(n,*), z(n,  *  )
c     double precision   mu(p,G), shape(p,G), pro(G[+1])
      double precision   mu(p,*), shape(p,*), pro(  *  )
      integer            i, j, k, nz
      double precision   sum, temp, const, tmin, tmax
      double precision   smin, smax, prok
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   RTMAX
      parameter         (RTMAX = 1.340780792994260d154)
      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)
      double precision   SMALOG
      parameter         (SMALOG = -708.d0)
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (scale .le. zero) then
        hood = FLMAX
        return
      end if
      do k = 1, G
        call sgnrng( p, shape(1,k), 1, smin, smax)
        if (smin .eq. zero) then
          hood = FLMAX
          return
        end if
      end do
c fold scale into shape: shape(j,k) <- sqrt(scale*shape(j,k))
      temp = sqrt(scale)
      do k = 1, G
        do j = 1, p
          shape(j,k) = temp*sqrt(shape(j,k))
        end do
      end do
      const = dble(p)*(pi2log+log(scale))
      do k = 1, G
c       prok = pro(k)
        do i = 1, n
          sum = zero
          do j = 1, p
            temp = x(i,j) - mu(j,k)
c guards against overflow in temp/shape(j,k) and in temp*temp
            if (shape(j,k) .lt. one .and.
     *          abs(temp) .ge. shape(j,k)*FLMAX) then
              hood = FLMAX
              return
            end if
            temp = temp/shape(j,k)
            if (abs(temp) .ge. RTMAX) then
              hood = FLMAX
              return
            end if
            if (abs(temp) .gt. RTMIN) sum = sum + temp*temp
          end do
c         z(i,k) = prok*exp(-(const+sum)/two)
          z(i,k) = -(const+sum)/two
        end do
      end do
      if (pro(1) .lt. zero) return
      nz = G
      if (Vinv .gt.
zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine meevi ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol, scale c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p,G), pro(G[+1]) double precision mu(p,*), shape(p,*), pro( * ) integer nz, iter, i, j, k double precision sum, sumz, temp, term, epsmin double precision hold, hood, err, smin, smax, const double precision prok, tmin, tmax, zsum, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) 
c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G c if (EQPRO) call dscal( G, one/dble(G), pro, 1) wrong? if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 sumz = zero zsum = one do k = 1, G dummy(1) = zero c call dcopy( p, zero, 0, shape(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum /dble(n) zsum = min(sum,zsum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j,k) if (sqrt(z(i,k))*abs(temp) .gt. RTMIN) * sum = sum + z(i,k)*(temp*temp) end do shape(j,k) = shape(j,k) + sum end do else c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if scale = zero epsmin = FLMAX do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) epsmin = min(smin,epsmin) if (smin .gt. zero) then sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if scale = scale + temp epsmin = min(temp,epsmin) if (temp .lt. 
eps) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape(1,k), 1) end if end do term = zero if (Vinv .gt. zero) then scale = scale /sumz do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else scale = scale /dble(n) end if if (scale .le. eps) then tol = epsmin eps = FLMAX maxi = iter return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. eps) then tol = smin eps = FLMAX maxi = iter return end if end do const = dble(p)*(pi2log + log(scale)) do k = 1, G do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j,k) end do c z(i,k) = pro(k)*exp(-(const+(sum/scale))/two) z(i,k) = -(const+(sum/scale))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine meevip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision Vinv, eps, tol, scale c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p,G), pro(G[+1]) double precision mu(p,*), shape(p,*), pro( * ) integer nz, iter, i, j, k double precision sum, sumz, temp, term, epsmin, zsum double precision hold, hood, err, smin, smax, const double precision prok, tmin, tmax, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G c if (EQPRO) call dscal( G, one/dble(G), pro, 1) wrong? 
if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 sumz = zero zsum = one do k = 1, G c call dcopy( p, pscale, 0, shape(1,k), 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape(1,k), 1) sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum /dble(n) zsum = min(sum,zsum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) term = pshrnk + sum const = (pshrnk*sum)/term do j = 1, p do i = 1, n temp = x(i,j) - mu(j,k) if (abs(temp)*sqrt(z(i,k)) .gt. RTMIN) * shape(j,k) = shape(j,k) + z(i,k)*(temp*temp) end do temp = pmu(j) - mu(j,k) shape(j,k) = shape(j,k) + const*(temp*temp) end do call dscal( p, sum/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if scale = zero epsmin = FLMAX do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) epsmin = min(smin,epsmin) if (smin .gt. zero) then sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if scale = scale + temp epsmin = min(temp,epsmin) if (temp .lt. 
eps) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape(1,k), 1) end if end do term = pdof + one if (Vinv .le. zero) then term = term + dble(n) else term = term + sumz end if if (pshrnk .gt. zero) term = term + one scale = scale/term if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (scale .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. eps) then tol = err eps = FLMAX maxi = iter return end if end do const = dble(p)*(pi2log + log(scale)) do k = 1, G do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j,k) end do c z(i,k) = pro(k)*exp(-(const+(sum/scale))/two) z(i,k) = -(const+(sum/scale))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine msevi ( x, z, n, p, G, mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale, shape(p,G), pro(G) double precision mu(p,*), scale, shape(p,*), pro(*) integer i, j, k double precision smin, smax double precision sum, sumz, temp double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ scale = zero sumz = zero do k = 1, G dummy(1) = zero c call dcopy( p, zero, 0, shape(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum pro(k) = sum/dble(n) if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) else scale = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do if (scale .eq. FLMAX) return c pro(k) now contains n_k do j = 1, p do k = 1, G sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum end do end do scale = zero do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) if (smin .le. 
zero) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .ge. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if if (scale .ne. FLMAX) scale = scale + temp if (temp .lt. one .and. one .ge. temp*FLMAX) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if call dscal( p, one/temp , shape(1,k), 1) 100 continue end do if (sumz .lt. one .and. one .ge. sumz*FLMAX) then scale = FLMAX return end if scale = scale/sumz return end subroutine msevip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale, shape(p,G), pro(G) double precision mu(p,*), scale, shape(p,*), pro(*) integer i, j, k double precision sum, sumz, temp, term double precision smin, smax, const double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. 
zero) pshrnk = zero sumz = zero scale = zero do k = 1, G c call dcopy( p, pscale, 0, shape(1,k), 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape(1,k), 1) c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum pro(k) = sum /dble(n) if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) term = pshrnk + sum const = (pshrnk*sum)/term do j = 1, p do i = 1, n temp = x(i,j) - mu(j,k) if (abs(temp)*sqrt(z(i,k)) .gt. RTMIN) * shape(j,k) = shape(j,k) + z(i,k)*(temp*temp) end do temp = pmu(j) - mu(j,k) shape(j,k) = shape(j,k) + const*(temp*temp) end do call dscal( p, sum/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) else scale = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do if (scale .eq. FLMAX) return scale = zero do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .ge. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero endif if (scale .ne. FLMAX) scale = scale + temp if (temp .le. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if call dscal( p, one/temp, shape(1,k), 1) 100 continue end do term = pdof + sumz + two if (pshrnk .gt. 
zero) term = term + dble(G) scale = scale/term return end subroutine es1v ( x, mu, sigsq, pro, n, G, Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G double precision hood, Vinv c double precision x(n), mu(G), sigsq(G), pro(G[+1]), z(n,G[+1]) double precision x(*), mu(*), sigsq(*), pro( * ), z(n, * ) integer i, k, nz double precision temp, const, tmin, tmax, sum double precision muk, sigsqk, prok, sigmin double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c------------------------------------------------------------------------------ call sgnrng( G, sigsq, 1, sigmin, temp) if (sigmin .le. zero) then hood = FLMAX return end if do k = 1, G c prok = pro(k) muk = mu(k) sigsqk = sigsq(k) const = pi2log + log(sigsqk) do i = 1, n temp = x(i) - muk c z(i,k) = prok*exp(-(const+(temp*temp)/sigsqk)/two) if (sigsqk .lt. one .and. * abs(temp) .ge. sqrt(sigsqk)*RTMAX) then hood = FLMAX return end if z(i,k) = -(const+(temp*temp)/sigsqk)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c temp = zero c do k = 1, nz c temp = temp + z(i,k) c end do c hood = hood + log(temp) c call dscal( nz, (one/temp), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. 
zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine hc1v ( x, n, ic, ng, ns, ALPHA, nd, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, ic(n), ng, ns, nd integer n, ic(*), ng, ns, nd c double precision x(n), ALPHA, d(ng*(ng-1)/2) double precision x(*), ALPHA, d(*) integer lg, ld, ll, lo, ls, i, j, k, m integer ni, nj, nij, nopt, niop, njop integer ij, ici, icj, iopt, jopt, iold double precision ALFLOG double precision qi, qj, qij, ri, rj, rij, si, sj double precision tracei, tracej, trcij, trop double precision termi, termj, trmij, tmop double precision temp, dij, dopt, siop, sjop double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision sqrthf parameter (sqrthf = .70710678118654757274d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision EPSMAX parameter (EPSMAX = 2.2204460492503131d-16) double precision ddot external ddot c------------------------------------------------------------------------------ c call dblepr( 'x', -1, x, n) c call intpr( 'n', -1, n, 1) c call intpr( 'ic', -1, ic, n) c call intpr( 'ng', -1, ng, 1) c call intpr( 'ns', -1, ns, 1) c call dblepr( 'alpha', -1, alpha, 1) c call intpr( 'nd', -1, nd, 1) iopt = 0 jopt = 0 niop = 0 njop = 0 nopt = 0 siop = 0 sjop = 0 tmop = 0.d0 trop = 0.d0 lg = ng ld = (ng*(ng-1))/2 ll = nd-ng lo = nd if (ng .eq. 
1) return ALPHA = max(ALPHA,EPSMAX) ALFLOG = log(ALPHA) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. lg) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. k) then k = k + 1 c call dswap( p, x(k,1), n, x(j,1), n) temp = x(k) x(k) = x(j) x(j) = temp ic(j) = ic(k) ic(k) = icj end if end do 3 continue c set up pointers do j = 1, n i = ic(j) if (i .ne. j) then c update sum of squares k = ic(i) if (k .eq. 1) then ic(i) = j ic(j) = 2 c call dscal( p, sqrthf, x(i,1), n) c call dscal( p, sqrthf, x(j,1), n) c call dcopy( p, x(j,1), n, v, 1) c call daxpy( p, (-one), x(i,1), n, v, 1) c call daxpy( p, one, x(j,1), n, x(i,1), n) c x(j,1) = ddot( p, v, 1, v, 1) temp = sqrthf*(x(j) - x(i)) x(i) = sqrthf*(x(j) + x(i)) x(j) = temp*temp else ic(j) = 0 ni = ic(k) ic(k) = ni + 1 ri = dble(ni) rij = dble(ni+1) qj = one/rij qi = ri*qj si = sqrt(qi) sj = sqrt(qj) c call dcopy( p, x(j,1), n, v, 1) c call dscal( p, si, v, 1) c call daxpy( p, (-sj), x(i,1), n, v, 1) c x(k,1) = x(k,1) + ddot(p, v, 1, v, 1) c call dscal( p, si, x(i,1), n) c call daxpy( p, sj, x(j,1), n, x(i,1), n) temp = si*x(j) - sj*x(i) x(k) = x(k) + temp*temp x(i) = si*x(i) + sj*x(j) end if else ic(j) = 1 end if end do c store terms also so as not to recompute them do k = 1, ng i = ic(k) if (i .ne. 1) then ni = ic(i) ri = dble(ni) d(nd-k+1) = ri*log((x(i)+ALPHA)/ri) end if end do c call intpr( 'ic', -1, ic, n) c compute change in likelihood and determine minimum dopt = FLMAX ij = 0 do j = 2, ng nj = ic(j) if (nj .eq. 1) then tracej = zero termj = ALFLOG rj = one else tracej = x(nj) nj = ic(nj) rj = dble(nj) termj = d(nd-j+1) end if do i = 1, (j-1) ni = ic(i) if (ni .eq. 
1) then tracei = zero termi = ALFLOG ri = one else tracei = x(ni) ni = ic(ni) ri = dble(ni) termi = d(nd-i+1) end if nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) c call dcopy(p, x(i,1), n, v, 1) c call dscal( p, sj, v, 1) c call daxpy( p, (-si), x(j,1), n, v, 1) temp = sj*x(i) - si*x(j) c trcij = (tracei + tracej) + ddot(p,v,1,v,1) trcij = (tracei + tracej) + temp*temp trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = i jopt = j end if end do end do c call dblepr( 'dij', -1, d, (ng*(ng-1))/2) if (ns .eq. 1) then if (iopt .lt. jopt) then x(1) = dble(iopt) ic(1) = jopt else x(1) = dble(jopt) ic(1) = iopt end if d(1) = dopt return end if if (niop .ne. 1) ic(ic(iopt)) = 0 if (njop .ne. 1) ic(ic(jopt)) = 0 ls = 1 100 continue c if (.false.) then c ij = 1 c jj = 1 c do j = 2, n c nj = ic(j) c if (nj .ne. 0 .and. abs(nj) .le. n) then c call dblepr( 'dij', -1, d(ij), jj) c ij = ij + jj c jj = jj + 1 c end if c end do c end if c call dscal( p, siop, x(iopt,1), n) c call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) x(iopt) = siop*x(iopt)+sjop*x(jopt) if (jopt .ne. lg) then call wardsw( jopt, lg, d) c call dcopy( p, x(lg,1), n, x(jopt,1), n) x(jopt) = x(lg) m = ic(jopt) ic(jopt) = ic(lg) ic(lg) = m end if ic(iopt) = lg c ic(lg) = nopt c x(lg,1) = trop x(lg) = trop c x(lg,2) = tmop d(lo) = dopt lo = lo - 1 ic(lg) = lo d(lo) = tmop lo = lo - 1 d(lo) = dble(nopt) lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) lo = lo - 1 lg = lg - 1 ld = ld - lg iold = iopt iopt = -1 jopt = -1 dopt = FLMAX ni = nopt ri = dble(ni) tracei = trop termi = tmop ij = ((iold-1)*(iold-2))/2 if (iold .gt. 1) then do j = 1, (iold - 1) nj = ic(j) if (nj .ne. 
1) then c tracej = x(nj,1) tracej = x(nj) k = ic(nj) termj = d(k) nj = int(d(k-1)) rj = dble(nj) else tracej = zero termj = ALFLOG rj = one end if nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) c call dcopy( p, x(iold,1), n, v, 1) c call dscal( p, sj, v, 1) c call daxpy( p, (-si), x(j,1), n, v, 1) temp = sj*x(iold)-si*x(j) c trcij = (tracei + tracej) + ddot(p,v,1,v,1) trcij = (tracei + tracej) + temp*temp trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij iopt = j jopt = iold nopt = nij niop = ni njop = nj sjop = si siop = sj end if end do end if if (iold .lt. lg) then i = iold ij = ij + i do j = (iold + 1), lg nj = ic(j) if (nj .ne. 1) then c tracej = x(nj,1) tracej = x(nj) k = ic(nj) termj = d(k) nj = int(d(k-1)) rj = dble(nj) else tracej = zero termj = ALFLOG rj = one end if nij = ni + nj rij = dble(nij) qij = one /rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) c call dcopy( p, x(iold,1), n, v, 1) c call dscal( p, sj, v, 1) c call daxpy( p, (-si), x(j,1), n, v, 1) temp = sj*x(iold) - si*x(j) c trcij = (tracei + tracej) + ddot(p,v,1,v,1) trcij = (tracei + tracej) + temp*temp trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij iopt = iold jopt = j nopt = nij niop = ni njop = nj siop = si sjop = sj end if ij = ij + i i = j end do end if c update d and find max jopt = 2 iopt = 1 dopt = d(1) if (lg .eq. 2) goto 900 ij = 1 dopt = d(1) do i = 2, ld qi = d(i) if (qi .le. dopt) then ij = i dopt = qi end if end do if (ij .gt. 1) then do i = 2, ij iopt = iopt + 1 if (iopt .ge. jopt) then jopt = jopt + 1 iopt = 1 end if end do end if i = ic(iopt) j = ic(jopt) if (iopt .ne. iold .and. jopt .ne. iold) then if (i .ne. 
1) then tracei = x(i) ici = ic(i) termi = d(ici) niop = int(d(ici-1)) ri = dble(niop) else tracei = zero termi = ALFLOG niop = 1 ri = one end if if (j .ne. 1) then c tracej = x(j,1) tracej = x(j) icj = ic(j) termj = d(icj) njop = int(d(icj-1)) rj = dble(njop) else tracej = zero termj = ALFLOG njop = 1 rj = one end if nopt = niop + njop rij = dble(nopt) qij = one/rij qi = ri*qij qj = rj*qij siop = sqrt(qi) sjop = sqrt(qj) c call dcopy( p, x(iopt,1), n, v, 1) c call dscal( p, sjop, v, 1) c call daxpy( p, (-siop), x(jopt,1), n, v, 1) temp = sjop*x(iopt)-siop*x(jopt) c trop = (tracei + tracej) + ddot(p,v,1,v,1) trop = (tracei + tracej) + temp*temp tmop = rij*log((trop+ALPHA)/rij) end if ls = ls + 1 if (ls .eq. ns) goto 900 goto 100 900 continue d(lo) = dopt lo = lo - 1 d(lo) = tmop lo = lo - 1 d(lo) = dble(nopt) lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) do i = 1, ng ic(i) = i end do lo = nd - 3 ld = nd - 1 si = d(lo) lo = lo - 1 sj = d(lo) lo = lo - 1 ic(int(sj)) = ng if (si .lt. sj) then x(1) = si d(ld) = sj else x(1) = sj d(ld) = si end if ld = ld - 1 lg = ng + 1 do k = 2, ns d(ld) = d(lo) ld = ld - 1 lo = lo - 3 i = int(d(lo)) ici = ic(i) lo = lo - 1 j = int(d(lo)) lo = lo - 1 icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(lg-k) if (ici .lt. 
 icj) then
      x(k) = dble(ici)
      d(ld) = dble(icj)
      else
      x(k) = dble(icj)
      d(ld) = dble(ici)
      end if
      ld = ld - 1
      end do

c unpack merge history stored at the top of d back into ic / d
      ld = nd
      lo = nd - 1
      do k = 1, ns
        ic(k) = int(d(lo))
        lo = lo - 1
        ld = ld - 1
        d(ld) = d(lo)
        lo = lo - 1
      end do

c reverse the order of the first ns entries of d
      ld = nd
      lo = 1
      do k = 1, ns
        si = d(lo)
        d(lo) = d(ld)
        d(ld) = si
        ld = ld - 1
        lo = lo + 1
      end do

      return
      end

c=======================================================================
c me1v: EM iterations for the univariate Gaussian mixture with
c unequal variances ("V" model), optionally with a uniform noise
c component (used when Vinv > 0; the noise term occupies column
c nz = G+1 of z).  On entry z holds conditional membership
c probabilities.  On exit mu, sigsq, pro hold fitted parameters;
c tol returns the final relative loglikelihood change, eps returns
c the loglikelihood (+/-FLMAX flags failure), maxi the iteration
c count actually used.
c=======================================================================
      subroutine me1v ( EQPRO, x, n, G, Vinv, z, maxi, tol, eps,
     *                  mu, sigsq, pro)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      logical            EQPRO

      integer            n, G, maxi

      double precision   Vinv, eps, tol

c     double precision   x(n), z(n,G[+1]), mu(G), sigsq(G), pro(G[+1])
      double precision   x(*), z(n, * ), mu(*), sigsq(*), pro( * )

      integer            nz, iter, k, i

      double precision   hold, hood, err, sum, smu, zsum
      double precision   const, temp, term, sigmin, sigsqk
      double precision   prok, tmin, tmax, rteps

      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)

      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   SMALOG
      parameter         (SMALOG = -708.d0)

      double precision   dummy(1)

c------------------------------------------------------------------------------

      if (maxi .le. 0) return

c nz counts the columns of z in use: one extra for the noise term
      if (Vinv .gt. zero) then
        nz = G + 1
c       ViLog = log(Vinv)
      else
        nz = G
c       if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1)
        if (EQPRO) then
          dummy(1) = one/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if

      eps = max(eps,zero)
      tol = max(tol,zero)

      rteps = sqrt(eps)

c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err = FLMAX

      iter = 0

  100 continue

      iter = iter + 1

c M-step: weighted means, variances and mixing proportions from z
      zsum = one
      do k = 1, G
        sum = zero
        smu = zero
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          smu = smu + temp*x(i)
        end do
        if (.not. EQPRO) pro(k) = sum / dble(n)
        zsum = min(sum,zsum)
        if (sum .gt. rteps) then
          smu = smu / sum
          mu(k) = smu
          sigsqk = zero
          do i = 1, n
            temp = x(i) - smu
            temp = temp*temp
            sigsqk = sigsqk + z(i,k)*temp
c z(i,k) is reused to cache the squared deviation for the E-step
            z(i,k) = temp
          end do
          sigsq(k) = sigsqk / sum
        end if
      end do

c bail out if some group has (numerically) lost all its members
      if (zsum .le. rteps) then
        tol = zsum
        eps = -FLMAX
        maxi = iter
        return
      end if

      term = zero
      if (Vinv .gt. zero) then
c noise component: proportion and constant log-density log(Vinv)
        do i = 1, n
          term = term + z(i,nz)
        end do
        temp = term / dble(n)
        pro(nz) = temp
c       call dcopy( n, ViLog, 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
        if (EQPRO) then
c         temp = (one - pro(nz))/dble(G)
c         call dcopy( G, temp, 0, pro, 1)
          dummy(1) = (one - pro(nz))/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if

c variance degeneracy check
      sigmin = FLMAX
      do k = 1, G
        sigmin = min(sigmin,sigsq(k))
      end do

      if (sigmin .le. eps) then
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if

c E-step, part 1: per-group log densities (z holds squared deviations)
      do k = 1, G
        sigsqk = sigsq(k)
        const = pi2log + log(sigsqk)
        do i = 1, n
c         z(i,k) = temp*exp(-(const+(z(i,k)/sigsqk))/two)
          z(i,k) = -(const+(z(i,k)/sigsqk))/two
        end do
      end do

c E-step, part 2: normalize rows by log-sum-exp, accumulate loglik
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      err = abs(hold-hood)/(one+abs(hood))
      hold = hood

      if (err .gt. tol .and. iter .lt. maxi) goto 100

      tol = err
      eps = hood
      maxi = iter

      return
      end

c=======================================================================
c me1vp: EM iterations for the univariate unequal-variance model as in
c me1v, but with a conjugate (normal - inverse gamma) prior:
c   pshrnk, pmu   shrinkage and mean of the prior on the group means,
c   pscale, pdof  scale and degrees of freedom of the variance prior.
c On exit pdof is overwritten with the log prior density contribution
c (computed after the EM loop terminates).
c=======================================================================
      subroutine me1vp ( EQPRO, x, n, G, Vinv,
     *                   pshrnk, pmu, pscale, pdof,
     *                   z, maxi, tol, eps, mu, sigsq, pro)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      logical            EQPRO

      integer            n, G, maxi

      double precision   pshrnk, pmu, pscale, pdof

      double precision   Vinv, eps, tol

c     double precision   x(n), z(n,G[+1]), mu(G), sigsq(G), pro(G[+1])
      double precision   x(*), z(n, * ), mu(*), sigsq(*), pro( * )

      integer            nz, iter, k, i

      double precision   hold, hood, err, pmupmu
      double precision   sumz, sum, smu, zsum, rteps
      double precision   const, temp, term, sigmin, sigsqk
      double precision   prok, tmin, tmax
      double precision   cmu, cgam, rmu, rgam

      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)

      double precision   three
      parameter         (three = 3.d0)

      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)

      double precision   SMALOG
      parameter         (SMALOG = -708.d0)

      double precision   dlngam
      external           dlngam

      double precision   dummy(1)

c------------------------------------------------------------------------------

      if (maxi .le. 0) return

      if (Vinv .gt. zero) then
        nz = G + 1
c       ViLog = log(Vinv)
      else
        nz = G
c       if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1)
        if (EQPRO) then
          dummy(1) = one/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if

      eps = max(eps,zero)
      tol = max(tol,zero)

      rteps = sqrt(eps)

c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err = FLMAX

      pmupmu = pmu*pmu

      iter = 0

  100 continue

      iter = iter + 1

c M-step with MAP (posterior mode) updates for mean and variance
      zsum = one
      do k = 1, G
        sumz = zero
        smu = zero
        do i = 1, n
          temp = z(i,k)
          sumz = sumz + temp
          smu = smu + temp*x(i)
        end do
        if (.not. EQPRO) pro(k) = sumz / dble(n)
        zsum = min(zsum,sumz)
        if (sumz .gt. rteps) then
          smu = smu/sumz
          sum = zero
          do i = 1, n
            term = abs(x(i) - smu)
c guard against underflow in the weighted squared deviation
            if (term .ge. eps .or. sqrt(z(i,k))*term .gt. RTMIN)
     *          sum = sum + z(i,k)*(term*term)
          end do
          term = (pshrnk*sumz)/(pshrnk+sumz)
          temp = (pmupmu + smu*smu) - two*pmu*smu
          sigsq(k) = (pscale + sum + term*temp)/(pdof+sumz+three)
          term = sumz/(pshrnk+sumz)
          temp = pshrnk/(pshrnk+sumz)
c posterior-mean shrinkage of the group mean toward the prior mean
          mu(k) = term*smu + temp*pmu
        end if
      end do

      if (zsum .le. rteps) then
        tol = zsum
        eps = -FLMAX
        maxi = iter
        return
      end if

      term = zero
      if (Vinv .gt. zero) then
        do i = 1, n
          term = term + z(i,nz)
        end do
        temp = term / dble(n)
        pro(nz) = temp
c       call dcopy( n, ViLog, 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
        if (EQPRO) then
c         temp = (one - pro(nz))/dble(G)
c         call dcopy( G, temp, 0, pro, 1)
          dummy(1) = (one - pro(nz))/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if

      sigmin = FLMAX
      do k = 1, G
        sigmin = min(sigmin,sigsq(k))
      end do

      if (sigmin .le. eps) then
        tol = err
        eps = FLMAX
        maxi = iter
        return
      end if

c E-step, part 1: per-group log densities
      do k = 1, G
        sigsqk = sigsq(k)
        const = pi2log + log(sigsqk)
        do i = 1, n
          term = abs(x(i) - mu(k))
          if (term .gt. RTMIN) then
            z(i,k) = -(const+((term*term)/sigsqk))/two
          else
            z(i,k) = -const/two
          end if
        end do
      end do

c E-step, part 2: log-sum-exp normalization and loglikelihood
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      err = abs(hold-hood)/(one+abs(hood))
      hold = hood

      if (err .gt. tol .and. iter .lt.
 maxi) goto 100

      tol = err
      eps = hood
      maxi = iter

c log prior density contribution, returned by overwriting pdof
      cmu = dble(G)*(pi2log-log(pshrnk))/two
      const = pdof/two
      cgam = dble(G)*(const*log(pscale/two) - dlngam(const))

      rmu = zero
      rgam = zero
      do k = 1, G
        temp = pmu - mu(k)
        temp = temp*temp
        term = log(sigsq(k))
        rmu = rmu + (term + (pshrnk/sigsq(k))*temp)
        rgam = rgam + ((pdof+3.d0)*term + pscale/sigsq(k))
      end do
      rmu = -rmu /two
      rgam = -rgam/two

      pdof = (cmu+cgam) + (rmu+rgam)

      return
      end

c=======================================================================
c ms1v: single M-step for the univariate unequal-variance ("V") model:
c weighted means, variances and mixing proportions from the
c conditional probabilities z.  Degenerate groups get mu = sigsq =
c FLMAX as a flag.
c=======================================================================
      subroutine ms1v ( x, z, n, G, mu, sigsq, pro)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      integer            n, G

c     double precision   x(n), z(n,G), mu(G), sigsq(G), pro(G)
      double precision   x(*), z(n,*), mu(*), sigsq(*), pro(*)

      integer            i, k

      double precision   sum, smu, temp, sigsqk

      double precision   zero, one
      parameter         (zero = 0.d0, one = 1.d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)

c------------------------------------------------------------------------------

      do k = 1, G
        sum = zero
        smu = zero
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          smu = smu + temp*x(i)
        end do
        pro(k) = sum / dble(n)
c the guard avoids overflow in smu/sum when sum is tiny
        if (sum .gt. one .or. smu .le. sum*FLMAX) then
          smu = smu / sum
          mu(k) = smu
          sigsqk = zero
          do i = 1, n
            temp = abs(x(i) - smu)
            sigsqk = sigsqk + z(i,k)*(temp*temp)
          end do
          sigsq(k) = sigsqk / sum
        else
c degenerate group: flag with FLMAX
          mu(k) = FLMAX
          sigsq(k) = FLMAX
        end if
      end do

      return
      end

c=======================================================================
c ms1vp: single M-step as in ms1v, with a conjugate prior
c (pshrnk, pmu on the mean; pscale, pdof on the variance).
c=======================================================================
      subroutine ms1vp ( x, z, n, G,
     *                   pshrnk, pmu, pscale, pdof,
     *                   mu, sigsq, pro)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      integer            n, G

      double precision   pshrnk, pmu, pscale, pdof

c     double precision   x(n), z(n,G), mu(G), sigsq(G), pro(G)
      double precision   x(*), z(n,*), mu(*), sigsq(*), pro(*)

      integer            k, i

      double precision   pmupmu
      double precision   sumz, sum, smu
      double precision   temp, term

      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)

c------------------------------------------------------------------------------

      if (pshrnk .lt. zero) pshrnk = zero

      pmupmu = pmu*pmu

      do k = 1, G
        sumz = zero
        smu = zero
        do i = 1, n
          temp = z(i,k)
          sumz = sumz + temp
          smu = smu + temp*x(i)
        end do
        pro(k) = sumz / dble(n)
        if (sumz .gt. one .or. smu .le. sumz*FLMAX) then
          smu = smu/sumz
          term = sumz/(pshrnk+sumz)
          temp = pshrnk/(pshrnk+sumz)
c shrink the group mean toward the prior mean
          mu(k) = term*smu + temp*pmu
          sum = zero
          do i = 1, n
            term = abs(x(i) - smu)
            sum = sum + z(i,k)*(term*term)
          end do
          term = (pshrnk*sumz)/(pshrnk+sumz)
          temp = (pmupmu + smu*smu) - two*pmu*smu
          if (pshrnk .gt. zero) then
            sigsq(k) = (pscale + sum + term*temp)/(pdof+sumz+3.d0)
          else
            sigsq(k) = (pscale + sum + term*temp)/(pdof+sumz+two)
          end if
        else
c degenerate group: flag with FLMAX
          mu(k) = FLMAX
          sigsq(k) = FLMAX
        end if
      end do

      return
      end

c=======================================================================
c esvei: E-step and loglikelihood for the multivariate diagonal model
c with equal shape, varying volume ("VEI"): covariance of group k is
c scale(k) * diag(shape).  hood = FLMAX flags a degenerate or
c overflowing configuration; if pro(1) < 0 only the log densities are
c computed.  shape is overwritten with its square roots.
c=======================================================================
      subroutine esvei ( x, mu, scale, shape, pro, n, p, G,
     *                   Vinv, hood, z)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      integer            n, p, G

      double precision   hood, Vinv

c     double precision   x(n,p), z(n,G[+1])
      double precision   x(n,*), z(n, * )

c     double precision   mu(p,G), scale(G), shape(p), pro(G[+1])
      double precision   mu(p,*), scale(*), shape(*), pro( * )

      integer            i, j, k, nz

      double precision   sum, temp, const, tmin, tmax
      double precision   smin, smax, prok, scalek

      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)

      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   RTMAX
      parameter         (RTMAX = 1.340780792994260d154)

      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)

      double precision   SMALOG
      parameter         (SMALOG = -708.d0)

      double precision   dummy(1)

c-----------------------------------------------------------------------------

      call sgnrng( G, scale, 1, smin, smax)

      if (smin .le. zero) then
        hood = FLMAX
        return
      end if

      call sgnrng( p, shape, 1, smin, smax)

      if (smin .le. zero) then
        hood = FLMAX
        return
      end if

c shape is replaced by its square roots (per-coordinate std scaling)
      do j = 1, p
        shape(j) = sqrt(shape(j))
      end do

      do k = 1, G
c       prok = pro(k)
        scalek = scale(k)
        const = dble(p)*(pi2log+log(scalek))
        do i = 1, n
          sum = zero
          do j = 1, p
            temp = x(i,j) - mu(j,k)
            if (shape(j) .lt. one .and.
     *          abs(temp) .ge. shape(j)*FLMAX) then
              hood = FLMAX
              return
            end if
            temp = temp/shape(j)
            if (abs(temp) .ge. RTMAX) then
              hood = FLMAX
              return
            end if
            if (abs(temp) .gt. RTMIN) sum = sum + temp*temp
          end do
c         z(i,k) = prok*exp(-(const+sum/scalek)/two)
          if (scalek .lt. one .and.
     *        sum .ge. scalek*FLMAX) then
            hood = FLMAX
            return
          end if
          z(i,k) = -(const+sum/scalek)/two
        end do
      end do

      if (pro(1) .lt. zero) return

      nz = G
      if (Vinv .gt. zero) then
        nz = nz + 1
c       call dcopy( n, log(Vinv), 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
      end if

c     hood = zero
c     do i = 1, n
c       sum = zero
c       do k = 1, nz
c         sum = sum + z(i,k)
c       end do
c       hood = hood + log(sum)
c       call dscal( nz, (one/sum), z(i,1), n)
c     end do

c log-sum-exp normalization of the posterior probabilities
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
c NOTE(review): the first guard (one .le. sum*FLMAX) makes the second
c unreachable for sum < one and looks inverted relative to the usual
c overflow test; since sum >= 1 whenever some pro(k) > 0 this is
c rarely triggered, but verify the intended condition.
        if (sum .lt. one .and.
     *      one .le. sum*FLMAX) then
          hood = FLMAX
          return
        end if
        if (sum .lt. one .and. one .ge. sum*FLMAX) then
          hood = FLMAX
          return
        end if
        call dscal( nz, (one/sum), z(i,1), n)
      end do

      return
      end

c=======================================================================
c mevei: EM iterations for the diagonal "VEI" model (equal shape,
c varying volume), with an inner fixed-point iteration alternating
c between the scale and shape estimates.  maxi(1)/tol(1) control the
c outer EM loop, maxi(2)/tol(2) the inner loop.  scl, shp, w are
c workspaces.  On exit eps holds the loglikelihood.
c=======================================================================
      subroutine mevei ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps,
     *                   mu, scale, shape, pro, scl, shp, w)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      logical            EQPRO

      integer            n, p, G, maxi(2)

      double precision   Vinv, eps, tol(2)

c     double precision   x(n,p), z(n,G[+1]), scl(G), shp(p), w(p,G)
      double precision   x(n,*), z(n, * ), scl(*), shp(*), w(p,*)

c     double precision   mu(p,G), scale(G), shape(p), pro(G[+1])
      double precision   mu(p,*), scale(*), shape(*), pro( * )

      integer            nz, i, j, k
      integer            iter, maxi1, maxi2, inner, inmax

      double precision   tol1, tol2, sum, temp, term, tmin, tmax
      double precision   prok, scalek, smin, smax, const, zsum
      double precision   hold, hood, err, errin, dnp, rteps

      double precision   zero, one, two
      parameter         (zero =
 0.d0, one = 1.d0, two = 2.d0)

      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   SMALOG, BIGLOG
      parameter         (SMALOG = -708.d0, BIGLOG = 709.d0)

      double precision   dummy(1)

c------------------------------------------------------------------------------

      maxi1 = maxi(1)
      maxi2 = max(maxi(2),0)

      if (maxi1 .le. 0) return

      dnp = dble(n*p)

      inmax = 0

      if (Vinv .gt. zero) then
        nz = G + 1
c       ViLog = log(Vinv)
      else
        nz = G
      end if

      eps = max(eps,zero)
      tol1 = max(tol(1),zero)
      tol2 = max(tol(2),zero)

      rteps = sqrt(eps)

c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err = FLMAX
      errin = FLMAX

c start with shape and scale equal to 1
c     call dcopy(p, one, 0, shape, 1)
c     call dcopy(G, one, 0, scale, 1)
      dummy(1) = one
      call dcopy(p, dummy, 0, shape, 1)
      call dcopy(G, dummy, 0, scale, 1)

      iter = 0

  100 continue

      inner = 0

c M-step: means and per-coordinate weighted sums of squares w(j,k)
      zsum = one
      do k = 1, G
        sum = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
c pro(k) temporarily holds n_k (rescaled by p below)
        pro(k) = sum
        zsum = min(zsum,sum)
        if (sum .gt. rteps) then
          call dscal( p, (one/sum), mu(1,k), 1)
          do j = 1, p
            sum = zero
            do i = 1, n
              temp = x(i,j) - mu(j,k)
              sum = sum + z(i,k)*(temp*temp)
            end do
            w(j,k) = sum
          end do
        end if
      end do

      call dscal( G, dble(p), pro, 1)

      if (zsum .le. rteps) then
        eps = -FLMAX
        tol(1) = zsum
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

      if (maxi2 .le. 0) goto 120

c inner fixed-point loop alternating scale and shape estimates
  110 continue

      call sgnrng(p, shape, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

      inner = inner + 1

c scale estimate
      call dcopy( G, scale, 1, scl, 1)

      do k = 1, G
        sum = zero
        do j = 1, p
          sum = sum + w(j,k)/shape(j)
        end do
        scale(k) = sum/pro(k)
      end do

      call sgnrng(G, scale, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

c shape estimate
      call dcopy( p, shape, 1, shp, 1)

      do j = 1, p
        sum = zero
        do k = 1, G
          sum = sum + w(j,k)/scale(k)
        end do
        shape(j) = sum
      end do

      call sgnrng(p, shape, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

c normalize shape to unit geometric mean (via mean of logs)
      sum = zero
      do j = 1, p
        sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)

      if (temp .gt. BIGLOG) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

      if (temp .gt. SMALOG) then
        temp = exp(temp)
      else
        temp = zero
      end if

      if (temp .le. eps) then
        eps = temp
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

      call dscal( p, one/temp, shape, 1)

      errin = zero
      do k = 1, G
        errin = max(errin, abs(scl(k)-scale(k))/(one + scale(k)))
      end do
      do j = 1, p
        errin = max(errin, abs(shp(j)-shape(j))/(one + shape(j)))
      end do

      if (errin .gt. tol2 .and. inner .le. maxi2) goto 110

  120 continue

      iter = iter + 1
      inmax = max(inner, inmax)

      if (.not. EQPRO) call dscal( G, one/dnp, pro, 1)

      term = zero
      if (Vinv .gt. zero) then
c noise component proportion and constant log-density
        do i = 1, n
          term = term + z(i,nz)
        end do
        temp = term / dble(n)
        pro(nz) = temp
c       call dcopy( n, ViLog, 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
        if (EQPRO) then
c         temp = (one - pro(nz))/dble(G)
c         call dcopy( G, temp, 0, pro, 1)
          dummy(1) = (one - pro(nz))/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      else
c       if (EQPRO) call dscal( G, one/dble(G), pro, 1) wrong?
        if (EQPRO) then
          dummy(1) = one/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      end if

      call sgnrng( G, scale, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = inmax
        return
      end if

      call sgnrng( p, shape, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = inmax
        return
      end if

c E-step, part 1: per-group log densities
      do k = 1, G
c       prok = pro(k)
        scalek = scale(k)
        const = dble(p)*(pi2log+log(scalek))
        do i = 1, n
          sum = zero
          do j = 1, p
            temp = x(i,j) - mu(j,k)
            sum = sum + (temp*temp)/shape(j)
          end do
c         z(i,k) = prok*exp(-(const+sum/scalek)/two)
          z(i,k) = -(const+sum/scalek)/two
        end do
      end do

c E-step, part 2: log-sum-exp normalization and loglikelihood
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      err = abs(hold-hood)/(one+abs(hood))
      hold = hood

      if (err .gt. tol1 .and. iter .lt. maxi1) goto 100

      tol(1) = err
      tol(2) = errin
      eps = hood
      maxi(1) = iter
      maxi(2) = inmax

      return
      end

c=======================================================================
c meveip: EM iterations for the diagonal "VEI" model as in mevei, but
c with a conjugate prior: pshrnk, pmu(p) for the means and pscale,
c pdof for the scales.  Same inner/outer iteration controls as mevei.
c=======================================================================
      subroutine meveip( EQPRO, x, n, p, G, Vinv,
     *                   pshrnk, pmu, pscale, pdof,
     *                   z, maxi, tol, eps,
     *                   mu, scale, shape, pro, scl, shp, w)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      logical            EQPRO

      integer            n, p, G, maxi(2)

      double precision   Vinv, eps, tol(2)

c     double precision   pshrnk, pmu(p), pscale, pdof
      double precision   pshrnk, pmu(*), pscale, pdof

c     double precision   x(n,p), z(n,G[+1]), scl(G), shp(p), w(p,G)
      double precision   x(n,*), z(n, * ), scl(*), shp(*), w(p,*)

c     double precision   mu(p,G), scale(G), shape(p), pro(G[+1])
      double precision   mu(p,*), scale(*), shape(*), pro( * )

      integer            nz, i, j, k
      integer            iter, maxi1, maxi2, inner, inmax

      double precision   tol1, tol2, sum, temp, term, tmin, tmax
      double precision   prok, scalek, smin, smax, const, sumz
      double precision   hold, hood, err, errin, dnp, zsum, rteps

      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)

      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   SMALOG, BIGLOG
      parameter         (SMALOG = -708.d0, BIGLOG = 709.d0)

      double precision   dummy(1)

c------------------------------------------------------------------------------

      if (pshrnk .lt. zero) pshrnk = zero

      maxi1 = maxi(1)
      maxi2 = max(maxi(2),0)

      if (maxi1 .le. 0) return

      dnp = dble(n*p)

      inmax = 0

      if (Vinv .gt. zero) then
        nz = G + 1
c       ViLog = log(Vinv)
      else
        nz = G
      end if

      eps = max(eps,zero)
      tol1 = max(tol(1),zero)
      tol2 = max(tol(2),zero)

      rteps = sqrt(eps)

c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err = FLMAX
      errin = FLMAX

c start with shape and scale equal to 1
c     call dcopy(p, one, 0, shape, 1)
c     call dcopy(G, one, 0, scale, 1)
      dummy(1) = one
      call dcopy(p, dummy, 0, shape, 1)
      call dcopy(G, dummy, 0, scale, 1)

      iter = 0

  100 continue

      inner = 0

c M-step with prior: shrunken means and prior-augmented w(j,k)
      zsum = one
      do k = 1, G
        sumz = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do i = 1, n
          temp = z(i,k)
          sumz = sumz + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        pro(k) = sumz
        zsum = min(zsum,sumz)
        if (sumz .gt. rteps) then
          term = pshrnk + sumz
          const = (pshrnk*sumz)/term
          call dscal( p, (one/sumz), mu(1,k), 1)
          do j = 1, p
            sum = zero
            do i = 1, n
              temp = x(i,j) - mu(j,k)
              sum = sum + z(i,k)*(temp*temp)
            end do
            temp = pmu(j) - mu(j,k)
            w(j,k) = pscale + sum + const*(temp*temp)
          end do
c shrink the mean toward the prior mean pmu
          call dscal( p, sumz/term, mu(1,k), 1)
          call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1)
        end if
      end do

      call dscal( G, dble(p), pro, 1)

      if (zsum .le. rteps) then
        eps = -FLMAX
        tol(1) = zsum
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

      if (maxi2 .le. 0) goto 120

c inner fixed-point loop alternating scale and shape estimates
  110 continue

      call sgnrng(p, shape, 1, smin, smax)

      if (smin .le.
 eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

      inner = inner + 1

c scale estimate
      call dcopy( G, scale, 1, scl, 1)

c prior degrees of freedom enter the denominator of the scale update
      temp = pdof + two
      if (pshrnk .gt. zero) temp = temp + one
      do k = 1, G
        sum = zero
        do j = 1, p
          sum = sum + w(j,k)/shape(j)
        end do
        scale(k) = sum/(pro(k)+temp)
      end do

      call sgnrng(G, scale, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

c shape estimate
      call dcopy( p, shape, 1, shp, 1)

      do j = 1, p
        sum = zero
        do k = 1, G
          sum = sum + w(j,k)/scale(k)
        end do
        shape(j) = sum
      end do

      call sgnrng(p, shape, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

c normalize shape to unit geometric mean (via mean of logs)
      sum = zero
      do j = 1, p
        sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)

      if (temp .gt. BIGLOG) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

      if (temp .gt. SMALOG) then
        temp = exp(temp)
      else
        temp = zero
      end if

      if (temp .le. eps) then
        eps = temp
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = max(inner,inmax)
        return
      end if

      call dscal( p, one/temp, shape, 1)

      errin = zero
      do k = 1, G
        errin = max(errin, abs(scl(k)-scale(k))/(one + scale(k)))
      end do
      do j = 1, p
        errin = max(errin, abs(shp(j)-shape(j))/(one + shape(j)))
      end do

      if (errin .gt. tol2 .and. inner .le. maxi2) goto 110

  120 continue

      iter = iter + 1
      inmax = max(inner, inmax)

      if (.not. EQPRO) call dscal( G, one/dnp, pro, 1)

      term = zero
      if (Vinv .gt. zero) then
        do i = 1, n
          term = term + z(i,nz)
        end do
        temp = term / dble(n)
        pro(nz) = temp
c       call dcopy( n, ViLog, 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
        if (EQPRO) then
c         temp = (one - pro(nz))/dble(G)
c         call dcopy( G, temp, 0, pro, 1)
          dummy(1) = (one - pro(nz))/dble(G)
          call dcopy( G, dummy, 0, pro, 1)
        end if
      else
c       if (EQPRO) call dscal( G, one/dble(G), pro, 1) wrong?
        if (EQPRO) then
          dummy(1) = one/dble(G)
c NOTE(review): incx is 1 here but 0 in every sibling routine; with
c incx = 1 dcopy reads dummy(2..G) past the end of dummy(1).  This
c looks like a typo for incx = 0 - verify against upstream mclust.
          call dcopy( G, dummy, 1, pro, 1)
        end if
      end if

      call sgnrng( G, scale, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = inmax
        return
      end if

      call sgnrng( p, shape, 1, smin, smax)

      if (smin .le. eps) then
        eps = FLMAX
        tol(1) = err
        tol(2) = errin
        maxi(1) = iter
        maxi(2) = inmax
        return
      end if

c E-step, part 1: per-group log densities
      do k = 1, G
c       prok = pro(k)
        scalek = scale(k)
        const = dble(p)*(pi2log+log(scalek))
        do i = 1, n
          sum = zero
          do j = 1, p
            temp = x(i,j) - mu(j,k)
            sum = sum + (temp*temp)/shape(j)
          end do
c         z(i,k) = prok*exp(-(const+sum/scalek)/two)
          z(i,k) = -(const+sum/scalek)/two
        end do
      end do

c E-step, part 2: log-sum-exp normalization and loglikelihood
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        call dscal( nz, (one/sum), z(i,1), n)
      end do
      err = abs(hold-hood)/(one+abs(hood))
      hold = hood

      if (err .gt. tol1 .and. iter .lt. maxi1) goto 100

      tol(1) = err
      tol(2) = errin
      eps = hood
      maxi(1) = iter
      maxi(2) = inmax

      return
      end

c=======================================================================
c msvei: M-step for the diagonal "VEI" model (equal shape, varying
c volume): fixed-point iteration alternating the scale and shape
c estimates, started from shape = scale = 1.  On exit tol returns the
c final inner-iteration error and maxi the inner iterations used;
c FLMAX in mu/scale/shape flags a degenerate group.
c=======================================================================
      subroutine msvei ( x, z, n, p, G, maxi, tol,
     *                   mu, scale, shape, pro, scl, shp, w)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      integer            n, p, G, maxi

      double precision   tol

c     double precision   x(n,p), z(n,G), scl(G), shp(p), w(p,G)
      double precision   x(n,*), z(n,*), scl(*), shp(*), w(p,*)

c     double precision   mu(p,G), scale(G), shape(p), pro(G)
      double precision   mu(p,*), scale(*), shape(*), pro(*)

      integer            i, j, k, inner

      double precision   sum, temp, smin, smax, err

      double precision   zero, one
      parameter         (zero = 0.d0, one = 1.d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   SMALOG, BIGLOG
      parameter         (SMALOG = -708.d0, BIGLOG = 709.d0)

      double precision   dummy(1)

c------------------------------------------------------------------------------

      tol = max(tol,zero)

      err = FLMAX

c start with the equal volume and shape estimate
      do k = 1, G
        sum = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do i = 1, n
          temp = z(i,k)
          sum = sum + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        pro(k) = sum
        if (sum .gt. one .or. one .lt. sum*FLMAX) then
          err = min(err,sum)
          call dscal( p, (one/sum), mu(1,k), 1)
          do j = 1, p
            sum = zero
            do i = 1, n
              temp = x(i,j) - mu(j,k)
              temp = temp*temp
              temp = z(i,k)*temp
              sum = sum + temp
            end do
            w(j,k) = sum
          end do
        else
c degenerate group: flag with FLMAX
          err = -FLMAX
c         call dcopy( p, FLMAX, 0, mu(1,k), 1)
          dummy(1) = FLMAX
          call dcopy( p, dummy, 0, mu(1,k), 1)
        end if
      end do

      if (err .lt. zero) then
        call dscal( G, one/dble(n), pro, 1)
c       call dcopy( p, FLMAX, 0, shape, 1)
c       call dcopy( G, FLMAX, 0, scale, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        call dcopy( G, dummy, 0, scale, 1)
        tol = FLMAX
        maxi = 0
        return
      end if

c     call dcopy( p, one, 0, shape, 1)
c     call dcopy( G, one, 0, scale, 1)
      dummy(1) = one
      call dcopy( p, dummy, 0, shape, 1)
      call dcopy( G, dummy, 0, scale, 1)

      call dscal( G, dble(p), pro, 1)

      inner = 0
      err = FLMAX

  100 continue

      call sgnrng(p, shape, 1, smin, smax)

      if (smin .le. zero) goto 200

      inner = inner + 1

c scale estimate
      call dcopy( G, scale, 1, scl, 1)

      do k = 1, G
        sum = zero
        do j = 1, p
          if (shape(j) .gt. one .or.
     *        w(j,k) .lt. shape(j)*FLMAX) then
            sum = sum + w(j,k)/shape(j)
          else
            scale(k) = FLMAX
            goto 110
          end if
        end do
        scale(k) = sum/pro(k)
  110   continue
      end do

      call sgnrng(G, scale, 1, smin, smax)

      if (smin .le. zero .or. smax .eq. FLMAX) goto 200

c shape estimate
      call dcopy( p, shape, 1, shp, 1)

      do j = 1, p
        sum = zero
        do k = 1, G
          if (scale(k) .gt. one .or. w(j,k) .lt. scale(k)*FLMAX) then
            sum = sum + w(j,k)/scale(k)
          else
            shape(j) = FLMAX
            goto 120
          end if
        end do
        shape(j) = sum
  120   continue
      end do

      call sgnrng(p, shape, 1, smin, smax)

      if (smin .le. zero .or. smax .eq. FLMAX) goto 200

c normalize shape to unit geometric mean (via mean of logs)
      sum = zero
      do j = 1, p
        sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)

      if (temp .gt. BIGLOG) then
c       call dcopy( G, FLMAX, 0, scale, 1)
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( G, dummy, 0, scale, 1)
        call dcopy( p, dummy, 0, shape, 1)
        goto 200
      end if

      if (temp .ge. SMALOG) then
        temp = exp(temp)
      else
        temp = zero
      end if

      if (temp .lt. one .and. one .ge. temp*FLMAX) then
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        goto 200
      end if

      call dscal( p, one/temp, shape, 1)

      err = zero
      do k = 1, G
        err = max(err, abs(scl(k) - scale(k))/(one + scale(k)))
      end do
      do j = 1, p
        err = max(err, abs(shp(j) - shape(j))/(one + shape(j)))
      end do

      if (err .gt. tol .and. inner .le.
 maxi) goto 100

  200 continue

      call dscal( G, one/dble(n*p), pro, 1)

      tol = err
      maxi = inner

      return
      end

c=======================================================================
c msveip: M-step for the diagonal "VEI" model as in msvei, but with a
c conjugate prior (pshrnk, pmu(p) for the means; pscale, pdof for the
c scales).  Same alternating scale/shape fixed-point iteration.
c=======================================================================
      subroutine msveip( x, z, n, p, G,
     *                   pshrnk, pmu, pscale, pdof,
     *                   maxi, tol,
     *                   mu, scale, shape, pro, scl, shp, w)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

      integer            n, p, G, maxi

      double precision   tol

c     double precision   pshrnk, pmu(p), pscale, pdof
      double precision   pshrnk, pmu(*), pscale, pdof

c     double precision   x(n,p), z(n,G), scl(G), shp(p), w(p,G)
      double precision   x(n,*), z(n,*), scl(*), shp(*), w(p,*)

c     double precision   mu(p,G), scale(G), shape(p), pro(G)
      double precision   mu(p,*), scale(*), shape(*), pro(*)

      integer            i, j, k, inner

      double precision   sum, temp, term, err
      double precision   smin, smax, const, sumz

      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   SMALOG, BIGLOG
      parameter         (SMALOG = -708.d0, BIGLOG = 709.d0)

      double precision   dummy(1)

c------------------------------------------------------------------------------

      tol = max(tol,zero)

      err = FLMAX

c start with shape and scale equal to 1
      do k = 1, G
        sumz = zero
c       call dcopy( p, zero, 0, mu(1,k), 1)
        dummy(1) = zero
        call dcopy( p, dummy, 0, mu(1,k), 1)
        do i = 1, n
          temp = z(i,k)
          sumz = sumz + temp
          call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
        end do
        pro(k) = sumz
        if (sumz .gt. one .or. one .lt. sumz*FLMAX) then
          err = min(err,sumz)
          term = pshrnk + sumz
          const = (pshrnk*sumz)/term
          call dscal( p, (one/sumz), mu(1,k), 1)
          do j = 1, p
            sum = zero
            do i = 1, n
              temp = x(i,j) - mu(j,k)
              sum = sum + z(i,k)*(temp*temp)
            end do
            temp = pmu(j) - mu(j,k)
            w(j,k) = pscale + sum + const*(temp*temp)
          end do
c shrink the mean toward the prior mean pmu
          call dscal( p, sumz/term, mu(1,k), 1)
          call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1)
        else
c degenerate group: flag with FLMAX
          err = -FLMAX
c         call dcopy( p, FLMAX, 0, mu(1,k), 1)
          dummy(1) = FLMAX
          call dcopy( p, dummy, 0, mu(1,k), 1)
        end if
      end do

      if (err .lt. zero) then
        call dscal( G, one/dble(n), pro, 1)
c       call dcopy( p, FLMAX, 0, shape, 1)
c       call dcopy( G, FLMAX, 0, scale, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        call dcopy( G, dummy, 0, scale, 1)
        tol = FLMAX
        maxi = 0
        return
      end if

c     call dcopy(p, one, 0, shape, 1)
c     call dcopy(G, one, 0, scale, 1)
      dummy(1) = one
      call dcopy(p, dummy, 0, shape, 1)
      call dcopy(G, dummy, 0, scale, 1)

      call dscal( G, dble(p), pro, 1)

      if (maxi .le. 0) return

      inner = 0
      err = FLMAX

  100 continue

      call sgnrng(p, shape, 1, smin, smax)

      if (smin .le. zero) goto 200

      inner = inner + 1

c scale estimate
      call dcopy( G, scale, 1, scl, 1)

      do k = 1, G
        sum = zero
        do j = 1, p
c NOTE(review): this guard uses .ge./.le. where msvei uses .gt./.lt.;
c verify the boundary cases are intended to differ between the two.
          if (shape(j) .ge. one .or.
     *        w(j,k) .le. shape(j)*FLMAX) then
            sum = sum + w(j,k)/shape(j)
          else
            scale(k) = FLMAX
            goto 110
          end if
        end do
        temp = pdof + pro(k) + two
        if (pshrnk .gt. zero) temp = temp + one
        scale(k) = sum/temp
  110   continue
      end do

      call sgnrng(G, scale, 1, smin, smax)

      if (smin .le. zero .or. smax .ge. FLMAX) then
c       call dcopy( G, FLMAX, 0, scale, 1)
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        call dcopy( G, dummy, 0, scale, 1)
        goto 200
      end if

c shape estimate
      call dcopy( p, shape, 1, shp, 1)

      do j = 1, p
        sum = zero
        do k = 1, G
c NOTE(review): the first operand compares scale(k) with w(j,k),
c whereas msvei compares scale(k) with one - possibly a typo; verify.
          if (scale(k) .gt. w(j,k) .or.
     *        w(j,k) .lt. scale(k)*FLMAX) then
            sum = sum + w(j,k)/scale(k)
          else
            shape(j) = FLMAX
            goto 120
          end if
        end do
        shape(j) = sum
  120   continue
      end do

      call sgnrng(p, shape, 1, smin, smax)

      if (smin .le. zero .or. smax .ge. FLMAX) then
c       call dcopy( G, FLMAX, 0, scale, 1)
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        call dcopy( G, dummy, 0, scale, 1)
        goto 200
      end if

c normalize shape to unit geometric mean (via mean of logs)
      sum = zero
      do j = 1, p
        sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)

      if (temp .ge. BIGLOG) then
c       call dcopy( G, FLMAX, 0, scale, 1)
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        call dcopy( G, dummy, 0, scale, 1)
        goto 200
      end if

      if (temp .ge. SMALOG) then
        temp = exp(temp)
      else
        temp = zero
      end if

      if (temp .lt. one .and. one .ge. temp*FLMAX) then
c       call dcopy( p, FLMAX, 0, shape, 1)
        dummy(1) = FLMAX
        call dcopy( p, dummy, 0, shape, 1)
        goto 200
      end if

      call dscal( p, one/temp, shape, 1)

      err = zero
      do k = 1, G
        err = max(err, abs(scl(k)-scale(k))/(one + scale(k)))
      end do
      do j = 1, p
        err = max(err, abs(shp(j)-shape(j))/(one + shape(j)))
      end do

      if (err .gt. tol .and. inner .le. maxi) goto 100

  200 continue

      call dscal( G, one/dble(n*p), pro, 1)

      tol = err
      maxi = inner

      return
      end

c=======================================================================
c esvev: E-step and loglikelihood for the "VEV" model (varying volume,
c equal shape, varying orientation): covariance of group k is
c scale(k) * O(,,k)' diag(shape) O(,,k).  v and w are length-p
c workspaces; shape is overwritten with its square roots.  hood =
c FLMAX flags a degenerate or overflowing configuration; if pro(1) <
c 0 only the log densities are computed.
c=======================================================================
      subroutine esvev ( x, mu, scale, shape, O, pro, n, p, G,
     *                   Vinv, v, w, hood, z)

c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c       http://www.stat.washington.edu/mclust/license.txt

      implicit NONE

c     integer            n, p, G
      integer            n, p, G

      double precision   Vinv, hood

c     double precision   x(n,p), z(n,G[+1]), mu(p,G), pro(G[+1])
      double precision   x(n,*), z(n, * ), mu(p,*), pro( * )

c     double precision   v(p), w(p)
      double precision   v(*), w(*)

c     double precision   scale(G), shape(p), O(p,p,G)
      double precision   scale(*), shape(*), O(p,p,*)

      integer            i, j, k, nz

      double precision   const, temp, tmin, tmax
      double precision   smin, smax, scalek, prok, sum

      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)

      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)

      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)

      double precision   SMALOG
      parameter         (SMALOG = -708.d0)

      double precision   ddot
      external           ddot

      double precision   dummy(1)

c------------------------------------------------------------------------------

      call sgnrng( G, scale, 1, smin, smax)

      if (smin .le. zero) then
        hood = FLMAX
        return
      end if

      call sgnrng( p, shape, 1, smin, smax)

      if (smin .le. zero) then
        hood = FLMAX
        return
      end if

c shape is replaced by its square roots (per-coordinate std scaling)
      do j = 1, p
        shape(j) = sqrt(shape(j))
      end do

      do k = 1, G
        scalek = scale(k)
        const = dble(p)*(pi2log+log(scalek))
c       prok = pro(k)
        do i = 1, n
c rotate the centered observation into the eigenbasis of group k
          call dcopy( p, x(i,1), n, w, 1)
          call daxpy( p, (-one), mu(1,k), 1, w, 1)
          call dgemv( 'N', p, p, one, O(1,1,k), p,
     *                w, 1, zero, v, 1)
          do j = 1, p
            if (shape(j) .lt. one .and.
     *          abs(v(j)) .ge. shape(j)*FLMAX) then
              hood = FLMAX
              return
            end if
            v(j) = v(j)/shape(j)
          end do
          temp = ddot( p, v, 1, v, 1)
          if (scalek .lt. one .and. temp .ge. scalek*FLMAX) then
            hood = FLMAX
            return
          end if
          temp = temp/scalek
c         z(i,k) = prok*exp(-(const+temp)/two)
          z(i,k) = -(const+temp)/two
        end do
      end do

      if (pro(1) .lt. zero) return

      nz = G
      if (Vinv .gt. zero) then
        nz = nz + 1
c       call dcopy( n, log(Vinv), 0, z(1,nz), 1)
        dummy(1) = log(Vinv)
        call dcopy( n, dummy, 0, z(1,nz), 1)
      end if

c     hood = zero
c     do i = 1, n
c       sum = zero
c       do k = 1, nz
c         sum = sum + z(i,k)
c       end do
c       hood = hood + log(sum)
c       call dscal( nz, (one/sum), z(i,1), n)
c     end do

c log-sum-exp normalization of the posterior probabilities
      hood = zero
      do i = 1, n
        tmin = FLMAX
        tmax = -FLMAX
        do k = 1, nz
          prok = pro(k)
          if (prok .eq. zero) then
            z(i,k) = zero
          else
            temp = log(prok) + z(i,k)
            tmin = min(tmin,temp)
            tmax = max(tmax,temp)
            z(i,k) = temp
          end if
        end do
        sum = zero
        do k = 1, nz
          if (pro(k) .ne. zero) then
            temp = z(i,k) - tmax
            if (temp .ge. SMALOG) then
              z(i,k) = exp(temp)
              sum = sum + z(i,k)
            else
              z(i,k) = zero
            end if
          end if
        end do
        hood = hood + (log(sum)+tmax)
        if (sum .lt. one .and. one .ge.
sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine mevev ( EQPRO, x, n, p, G, Vinv, z, * maxi, tol, eps, lwork, * mu, scale, shape, O, pro, w, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi(2), lwork double precision Vinv, eps, tol(2) double precision x(n,*), z(n, * ), w( * ), s(*) double precision mu(p,*), pro( * ) double precision scale(*), shape(*), O(p,p,*) integer maxi1, maxi2, p1, inmax, iter integer nz, i, j, k, l, j1, info, inner double precision tol1, tol2, dnp, term, rteps double precision errin, smin, smax, sumz, tmin, tmax double precision cs, sn, hold, hood, err, zsum double precision const, temp, sum, prok, scalek double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ maxi1 = maxi(1) maxi2 = maxi(2) if (maxi1 .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if eps = max(eps,zero) rteps = sqrt(eps) tol1 = max(tol(1),zero) tol2 = max(tol(2),zero) p1 = p + 1 dnp = dble(n*p) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX errin = FLMAX inmax = 0 iter = 0 100 continue sumz = zero zsum = one l = 0 do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p call dcopy( p, dummy, 0, O(1,j,k), 1) end do sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum zsum = min(zsum,sum) pro(k) = sum if (sum .ge. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) end do call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k), * dummy, 1, dummy, 1, w, lwork, info) if (info .ne. 0) then l = info else do j = 1, p temp = z(j,k) z(j,k) = temp*temp end do end if end if end do iter = iter + 1 if (l .ne. 0 .or. zsum .lt. rteps) then if (Vinv .ge. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if lwork = l c w(1) = FLMAX tol(1) = err tol(2) = errin if (l .ne. 0) then eps = FLMAX else eps = -FLMAX end if maxi(1) = -1 maxi(2) = -1 return end if if (iter .eq. 
1) then c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do j = 1, p sum = zero do k = 1, G sum = sum + z(j,k) end do shape(j) = sum end do call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then if (.not. EQPRO) call dscal( G, one/dble(n), pro, 1) if (Vinv .ge. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if lwork = 0 c w(1) = smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = -1 maxi(2) = -1 return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = -1 maxi(2) = -1 return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (Vinv .le. zero) then c call dcopy (G, temp/dble(n), 0, scale, 1) dummy(1) = temp/dble(n) call dcopy (G, dummy, 0, scale, 1) else c call dcopy (G, temp/sumz, 0, scale, 1) dummy(1) = temp/sumz call dcopy (G, dummy, 0, scale, 1) end if if (temp .le. eps) then if (.not. EQPRO) call dscal( G, one/dble(n), pro, 1) if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if lwork = 0 c w(1) = temp c w(2) = zero tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = -1 maxi(2) = -1 return end if call dscal( p, one/temp, shape, 1) end if c inner iteration to estimate scale and shape c pro now contains n*pro inner = 0 errin = zero if (maxi2 .le. 0) goto 120 110 continue call dcopy( p, shape, 1, w , 1) call dcopy( G, scale, 1, w(p1), 1) c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do k = 1, G sum = zero do j = 1, p sum = sum + z(j,k)/w(j) end do temp = sum/(pro(k)*dble(p)) scale(k) = temp if (temp .le. eps) then lwork = 0 c w(1) = temp tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = max(inner,inmax) return end if do j = 1, p shape(j) = shape(j) + z(j,k)/temp end do end do inner = inner + 1 call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then if (.not. EQPRO) call dscal( G, one/dble(n), pro, 1) if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if lwork = 0 c w(1) = smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = max(inner,inmax) return end if c normalize the shape matrix sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = -1 maxi(2) = -1 return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .le. eps) then if (.not. EQPRO) call dscal( G, one/dble(n), pro, 1) if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if lwork = 0 c w(1) = temp tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = max(inner,inmax) end if call dscal( p, one/temp, shape, 1) errin = zero do j = 1, p errin = max(abs(w(j)-shape(j))/(one+shape(j)), errin) end do do k = 1, G errin = max(abs(scale(k)-w(p+k))/(one+scale(k)), errin) end do if (errin .ge. tol2 .and. inner .lt. maxi2) goto 110 120 continue inmax = max(inner,inmax) smin = smin/temp smax = smax/temp if (.not. 
EQPRO) call dscal( G, one/dble(n), pro, 1) term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. eps) then lwork = 0 c w(1) = -smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = inmax return end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then lwork = 0 c w(1) = -smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = inmax return end if do j = 1, p s(j) = sqrt(shape(j)) end do call sgnrng( p, s, 1, smin, smax) if (smin .le. rteps) then lwork = 0 c w(1) = -smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = inmax return end if do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log + log(scalek)) do i = 1, n call dcopy( p, x(i,1), n, w(p1), 1) call daxpy( p, (-one), mu(1,k), 1, w(p1), 1) call dgemv( 'N', p, p, one, O(1,1,k), p, w(p1), 1, zero, w, 1) do j = 1, p w(j) = w(j) / s(j) end do sum = ddot(p,w,1,w,1)/scalek c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. 
SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol1 .and. iter .lt. maxi1) goto 100 c smin = sqrt(smin) c smax = sqrt(smax) c rcmin = FLMAX c do k = 1, G c temp = sqrt(scale(k)) c rcmin = min(rcmin,(temp*smin)/(one+temp*smax)) c end do lwork = 0 c w(1) = rcmin tol(1) = err tol(2) = errin eps = hood maxi(1) = iter maxi(2) = inmax return end subroutine mevevp( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, lwork, * mu, scale, shape, O, pro, w, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi(2), lwork c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof double precision Vinv, eps, tol(2) c double precision x(n,p), z(n,G[+1]), w(lwork), s(p) double precision x(n,*), z(n, * ), w( * ), s(*) c double precision mu(p,G), pro(G[+1]) double precision mu(p,*), pro( * ) c double precision scale(G), shape(p), O(p,p,G) double precision scale(*), shape(*), O(p,p,*) integer maxi1, maxi2, p1, inmax, iter integer nz, i, j, k, l, j1, info, inner double precision tol1, tol2, dnp, term, rteps double precision errin, smin, smax, sumz, tmin, tmax double precision cs, sn, hold, hood, err, zsum double precision const, temp, sum, prok, scalek double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) 
c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero pdof = pdof*1.d0 maxi1 = maxi(1) maxi2 = maxi(2) if (maxi1 .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if eps = max(eps,zero) rteps = sqrt(eps) tol1 = max(tol(1),zero) tol2 = max(tol(2),zero) p1 = p + 1 dnp = dble(n*p) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX errin = FLMAX inmax = 0 inner = 0 iter = 0 100 continue zsum = one l = 0 do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p call dcopy( p, pscale(1,j), 1, O(1,j,k), 1) end do sumz = zero do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) zsum = min(zsum,sumz) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) term = sumz+pshrnk const = (sumz*pshrnk)/term call dscal( p, sqrt(const), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k), * dummy, 1, dummy, 1, w, lwork, info) if (info .ne. 0) then l = info else do j = 1, p temp = z(j,k) z(j,k) = temp*temp end do end if end if end do iter = iter + 1 if (l .ne. 0 .or. zsum .le. 
rteps) then lwork = l c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) if (l .ne. 0) then eps = FLMAX else eps = -FLMAX end if goto 200 end if if (iter .eq. 1) then c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do j = 1, p sum = zero do k = 1, G sum = sum + z(j,k) end do shape(j) = sum end do call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then eps = FLMAX goto 200 return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then eps = FLMAX goto 200 return end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if do k = 1, G scale(k) = temp / (pro(k)*dble(n)) end do if (temp .le. eps) then eps = FLMAX goto 200 return end if call dscal( p, one/temp, shape, 1) end if inner = 0 errin = zero if (maxi2 .le. 0) goto 120 110 continue call dcopy( p, shape, 1, w , 1) call dcopy( G, scale, 1, w(p1), 1) call sgnrng( p+G, w, 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do k = 1, G sum = zero do j = 1, p if (w(j) .le. z(j,k) .and. z(j,k) .lt. w(j)*rteps) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if sum = sum + z(j,k)/w(j) end do temp = sum/(pro(k)*dble(n*p)) scale(k) = temp do j = 1, p if (temp .le. z(j,k) .and. z(j,k) .lt. 
temp*rteps) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if shape(j) = shape(j) + z(j,k)/temp end do end do inner = inner + 1 call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then eps = FLMAX goto 200 return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then eps = FLMAX goto 200 return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .le. eps) then eps = FLMAX goto 200 end if call dscal( p, one/temp, shape, 1) errin = zero do j = 1, p errin = max(abs(w(j)-shape(j))/(one+shape(j)), errin) end do do k = 1, G errin = max(abs(scale(k)-w(p+k))/(one+scale(k)), errin) end do if (errin .ge. tol2 .and. inner .lt. maxi2) goto 110 120 continue inmax = max(inner,inmax) smin = smin/temp smax = smax/temp term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. eps) then eps = FLMAX goto 200 return end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then eps = FLMAX goto 200 return end if do j = 1, p s(j) = sqrt(shape(j)) end do call sgnrng( p, s, 1, smin, smax) if (smin .le. 
rteps) then eps = FLMAX goto 200 return end if do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log + log(scalek)) do i = 1, n call dcopy( p, x(i,1), n, w(p1), 1) call daxpy( p, (-one), mu(1,k), 1, w(p1), 1) call dgemv( 'N', p, p, one, O(1,1,k), p, w(p1), 1, zero, w, 1) do j = 1, p w(j) = w(j) / s(j) end do sum = ddot(p,w,1,w,1)/scalek c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol1 .and. iter .lt. maxi1) goto 100 c smin = sqrt(smin) c smax = sqrt(smax) c rcmin = FLMAX c do k = 1, G c temp = sqrt(scale(k)) c rcmin = min(rcmin,(temp*smin)/(one+temp*smax)) c end do c w(1) = rcmin lwork = 0 eps = hood 200 continue tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = inmax return end subroutine msvev ( x, z, n, p, G, w, lwork, maxi, tol, * mu, scale, shape, O, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G, maxi, lwork double precision tol c double precision x(n,p), z(n,G), w(max(4*p,5*p-4,p+G)) double precision x(n,*), z(n,*), w(*) c double precision scale(G), shape(p), O(p,p,G), mu(p,G), pro(G) double precision scale(*), shape(*), O(p,p,*), mu(p,*), pro(*) integer p1, i, j, k, j1, inner, info double precision temp double precision err, sum, smin, smax, cs, sn double precision zero, one 
parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision BIGLOG parameter (BIGLOG = 709.d0) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c----------------------------------------------------------------------------- tol = max(tol,zero) p1 = p + 1 err = FLMAX inner = 0 c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do k = 1, G dummy(1) = zero c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p c call dcopy( p, zero, 0, O(1,j,k), 1) call dcopy( p, dummy, 0, O(1,j,k), 1) end do sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sum if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) if (lwork .gt. 0) then do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) end do call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k), * dummy, 1, dummy, 1, w, lwork, info) if (info .ne. 0) then inner = info else do j = 1, p temp = z(j,k) temp = temp*temp shape(j) = shape(j) + temp z(j,k) = temp end do end if end if else err = zero c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do c inner iteration estimates scale and shape c pro now contains n*pro if (inner .ne. 0 .or. err .eq. zero) then lwork = inner c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if lwork = 0 call sgnrng( p, shape, 1, smin, smax) if (smin .eq. 
zero) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if do k = 1, G scale(k) = temp / (pro(k)*dble(n)) end do if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if call dscal( p, one/temp, shape, 1) c iteration to estimate scale and shape c pro now contains n*pro if (maxi .le. 0) goto 200 100 continue call dcopy( p, shape, 1, w , 1) call dcopy( G, scale, 1, w(p1), 1) call absrng( p, w, 1, smin, smax) if (smin .le. one .and. one .ge. smin*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do k = 1, G sum = zero do j = 1, p sum = sum + z(j,k)/w(j) end do temp = (sum/pro(k))/dble(p) scale(k) = temp if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if do j = 1, p shape(j) = shape(j) + z(j,k)/temp end do end do inner = inner + 1 call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if c normalize the shape matrix sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .ge. 
BIGLOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if call dscal( p, one/temp, shape, 1) err = zero do j = 1, p err = max(abs(w(j)-shape(j))/(one+shape(j)), err) end do do k = 1, G err = max(abs(scale(k)-w(p+k))/(one+scale(k)), err) end do if (err .ge. tol .and. inner .lt. maxi) goto 100 200 continue call dscal( G, one/dble(n), pro, 1) tol = err maxi = inner return end subroutine msvevp( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * w, lwork, maxi, tol, * mu, scale, shape, O, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G, maxi, lwork c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof double precision tol c double precision x(n,p), z(n,G), w(lwork) double precision x(n,*), z(n,*), w( * ) c double precision mu(p,G), pro(G) double precision mu(p,*), pro(*) c double precision scale(G), shape(p), O(p,p,G) double precision scale(*), shape(*), O(p,p,*) integer p1, i, j, k, l, j1, inner, info double precision sum, term, temp, err, smin, smax double precision sumz, cs, sn, const double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c----------------------------------------------------------------------------- if (maxi .le. 0) return if (pshrnk .le. 
zero) pshrnk = zero pdof = pdof*1.d0 tol = max(tol,zero) p1 = p + 1 err = FLMAX inner = 0 l = 0 c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p call dcopy( p, pscale(1,j), 1, O(1,j,k), 1) end do sumz = zero do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) if (sumz .ge. one .or. one .lt. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) term = sumz+pshrnk const = (sumz*pshrnk)/term call dscal( p, sqrt(const), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k), * dummy, 1, dummy, 1, w, lwork, info) if (info .ne. 0) then l = info else do j = 1, p temp = z(j,k) temp = temp*temp shape(j) = shape(j) + temp z(j,k) = temp end do end if else err = zero c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (l .ne. 0 .or. err .eq. zero) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. 
zero) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if do k = 1, G scale(k) = temp / (pro(k)*dble(n)) end do if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if call dscal( p, one/temp, shape, 1) if (maxi .le. 0) goto 200 100 continue call dcopy( p, shape, 1, w , 1) call dcopy( G, scale, 1, w(p1), 1) call sgnrng( p+G, w, 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do k = 1, G sum = zero do j = 1, p if (w(j) .le. z(j,k) .and. z(j,k) .ge. w(j)*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if sum = sum + z(j,k)/w(j) end do temp = sum/(pro(k)*dble(n*p)) scale(k) = temp do j = 1, p if (temp .le. z(j,k) .and. z(j,k) .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if shape(j) = shape(j) + z(j,k)/temp end do end do inner = inner + 1 call sgnrng( p, shape, 1, smin, smax) if (smin .le. 
zero) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .ge. BIGLOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if call dscal( p, one/temp, shape, 1) err = zero do j = 1, p err = max(abs(w(j)-shape(j))/(one+shape(j)), err) end do do k = 1, G err = max(abs(scale(k)-w(p+k))/(one+scale(k)), err) end do if (err .ge. tol .and. inner .lt. maxi) goto 100 200 continue lwork = l tol = err maxi = inner return end subroutine esvii ( x, mu, sigsq, pro, n, p, G, Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G double precision hood, Vinv c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), sigsq(G), pro(G[+1]) double precision mu(p,*), sigsq(*), pro( * ) integer i, j, k, nz double precision sum, temp, const, tmin, tmax double precision prok, sigsqk, sigmin double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c----------------------------------------------------------------------------- call sgnrng( G, sigsq, 1, sigmin, temp) if (sigmin 
.le. zero) then hood = FLMAX return end if do k = 1, G c prok = pro(k) sigsqk = sigsq(k) const = dble(p)*(pi2log+log(sigsq(k))) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) if (abs(temp) .ge. RTMAX) then hood = FLMAX return end if if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum/sigsqk)/two) if (sigsqk .lt. one .and. sum .ge. sigsqk*FLMAX) then hood = FLMAX return end if z(i,k) = -(const+sum/sigsqk)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine hcvii ( x, n, p, ic, ng, ns, ALPHA, v, nd, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, ic(n), ng, ns, nd c double precision x(n,p), v(p). 
d(*), ALPHA double precision x(n,*), v(*), d(*), ALPHA integer lg, ld, ll, lo, ls, i, j, k, m integer ni, nj, nij, nopt, niop, njop integer ij, ici, icj, iopt, jopt, iold double precision ALFLOG double precision qi, qj, qij, ri, rj, rij, si, sj double precision tracei, tracej, trcij, trop double precision termi, termj, trmij, tmop double precision dij, dopt, siop, sjop double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision sqrthf parameter (sqrthf = .70710678118654757274d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision EPSMAX parameter (EPSMAX = 2.2204460492503131d-16) double precision ddot external ddot c------------------------------------------------------------------------------ iopt = 0 niop = 0 njop = 0 nopt = 0 tmop = 0.d0 trop = 0.d0 lg = ng ld = (ng*(ng-1))/2 ll = nd-ng lo = nd if (ng .eq. 1) return ALPHA = max(ALPHA,EPSMAX) ALFLOG = log(ALPHA) c call intpr( 'ic', -1, ic, n) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. lg) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. k) then k = k + 1 call dswap( p, x(k,1), n, x(j,1), n) ic(j) = ic(k) ic(k) = icj end if end do 3 continue c set up pointers do j = 1, n i = ic(j) if (i .ne. j) then c update sum of squares k = ic(i) if (k .eq. 
1) then ic(i) = j ic(j) = 2 call dscal( p, sqrthf, x(i,1), n) call dscal( p, sqrthf, x(j,1), n) call dcopy( p, x(j,1), n, v, 1) call daxpy( p, (-one), x(i,1), n, v, 1) call daxpy( p, one, x(j,1), n, x(i,1), n) c call dcopy( p, FLMAX, 0, x(j,1), n) c x(j,1) = ddot( p, v, 1, v, 1) / two x(j,1) = ddot( p, v, 1, v, 1) else ic(j) = 0 ni = ic(k) ic(k) = ni + 1 ri = dble(ni) rij = dble(ni+1) qj = one/rij qi = ri*qj si = sqrt(qi) sj = sqrt(qj) call dcopy( p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(i,1), n, v, 1) c x(k,1) = qi*x(k,1) + qj*ddot(p, v, 1, v, 1) x(k,1) = x(k,1) + ddot(p, v, 1, v, 1) call dscal( p, si, x(i,1), n) call daxpy( p, sj, x(j,1), n, x(i,1), n) c call dcopy( p, FLMAX, 0, x(j,1), n) end if else ic(j) = 1 end if end do c store terms also so as not to recompute them do k = 1, ng i = ic(k) if (i .ne. 1) then ni = ic(i) ri = dble(ni) c x(i,2) = ri*log(x(i,1)+ALPHA) x(i,2) = ri*log((x(i,1)+ALPHA)/ri) end if end do c call intpr( 'ic', -1, ic, n) c call dblepr( 'trace', -1, x(1,1), n) c call dblepr( 'term', -1, x(1,2), n) c compute change in likelihood and determine minimum dopt = FLMAX ij = 0 do j = 2, ng nj = ic(j) if (nj .eq. 1) then tracej = zero termj = ALFLOG rj = one else tracej = x(nj,1) termj = x(nj,2) nj = ic(nj) rj = dble(nj) end if do i = 1, (j-1) ni = ic(i) if (ni .eq. 1) then tracei = zero termi = ALFLOG ri = one else tracei = x(ni,1) termi = x(ni,2) ni = ic(ni) ri = dble(ni) end if nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) c trcij = (qi*tracei + qj*tracej) + qij*ddot(p,v,1,v,1) trcij = (tracei + tracej) + ddot(p,v,1,v,1) c trmij = rij*log(trcij+ALPHA) trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. 
dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = i jopt = j end if end do end do c call dblepr( 'dij', -1, d, (l*(l-1))/2) if (ns .eq. 1) then if (iopt .lt. jopt) then x(1,1) = dble(iopt) x(1,2) = dble(jopt) else x(1,1) = dble(jopt) x(1,2) = dble(iopt) end if d(1) = dopt return end if if (niop .ne. 1) ic(ic(iopt)) = 0 if (njop .ne. 1) ic(ic(jopt)) = 0 ls = 1 100 continue c if (.false.) then c ij = 1 c jj = 1 c do j = 2, n c nj = ic(j) c if (nj .ne. 0 .and. abs(nj) .le. n) then c call dblepr( 'dij', -1, d(ij), jj) c ij = ij + jj c jj = jj + 1 c end if c end do c end if call dscal( p, siop, x(iopt,1), n) call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) if (jopt .ne. lg) then call wardsw( jopt, lg, d) call dcopy( p, x(lg,1), n, x(jopt,1), n) m = ic(jopt) ic(jopt) = ic(lg) ic(lg) = m end if ic(iopt) = lg ic(lg) = nopt x(lg,1) = trop x(lg,2) = tmop d(lo) = dopt lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) lo = lo - 1 lg = lg - 1 ld = ld - lg iold = iopt iopt = -1 jopt = -1 dopt = FLMAX ni = nopt ri = dble(ni) tracei = trop termi = tmop ij = ((iold-1)*(iold-2))/2 if (iold .gt. 1) then do j = 1, (iold - 1) nj = ic(j) if (nj .ne. 1) then tracej = x(nj,1) termj = x(nj,2) nj = ic(nj) rj = dble(nj) else tracej = zero termj = ALFLOG rj = one end if nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) call dcopy( p, x(iold,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) c trcij = (qi*tracei + qj*tracej) + qij*ddot(p,v,1,v,1) trcij = (tracei + tracej) + ddot(p,v,1,v,1) c trmij = rij*log(trcij+ALPHA) trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij iopt = j jopt = iold nopt = nij niop = ni njop = nj sjop = si siop = sj end if end do end if if (iold .lt. lg) then i = iold ij = ij + i do j = (iold + 1), lg nj = ic(j) if (nj .ne. 
1) then tracej = x(nj,1) termj = x(nj,2) nj = ic(nj) rj = dble(nj) else tracej = zero termj = ALFLOG rj = one end if nij = ni + nj rij = dble(nij) qij = one /rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) call dcopy( p, x(iold,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) c trcij = (qi*tracei + qj*tracej) + qij*ddot(p,v,1,v,1) trcij = (tracei + tracej) + ddot(p,v,1,v,1) c trmij = rij*log(trcij+ALPHA) trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij iopt = iold jopt = j nopt = nij niop = ni njop = nj siop = si sjop = sj end if ij = ij + i i = j end do end if c update d and find max jopt = 2 iopt = 1 dopt = d(1) if (lg .eq. 2) goto 900 ij = 1 dopt = d(1) do i = 2, ld qi = d(i) if (qi .le. dopt) then ij = i dopt = qi end if end do if (ij .gt. 1) then do i = 2, ij iopt = iopt + 1 if (iopt .ge. jopt) then jopt = jopt + 1 iopt = 1 end if end do end if i = ic(iopt) j = ic(jopt) if (iopt .ne. iold .and. jopt .ne. iold) then if (i .ne. 1) then tracei = x(i,1) termi = x(i,2) niop = ic(i) ri = dble(niop) else tracei = zero termi = ALFLOG niop = 1 ri = one end if if (j .ne. 1) then tracej = x(j,1) termj = x(j,2) njop = ic(j) rj = dble(njop) else tracej = zero termj = ALFLOG njop = 1 rj = one end if nopt = niop + njop rij = dble(nopt) qij = one/rij qi = ri*qij qj = rj*qij siop = sqrt(qi) sjop = sqrt(qj) call dcopy( p, x(iopt,1), n, v, 1) call dscal( p, sjop, v, 1) call daxpy( p, (-siop), x(jopt,1), n, v, 1) c trop = (qi*tracei + qj*tracej) + qij*ddot(p,v,1,v,1) trop = (tracei + tracej) + ddot(p,v,1,v,1) c tmop = rij*log(trop+ALPHA) tmop = rij*log((trop+ALPHA)/rij) end if ls = ls + 1 if (ls .eq. ns) goto 900 goto 100 900 continue d(lo) = dopt lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) do i = 1, ng ic(i) = i end do lo = nd - 1 ld = lo si = d(lo) lo = lo - 1 sj = d(lo) ic(int(sj)) = ng if (si .lt. 
sj) then x(1,1) = si x(1,2) = sj else x(1,1) = sj x(1,2) = si end if lg = ng + 1 do k = 2, ns lo = lo - 1 d(ld) = d(lo) ld = ld - 1 lo = lo - 1 i = int(d(lo)) ici = ic(i) lo = lo - 1 j = int(d(lo)) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(lg-k) if (ici .lt. icj) then x(k,1) = dble(ici) x(k,2) = dble(icj) else x(k,1) = dble(icj) x(k,2) = dble(ici) end if end do ld = nd lo = 1 do k = 1, ns si = d(lo) d(lo) = d(ld) d(ld) = si ld = ld - 1 lo = lo + 1 end do return end subroutine mevii ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), sigsq(G), pro(G[+1]) double precision mu(p,*), sigsq(*), pro( * ) integer nz, iter, i, j, k double precision sumz, sum, temp, const, term, zsum double precision sigmin, sigsqk, hold, hood, err double precision prok, tmin, tmax, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c----------------------------------------------------------------------------- if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(sumz,zsum) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) sigsqk = zero do i = 1, n sum = zero do j = 1, p temp = abs(x(i,j) - mu(j,k)) if (temp .gt. RTMIN) sum = sum + temp*temp end do if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN) * sigsqk = sigsqk + z(i,k)*sum z(i,k) = sum end do sigsq(k) = (sigsqk/sumz)/dble(p) else sigsq(k) = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( G, sigsq, 1, sigmin, temp) if (sigmin .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G c temp = pro(k) sigsqk = sigsq(k) const = dble(p)*(pi2log+log(sigsqk)) do i = 1, n c z(i,k) = temp*exp(-(const+z(i,k)/sigsqk)/two) z(i,k) = -(const+z(i,k)/sigsqk)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. 
zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do const = zero - tmax sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) + const if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)-const) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine meviip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision Vinv, eps, tol c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), sigsq(G), pro(G[+1]) double precision mu(p,*), sigsq(*), pro( * ) integer nz, iter, i, j, k double precision sumz, sum, temp, const, term, zsum double precision sigmin, sigsqk, hold, hood, err double precision prok, tmin, tmax, rteps double precision pmupmu, cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot, dlngam external ddot, dlngam double precision dummy(1) c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 pmupmu = ddot(p,pmu,1,pmu,1) 100 continue iter = iter + 1 zsum = one do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(sumz,zsum) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) sigsqk = pscale do i = 1, n sum = zero do j = 1, p temp = abs(x(i,j) - mu(j,k)) if (temp .gt. RTMIN) sum = sum + temp*temp end do if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN) * sigsqk = sigsqk + z(i,k)*sum end do temp = pmupmu + ddot(p, mu(1,k), 1, mu(1,k), 1) temp = temp - two*ddot(p,mu(1,k),1,pmu,1) const = sumz+pshrnk sigsqk = sigsqk + ((sumz*pshrnk)/const) * temp c sigsq(k) = sigsqk/(pdof+(sumz+one)*dble(p)+two) temp = pdof+sumz*dble(p)+two if (pshrnk .gt. zero) temp = temp + dble(p) sigsq(k) = sigsqk/temp call dscal( p, sumz/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else sigsq(k) = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( G, sigsq, 1, sigmin, temp) if (sigmin .le. 
eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G sigsqk = sigsq(k) const = dble(p)*(pi2log+log(sigsqk)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + temp*temp end do z(i,k) = -(const+sum/sigsqk)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do const = zero - tmax sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) + const if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)-const) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter if (pshrnk .gt. zero) then cmu = dble(p)*(log(pshrnk)-pi2log)/two const = pdof/two cgam = const*log(pscale/two)-dlngam(const) rmu = zero rgam = zero do k = 1, G term = log(sigsq(k)) temp = pmupmu + ddot( p, mu(1,k), 1, mu(1,k), 1) temp = temp - two*ddot( p, mu(1,k), 1, pmu, 1) rmu = rmu + (pshrnk*temp)/sigsq(k) rgam = rgam + ((pdof+3.d0)*term - (pscale/sigsq(k))) end do rmu = -rmu /two rgam = -rgam/two pdof = (dble(G)*cmu+rmu) + (dble(G)*cgam+rgam) else pdof = FLMAX end if return end subroutine msvii ( x, z, n, p, G, mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), mu(p,G), sigsq(G), pro(G) double precision x(n,*), z(n,*), mu(p,*), sigsq(*), pro(*) integer i, j, k double precision sum, sumz, temp, sigsqk double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision 
RTMIN parameter (RTMIN = 1.49166814624d-154) double precision dummy(1) c----------------------------------------------------------------------------- do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) if (sumz .ge. one .or. one .le. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) sigsqk = zero do i = 1, n sum = zero do j = 1, p temp = abs(x(i,j) - mu(j,k)) if (temp .gt. RTMIN) sum = sum + temp*temp end do if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN) * sigsqk = sigsqk + z(i,k)*sum end do temp = sumz*dble(p) if (temp .ge. one .or. sigsqk .le. temp*FLMAX) then sigsq(k) = sigsqk/temp else sigsq(k) = FLMAX end if else sigsq(k) = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do return end subroutine msviip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), sigsq(G), pro(G) double precision mu(p,*), sigsq(*), pro(*) integer i, j, k double precision sumz, sum, temp double precision sigsqk, const, pmupmu double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision ddot external ddot double precision dummy(1) 
c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero pmupmu = ddot(p,pmu,1,pmu,1) do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) if (sumz .ge. one .or. one .lt. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) sigsqk = pscale do i = 1, n sum = zero do j = 1, p temp = abs(x(i,j) - mu(j,k)) if (temp .gt. RTMIN) sum = sum + temp*temp end do if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN) * sigsqk = sigsqk + z(i,k)*sum end do temp = pmupmu + ddot(p, mu(1,k), 1, mu(1,k), 1) temp = temp - two*ddot(p,mu(1,k),1,pmu,1) const = sumz+pshrnk sigsqk = sigsqk + ((sumz*pshrnk)/const) * temp temp = pdof+sumz*dble(p)+two if (pshrnk .gt. zero) temp = temp + dble(p) sigsq(k) = sigsqk/temp call dscal( p, sumz/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else sigsq(k) = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do return end subroutine esvvi ( x, mu, scale, shape, pro, n, p, G, * Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G double precision hood, Vinv c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), scale(G), shape(p,G), pro(G[+1]) double precision mu(p,*), scale(*), shape(p,*), pro( * ) integer i, j, k, nz double precision sum, temp, const, tmin, tmax double precision smin, smax, prok, scalek double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 
1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c----------------------------------------------------------------------------- call sgnrng( G, scale, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if temp = sqrt(scale(k)) do j = 1, p shape(j,k) = temp*sqrt(shape(j,k)) end do end do do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log+log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) if (shape(j,k) .lt. one .and. * abs(temp) .ge. shape(j,k)*FLMAX) then hood = FLMAX return end if temp = temp/shape(j,k) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. 
sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine mevvi ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol double precision x(n,*), z(n, * ) double precision mu(p,*), scale(*), shape(p,*), pro( * ) integer nz, iter, i, j, k double precision sum, temp, term, scalek, epsmin double precision hold, hood, err, smin, smax, const double precision prok, tmin, tmax, zsum, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c---------------------------------------------------------------------------- if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if tol = max(tol,zero) eps = max(eps,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G c call dcopy( p, zero, 0, shape(1,k), 1) c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, shape(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do pro(k) = sum c pro(k) now contains n_k zsum = min(zsum,sum) if (sum .gt. 
rteps) then call dscal( p, (one/sum), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum end do end if end do if (zsum .le. rteps) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = -FLMAX maxi = iter return end if epsmin = FLMAX do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) epsmin = min(smin,epsmin) if (smin .le. zero) then scale(k) = zero else temp = zero do j = 1, p temp = temp + log(shape(j,k)) end do temp = temp/dble(p) if (temp .gt. BIGLOG) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if scale(k) = temp/pro(k) epsmin = min(temp,epsmin) if (temp .le. eps) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape(1,k), 1) end if end do if (.not. EQPRO) then call dscal( G, one/dble(n), pro, 1) else if (Vinv .le. zero) then call dscal( G, one/dble(G), pro, 1) end if if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (epsmin .le. eps) then tol = err eps = -FLMAX maxi = iter return end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. 
eps) then tol = err eps = FLMAX maxi = iter return end if end do do k = 1, G scalek = scale(k) const = dble(p)*(pi2log + log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j,k) end do c z(i,k) = pro(k)*exp(-(const+(sum/scalek))/two) z(i,k) = -(const+(sum/scalek))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine mevvip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision Vinv, eps, tol c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), scale(G), shape(p,G), pro(G[+1]) double precision mu(p,*), scale(*), shape(p,*), pro( * ) integer nz, iter, i, j, k double precision sumz, sum, temp, term, scalek, epsmin double precision hold, hood, err, smin, smax, const double precision prok, tmin, tmax, zsum, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision 
FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G c call dcopy( p, pscale, 0, shape(1,k), 1) c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) sumz = zero do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do pro(k) = sumz zsum = min(zsum,sumz) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) term = pshrnk+sumz const = (pshrnk*sumz)/term do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum temp = pmu(j) - mu(j,k) shape(j,k) = shape(j,k) + const*(temp*temp) end do call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) end if end do if (zsum .le. rteps) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = -FLMAX maxi = iter return end if c pro(k) now contains n_k epsmin = FLMAX term = pdof+two if (pshrnk .gt. zero) term = term + one do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) epsmin = min(smin,epsmin) if (smin .eq. zero) then scale(k) = zero else sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = FLMAX maxi = iter return end if if (temp .gt. 
SMALOG) then temp = exp(temp) else temp = zero end if c pro(k) contains n_k scale(k) = temp/(pro(k)+term) epsmin = min(temp,epsmin) if (temp .le. eps) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape(1,k), 1) end if end do if (.not. EQPRO) then call dscal( G, one/dble(n), pro, 1) else if (Vinv .le. zero) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (epsmin .le. eps) then tol = err eps = FLMAX maxi = iter return end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. eps) then tol = err eps = FLMAX maxi = iter return end if end do do k = 1, G scalek = scale(k) const = dble(p)*(pi2log + log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j,k) end do c z(i,k) = pro(k)*exp(-(const+(sum/scalek))/two) z(i,k) = -(const+(sum/scalek))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. 
iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine msvvi ( x, z, n, p, G, mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale(G), shape(p,G), pro(G) double precision mu(p,*), scale(*), shape(p,*), pro(*) integer i, j, k double precision sum, temp, smin, smax double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c----------------------------------------------------------------------------- do k = 1, G dummy(1) = zero c call dcopy( p, zero, 0, shape(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do pro(k) = sum if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do c pro(k) now contains n_k do k = 1, G if (mu(1,k) .ne. FLMAX) then do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum end do else c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) end if end do do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) if (smin .le. zero) then scale(k) = zero c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if if (smax .eq. 
FLMAX) then scale(k) = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale(k) = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if if (temp .lt. SMALOG) then temp = zero scale(k) = zero c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if temp = exp(temp) if (pro(k) .lt. one .and. temp .ge. pro(k)*FLMAX) then scale(k) = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if scale(k) = temp/pro(k) if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if call dscal( p, one/temp, shape(1,k), 1) 100 continue end do call dscal( G, one/dble(n), pro, 1) return end subroutine msvvip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale(G), shape(p,G), pro(G) double precision mu(p,*), scale(*), shape(p,*), pro(*) integer i, j, k double precision sumz, sum, temp, term double precision smin, smax, const double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision 
dummy(1) c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero do k = 1, G c call dcopy( p, pscale, 0, shape(1,k), 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape(1,k), 1) sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do pro(k) = sumz if (sumz .ge. one .or. one .le. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) term = pshrnk+sumz const = (pshrnk*sumz)/term do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum temp = pmu(j) - mu(j,k) shape(j,k) = shape(j,k) + const*(temp*temp) end do call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do c pro(k) now contains n_k do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) if (smin .le. zero) then scale(k) = zero c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) else if (smax .eq. FLMAX) then scale(k) = FLMAX else sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale(k) = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) else if (temp .lt. SMALOG) then temp = zero scale(k) = zero c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) else temp = exp(temp) c pro(k) contains n_k term = pro(k) + pdof + two if (pshrnk .gt. zero) term = term + one scale(k) = temp/term if (temp .ge. one .or. one .le. 
temp*FLMAX) then call dscal( p, one/temp, shape(1,k), 1) else c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) end if end if end if end do call dscal( G, one/dble(n), pro, 1) return end subroutine esvvv ( CHOL, x, mu, Sigma, pro, n, p, G, Vinv, * w, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c character CHOL logical CHOL c integer n, p, G integer n, p, G double precision hood, Vinv c double precision x(n,p), w(p), z(n,G[+1]) double precision x(n,*), w(*), z(n, * ) c double precision mu(p,G), Sigma(p,p,G), pro(G[+1]) double precision mu(p,*), Sigma(p,p,*), pro( * ) integer nz, p1, info, i, j, k double precision const, detlog, temp, prok, tmin, tmax double precision umin, umax, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c----------------------------------------------------------------------------- p1 = p + 1 c if (CHOL .eq. 'N') then if (.not. CHOL) then do k = 1, G call dpotrf( 'U', p, Sigma(1,1,k), p, info) w(1) = dble(info) if (info .ne. 0) then hood = FLMAX return end if end do end if do k = 1, G call absrng( p, Sigma(1,1,k), p1, umin, umax) if (umax .le. one .and. umax .ge. umin*RTMAX) then w(1) = zero hood = FLMAX return end if if (umax .ge. one .and. umin .le. 
umax*RTMIN) then w(1) = zero hood = FLMAX return end if end do do k = 1, G detlog = zero do j = 1, p detlog = detlog + log(abs(Sigma(j,j,k))) end do const = dble(p)*pi2log/two + detlog c prok = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, Sigma(1,1,k), p, w, 1) temp = ddot( p, w, 1, w, 1)/two c z(i,k) = prok*exp(-(const+temp)) z(i,k) = -(const+temp) end do end do w(1) = zero if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. 
sum*FLMAX) then w(1) = zero hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do w(1) = zero return end subroutine hcvvv ( x, n, p, ic, ng, ns, ALPHA, BETA, * v, u, s, r, nd, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, ic(n), ng, ns, nd double precision ALPHA, BETA c double precision x(n,p+1), v(p), u(p,p), s(p,p) c double precision r(p,p), d(ng*(ng-1)/2) double precision x(n,*), v(*), u(p,*), s(p,*) double precision r(p,*), d(*) integer psq, pm1, pp1 integer i, j, k, l, m, ij, iold integer lg, ld, ll, lo, ls integer ici, icj, ni, nj, nij integer nopt, niop, njop, iopt, jopt double precision trcij, trmij, trop, tmop double precision traci, tracj, termi, termj double precision qi, qj, qij, si, sj, sij, ri, rj, rij double precision dij, dopt, siop, sjop double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision rthalf parameter (rthalf = .7071067811865476d0) double precision ddot, vvvtij external ddot, vvvtij double precision BETA0, ALPHA0, ABLOG common /VVVMCL/ BETA0, ALPHA0, ABLOG save /VVVMCL/ double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision EPSMAX parameter (EPSMAX = 2.2204460492503131d-16) double precision dummy(1) c----------------------------------------------------------------------------- iopt = 0 niop = 0 nopt = 0 tmop = 0.d0 trop = 0.d0 lg = ng ld = (ng*(ng-1))/2 ll = nd-ng lo = nd psq = p*p pm1 = p-1 pp1 = p+1 if (ng .eq. 1) return ALPHA = max(ALPHA,EPSMAX) BETA0 = BETA ALPHA0 = ALPHA ABLOG = log(BETA*ALPHA) c call intpr( 'ic', -1, ic, n) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. ng) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. 
k) then k = k + 1 call dswap( p, x(k,1), n, x(j,1), n) ic(j) = ic(k) ic(k) = icj end if end do 3 continue c set up pointers if (ng .eq. n) goto 4 do j = n, ng+1, -1 icj = ic(j) i = ic(icj) ic(icj) = j if (i .ne. icj) then ic(j) = i else ic(j) = j end if end do 4 continue c call intpr( 'ic', -1, ic, n) c initialize by simulating merges do k = 1, ng j = ic(k) if (j .ne. k) then c non-singleton c call dcopy( psq, zero, 0, r, 1) dummy(1) = zero call dcopy( psq, dummy, 0, r, 1) trcij = zero l = 1 10 continue m = l + 1 qj = one/dble(m) qi = dble(l)*qj si = sqrt(qi) sj = sqrt(qj) call dcopy( p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(k,1), n, v, 1) trcij = trcij + ddot( p, v, 1, v, 1) call dscal( p, si, x(k,1), n) call daxpy( p, sj, x(j,1), n, x(k,1), n) call mclrup( m, p, v, r, p) l = m i = ic(j) if (i .eq. j) goto 20 j = i goto 10 20 continue c d(ll+k) = trcij c copy triangular factor into the rows of x j = k m = p do i = 1, min(l-1,p) j = ic(j) call dcopy( m, r(i,i), p, x(j,i), n) m = m - 1 end do ij = j if (l .ge. p) then do m = p, l icj = ic(j) ic(j) = -k j = icj end do end if ic(ij) = n+l x(k, pp1) = zero if (l .ge. 2) then x( k, pp1) = trcij trmij = vvvtij( l, p, r, sj, trcij) x(ic(k),pp1) = trmij end if else ic(k) = 1 c d(ll+k) = zero end if end do c call intpr( 'ic', -1, ic, n) c call dblepr( '', -1, x(1,pp1), n) c call dblepr( 'trac', -1, d(ll+1), ng) c call dblepr( 'term', -1, term, n) c compute change in likelihood and determine minimum dopt = FLMAX ij = 0 do j = 2, ng icj = ic(j) nj = 1 if (icj .eq. 1) then tracj = zero termj = ABLOG do i = 1, (j-1) ni = 1 ici = ic(i) if (ici .eq. 
1) then nij = 2 rij = two si = rthalf sj = rthalf sij = rthalf call dcopy( p, x(i,1), n, v, 1) call daxpy( p, (-one), x(j,1), n, v, 1) call dscal( p, rthalf, v, 1) c trcij = half*ddot( p, v, 1, v, 1) trcij = ddot( p, v, 1, v, 1) call dcopy( p, v, 1, u, p) c trmij = rij*log(BETA*trcij+ALPHA) trmij = two*log(BETA*(trcij+ALPHA)/two) termi = ABLOG else m = p l = ici 110 continue call dcopy( m, x(l,ni), n, u(ni,ni), p) ni = ni + 1 m = m - 1 l = ic(l) if (l .le. n) goto 110 ni = l - n c traci = d(ll+i) c traci = trac(i) c termi = vvvtrm(i,ni,n,p,ic,x,traci) c termi = term(i) traci = x( i , pp1) termi = x(ic(i), pp1) ri = dble(ni) nij = ni + 1 rij = dble(nij) qij = one/rij qi = ri*qij si = sqrt(qi) sj = sqrt(qij) sij = sj call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = traci + ddot(p,v,1,v,1) call mclrup( nij, p, v, u, p) trmij = vvvtij( nij, p, u, sij, trcij) end if dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = i jopt = j m = p do k = 1, min(nij-1,p) call dcopy( m, u(k,k), p, r(k,k), p) m = m - 1 end do end if end do else m = p l = icj 120 continue call dcopy( m, x(l,nj), n, s(nj,nj), p) nj = nj + 1 m = m - 1 l = ic(l) if (l .le. n) goto 120 nj = l - n c tracj = d(ll+j) c termj = vvvtrm(j,nj,n,p,ic,x,tracj) tracj = x( j , pp1) termj = x( ic(j), pp1) rj = dble(nj) do i = 1, (j-1) m = p do k = 1, min(nj-1,p) call dcopy( m, s(k,k), p, u(k,k), p) m = m - 1 end do ni = 1 ici = ic(i) if (ici .eq. 
1) then nij = nj + 1 rij = dble(nij) qij = one/rij qi = qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) sij = sqrt(qij) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = tracj + ddot(p,v,1,v,1) termi = ABLOG else m = p l = ici k = nj + 1 130 continue call dcopy( m, x(l,ni), n, v, 1) call mclrup( k, m, v, u(ni,ni), p) ni = ni + 1 m = m - 1 l = ic(l) if (l .le. n) goto 130 ni = l - n c traci = d(ll+i) c termi = vvvtrm(i,ni,n,p,ic,x,traci) traci = x( i , pp1) termi = x(ic(i), pp1) ri = dble(ni) nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) sij = sqrt(qij) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = ( traci + tracj) + ddot(p,v,1,v,1) end if call mclrup( nij, p, v, u, p) trmij = vvvtij( nij, p, u, sij, trcij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = i jopt = j m = p do k = 1, min(nij-1,p) call dcopy( m, u(k,k), p, r(k,k), p) m = m - 1 end do end if end do end if end do c if (.false.) then c i = 1 c ij = 1 c do j = 2, ng c call dblepr( 'dij', -1, d(ij), i) c ij = ij + i c i = j c end do c end if if (ns .eq. 1) then if (iopt .lt. jopt) then x(1,1) = iopt x(1,2) = jopt else x(1,1) = jopt x(1,2) = iopt end if d(1) = dopt return end if ls = 1 200 continue call dcopy( p, x(iopt,1), n, v, 1) call dscal( p, siop, v, 1) call daxpy( p, sjop, x(jopt,1), n, v, 1) if (jopt .ne. lg) then call wardsw( jopt, lg, d) call dcopy( p, x(lg,1), n, x(jopt,1), n) m = ic(jopt) icj = ic(lg) if (icj .ne. 1) x( jopt, pp1) = x( lg, pp1) ic(jopt) = icj ic(lg) = m end if if (niop .eq. 1) then ic(iopt) = lg else l = ic(iopt) do k = 1, min(niop-1,p) m = l l = ic(l) end do if (l .lt. n) call intpr("l .lt. 
n", 8, l, 1) ic(m) = lg end if l = ic(iopt) do k = 1, min(nopt-1,p) call dcopy( p, r(1,1), p, x(l,1), n) m = l l = ic(l) end do ic(m) = nopt + n c call intpr('ic', 2, ic, n) c term(iopt) = tmop c trac(iopt) = trop x(iopt, pp1) = zero if (nopt .ge. 2) then x(iopt,pp1) = trop x(ic(iopt),pp1) = tmop endif call dcopy( p, v, 1, x(iopt,1), n) d(lo) = dopt lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) lo = lo - 1 lg = lg - 1 ld = ld - lg iold = iopt dopt = FLMAX ni = nopt ri = dble(ni) termi = tmop traci = trop ij = ((iold-1)*(iold-2))/2 if (iold .gt. 1) then do j = 1, (iold-1) c call dcopy(psq, zero, 0, u, 1) dummy(1) = zero call dcopy(psq, dummy, 0, u, 1) m = p do k = 1, min(ni-1,p) call dcopy(m, r(k,k), p, u(k,k), p) m = m - 1 end do nj = 1 icj = ic(j) if (icj .eq. 1) then nij = ni + 1 rij = dble(nij) qij = one/rij qi = ri*qij si = sqrt(qi) sj = sqrt(qij) sij = sj call dcopy(p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(iold,1), n, v, 1) trcij = traci + ddot(p,v,1,v,1) tracj = zero termj = ABLOG else m = p l = icj k = ni + 1 310 continue call dcopy( m, x(l,nj), n, v, 1) call mclrup( k, m, v, u(nj,nj), p) k = k + 1 nj = nj + 1 m = m - 1 l = ic(l) if (l .le. n) goto 310 nj = l - n c call vvvget(j,nj,n,p,ic,x,tracj,termj) tracj = x( j ,pp1) termj = x(ic(j),pp1) rj = dble(nj) nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) sij = sqrt(qij) call dcopy(p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(iold,1), n, v, 1) trcij = ( traci + tracj) + ddot(p,v,1,v,1) end if call mclrup( nij, p, v, u, p) trmij = vvvtij( nij, p, u, sij, trcij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = nj njop = ni siop = sj sjop = si iopt = j jopt = iold m = p do k = 1, min(nij-1,p) call dcopy(m, u(k,k), p, s(k,k), p) m = m - 1 end do end if end do end if if (iold .lt. 
lg) then i = iold ij = ij + i do j = (iold+1), lg c call dcopy(psq, zero, 0, u, 1) dummy(1) = zero call dcopy(psq, dummy, 0, u, 1) m = p do k = 1, min(ni-1,p) call dcopy(m, r(k,k), p, u(k,k), p) m = m - 1 end do nj = 1 icj = ic(j) if (icj .eq. 1) then nij = ni + 1 rij = dble(nij) qij = one/rij qi = ri*qij si = sqrt(qi) sj = sqrt(qij) sij = sj call dcopy(p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(iold,1), n, v, 1) trcij = traci + ddot(p,v,1,v,1) termj = ABLOG else m = p l = icj k = ni + 1 410 continue call dcopy( m, x(l,nj), n, v, 1) call mclrup( k, m, v, u(nj,nj), p) k = k + 1 nj = nj + 1 m = m - 1 l = ic(l) if (l .le. n) goto 410 nj = l - n c call vvvget(j,nj,n,p,ic,x,tracj,termj) tracj = x( j ,pp1) termj = x(ic(j),pp1) rj = dble(nj) nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) sij = sqrt(qij) call dcopy(p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(iold,1), n, v, 1) trcij = ( traci + tracj) + ddot(p,v,1,v,1) end if call mclrup( nij, p, v, u, p) trmij = vvvtij( nij, p, u, sij, trcij) dij = trmij - (termi + termj) d(ij) = dij ij = ij + i i = j if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = iold jopt = j m = p do k = 1, min(nij-1,p) call dcopy(m, u(k,k), p, s(k,k), p) m = m - 1 end do end if end do end if c update d and find max jopt = 2 iopt = 1 dopt = d(1) if (lg .eq. 2) goto 900 ij = 1 do i = 2, ld qi = d(i) if (qi .le. dopt) then ij = i dopt = qi end if end do c call dblepr("d", 1, d, nd) c call dblepr("d", 1, d, ld) if (ij .gt. 1) then do i = 2, ij iopt = iopt + 1 if (iopt .ge. jopt) then jopt = jopt + 1 iopt = 1 end if end do end if dummy(1) = zero do k = 1, p c call dcopy( p, zero, 0, r(1,k), 1) call dcopy( p, dummy, 0, r(1,k), 1) end do if (iopt .ne. iold .and. jopt .ne. iold) then i = iopt j = jopt nj = 1 icj = ic(j) ni = 1 ici = ic(i) if (icj .eq. 1) then termj = ABLOG if (ici .eq. 
1) then nij = 2 rij = two si = rthalf sj = rthalf call dcopy(p, x(i,1), n, v, 1) call daxpy( p, (-one), x(j,1), n, v, 1) call dscal( p, rthalf, v, 1) trcij = ddot( p, v, 1, v, 1) call dcopy( p, v, 1, r, p) termi = ABLOG else m = p l = ici 610 continue call dcopy( m, x(l,ni), n, r(ni,ni), p) ni = ni + 1 m = m - 1 l = ic(l) if (l .le. n) goto 610 ni = l - n c call vvvget(i,ni,n,p,ic,x,traci,termi) traci = x( i , pp1) termi = x(ic(i), pp1) ri = dble(ni) nij = ni + 1 rij = dble(nij) qij = one/rij qi = ri*qij si = sqrt(qi) sj = sqrt(qij) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = traci + ddot( p, v, 1, v, 1) call mclrup( nij, p, v, r, p) end if else m = p l = icj 620 continue call dcopy( m, x(l,nj), n, r(nj,nj), p) nj = nj + 1 m = m - 1 l = ic(l) if (l .le. n) goto 620 nj = l - n c call vvvget(j,nj,n,p,ic,x,tracj,termj) tracj = x( j , pp1) termj = x(ic(j), pp1) rj = dble(nj) if (ici .eq. 1) then nij = nj + 1 rij = dble(nij) qij = one/rij qj = rj*qij si = sqrt(qij) sj = sqrt(qj) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = tracj + ddot( p, v, 1, v, 1) termi = ABLOG else m = p l = ici k = nj + 1 630 continue call dcopy( m, x(l,ni), n, v, 1) call mclrup( k, m, v, r(ni,ni), p) ni = ni + 1 m = m - 1 l = ic(l) if (l .le. n) goto 630 ni = l - n c call vvvget(i,ni,n,p,ic,x,traci,termi) traci = x( i , pp1) termi = x(ic(i), pp1) ri = dble(ni) nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = ( traci + tracj) + ddot( p,v,1,v,1) end if call mclrup( nij, p, v, r, p) end if trop = trcij tmop = dopt + (termi + termj) nopt = nij niop = ni njop = nj siop = si sjop = sj else m = p do k = 1, min(nopt-1,p) call dcopy(m, s(k,k), p, r(k,k), p) m = m - 1 end do l = ic(iopt) if (l .ne. 1) then 710 continue if (l .le. 
n) then l = ic(l) goto 710 end if niop = l-n else niop = 1 end if l = ic(jopt) if (l .ne. 1) then 720 continue if (l .le. n) then l = ic(l) goto 720 end if njop = l-n else njop = 1 end if nopt = niop + njop end if ls = ls + 1 if (ls .eq. ns) goto 900 goto 200 900 continue d(lo) = dopt lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) do i = 1, ng ic(i) = i end do lo = nd - 1 ld = lo si = d(lo) lo = lo - 1 sj = d(lo) ic(int(sj)) = ng if (si .lt. sj) then x(1,1) = si x(1,2) = sj else x(1,1) = sj x(1,2) = si end if lg = ng + 1 do k = 2, ns lo = lo - 1 d(ld) = d(lo) ld = ld - 1 lo = lo - 1 i = int(d(lo)) ici = ic(i) lo = lo - 1 j = int(d(lo)) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(lg-k) if (ici .lt. icj) then x(k,1) = dble(ici) x(k,2) = dble(icj) else x(k,1) = dble(icj) x(k,2) = dble(ici) end if end do ld = nd lo = 1 do k = 1, ns si = d(lo) d(lo) = d(ld) d(ld) = si ld = ld - 1 lo = lo + 1 end do return end double precision function vvvtij( l, p, r, s, trac) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer l, p double precision r(p,*), s, trac double precision detlog double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision det2mc external det2mc double precision BETA, ALPHA, ABLOG common /VVVMCL/ BETA, ALPHA, ABLOG save /VVVMCL/ double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) if (l .le. p) then vvvtij = log(BETA*(trac+ALPHA)/dble(l)) else if (trac .eq. zero) then vvvtij = log((ALPHA*BETA)/dble(l)) else detlog = det2mc( p, r, s) if (detlog .eq. -FLMAX) then vvvtij = log(BETA*(trac+ALPHA)/dble(l)) else if (detlog .le. 
zero) then vvvtij = log(exp(detlog)+BETA*(trac+ALPHA)/dble(l)) else vvvtij = log(one+exp(-detlog)*(BETA*(trac+ALPHA)/dble(l))) * + detlog end if end if end if vvvtij = dble(l)*vvvtij return end double precision function det2mc( n, u, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer k, n double precision q, s double precision u(n,*) double precision zero, two parameter (zero = 0.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) det2mc = zero do k = 1, n q = u(k,k)*s if (abs(q) .le. zero) then det2mc = -FLMAX return end if det2mc = det2mc + log(abs(q)) end do det2mc = two*det2mc return end subroutine mevvv ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, U, pro, w, S) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p,G), pro(G), S(p,p) double precision mu(p,*), U(p,p,*), pro(*), S(p,*) integer nz, p1, iter, i, j, k, l, j1 double precision piterm, hold, rcmin, rteps double precision temp, term, cs, sn, umin, umax double precision sumz, sum, detlog, const, hood, err double precision prok, tmin, tmax, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) 
c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if piterm = dble(p)*pi2log/two p1 = p + 1 eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX c zero out the lower triangle do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do i = 1 do j = 2, p c call dcopy( p-i, zero, 0, S(j,i), 1) dummy(1) = zero call dcopy( p-i, dummy, 0, S(j,i), 1) i = j end do do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do dummy(1) = zero do j = 1, p c call dcopy( j, zero, 0, S(1,j), 1) call dcopy( j, dummy, 0, S(1,j), 1) end do c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) sumz = zero do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(sumz,zsum) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) end do do j = 1, p call dscal( j, one/sqrt(sumz), S(1,j), 1) end do else c call dcopy( p, FLMAX, 0, z(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, z(1,k), 1) end if do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if rcmin = FLMAX do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,K) end do end do call absrng( p, S, p1, umin, umax) rcmin = min(umin/(one+umax),rcmin) end do if (rcmin .le. rteps) then tol = rcmin eps = FLMAX maxi = iter return end if do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,K) end do end do c temp = pro(k) detlog = zero do j = 1, p detlog = detlog + log(abs(S(j,j))) end do const = piterm+detlog do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, S, p, w, 1) sum = ddot( p, w, 1, w, 1)/two c z(i,k) = temp*exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 c w(1) = rcmin tol = err eps = hood maxi = iter return end subroutine mevvvp( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, mu, U, pro, w, S) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof double precision Vinv, eps, tol c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p,G), pro(G), S(p,p) double precision mu(p,*), U(p,p,*), pro(*), S(p,*) integer nz, p1, iter, i, j, k, l, j1 double precision piterm, hold, rcmin, rteps double precision temp, term, cs, sn, umin, umax double precision sum, sumz, detlog, const, hood, err double precision prok, tmin, tmax double precision cmu, cgam, rmu, rgam, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision twolog parameter (twolog = 0.6931471805599453d0) double precision pilog parameter (pilog = 1.144729885849400d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot, dlngam external ddot, dlngam double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if piterm = dble(p)*pi2log/two p1 = p + 1 eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do do j = 1, p call dcopy( p, pscale(1,j), 1, S(1,j), 1) end do sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(sumz,zsum) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) const = sumz+pshrnk temp = (sumz*pshrnk)/const call dscal( p, sqrt(temp), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) do j = 1, p temp = pdof+sumz+dble(p)+two call dscal( j, one/sqrt(temp), S(1,j), 1) end do call dscal( p, sumz/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else c call dcopy( p, FLMAX, 0, z(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, z(1,k), 1) end if do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = zero if (Vinv .gt. 
zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if rcmin = FLMAX do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,K) end do end do call absrng( p, S, p1, umin, umax) rcmin = min(umin/(one+umax),rcmin) end do if (rcmin .le. rteps) then tol = rcmin eps = FLMAX maxi = iter return end if rmu = zero rgam = zero do k = 1, G c temp = pro(k) do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do detlog = zero do j = 1, p detlog = detlog + log(abs(S(j,j))) end do rmu = rmu - detlog rgam = rgam - (pdof+dble(p)+one)*detlog const = piterm+detlog do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, S, p, w, 1) sum = ddot( p, w, 1, w, 1)/two c z(i,k) = temp*exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 c w(1) = rcmin tol = err eps = hood maxi = iter cmu = dble(p)*(log(pshrnk) - pi2log)/two sum = zero do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do call daxpy( p, (-one), mu(1,k), 1, pmu, 1) call dtrsv('U','T','N',p, S, p, pmu, 1) sum = sum + ddot( p, pmu, 1, pmu, 1) end do rmu = rmu - pshrnk*sum/two sum = zero term = zero temp = zero do j = 1, p call dcopy( p, pscale(j,1), p, pmu, 1) c call dtrsv('U','T','N', p, U, p, pmu, 1) i = p-j+1 c call dtrsv('U','T','N', i, U(j,j,k),i,pmu(j),1) call dtrsv('U','T','N', i, S(j,j), p, pmu(j), 1) sum = sum + ddot(i, pmu(j), 1, pmu(j), 1) temp = temp + log(abs(pscale(j,j))) term = term + dlngam((pdof+one-dble(j))/two) end do rgam = rgam - sum/two const = -dble(p)*(pdof*twolog+(dble(p)-one)*pilog/two) cgam = (const-pdof*temp)/two-term pdof = (dble(G)*cmu+rmu) + (dble(G)*cgam+rgam) return end subroutine msvvv ( x, z, n, p, G, w, mu, U, pro, S) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p,G), pro(G), S(p,p) double precision mu(p,*), U(p,p,*), pro(*), S(p,*) integer i, j, k, l, j1 double precision sum, temp, cs, sn double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision dummy(1) c----------------------------------------------------------------------------- dummy(1) = zero do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p c call dcopy( j, zero, 0, U(1,j,k), 1) c call dcopy( j, zero, 0, S(1,j), 1) call dcopy( j, dummy, 0, S(1,j), 1) end do sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = 
sum / dble(n) if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) end do temp = sqrt(sum) if (temp .ge. one .or. one .lt. temp*FLMAX) then do j = 1, p call dscal( j, one/temp, S(1,j), 1) end do else do j = 1, p c call dcopy( j, FLMAX, 0, S(1,j), 1) dummy(1) = FLMAX call dcopy( j, dummy, 0, S(1,j), 1) end do end if else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) end if do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do return end subroutine msvvvp( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * w, mu, U, pro, S) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p,G), pro(G), S(p,p) double precision mu(p,*), U(p,p,*), pro(*), S(p,*) c------------------------------------------------------------------------------ c c x double (input) (n,p) matrix of observations. c z double (input/output) (n,G[+1]) conditional probabilities. c n integer (input) number of observations. c p integer (input) dimension of the data. c G integer (input) number of Gaussian clusters in the mixture. c mu double (output) (p,G) mean for each group. c U double (output) (p,p,G) c pro double (output) (G) mixing proportions (used even if equal). 
c w double (scratch) (max(p,G)) integer i, j, k, l, j1 double precision sumz, temp, cs, sn, const double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision dummy(1) c------------------------------------------------------------------------------ do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do do j = 1, p call dcopy( p, pscale(1,j), 1, S(1,j), 1 ) end do sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) if (sumz .ge. one .or. one .lt. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) const = sumz+pshrnk temp = (sumz*pshrnk)/const call dscal( p, sqrt(temp), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) temp = pdof+sumz+dble(p)+one if (pshrnk .gt. 
zero) temp = temp + one do j = 1, p call dscal( j, one/sqrt(temp), S(1,j), 1) end do call dscal( p, sumz/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do return end subroutine mvn1d ( x, n, mu, sigsq, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n integer n double precision mu, sigsq, hood c double precision x(n) double precision x(*) c------------------------------------------------------------------------------ c c x double (input) (n) matrix of observations (destroyed). c n integer (input) number of observations. c mu double (output) mean. c sigsq double (output) variance. c hood double (output) loglikelihood. double precision dn double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot external ddot c------------------------------------------------------------------------------ dn = dble(n) mu = ddot( n, one/dn, 0, x, 1) sigsq = zero call daxpy( n, (-one), mu, 0, x, 1) sigsq = ddot( n, x, 1, x, 1)/dn if (sigsq .eq. 
zero) then hood = FLMAX else hood = -dn*(pi2log + (one + log(sigsq)))/two end if return end subroutine mvn1p ( x, n, pshrnk, pmu, pscale, pdof, * mu, sigsq, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n double precision pshrnk, pmu, pscale, pdof double precision mu, sigsq, hood c double precision x(n) double precision x(*) integer i double precision dn, const, term, temp, xbar double precision cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot, dlngam external ddot, dlngam c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero dn = dble(n) xbar = ddot( n, one/dn, 0, x, 1) const = pshrnk + dn mu = (dn/const)*xbar + (pshrnk/const)*pmu sigsq = zero do i = 1, n temp = xbar - x(i) sigsq = sigsq + temp*temp end do temp = xbar - pmu sigsq = sigsq + pscale + dn*(pshrnk/const)*(temp*temp) temp = pdof + dn + two if (pshrnk .gt. zero) temp = temp + one sigsq = sigsq / temp if (sigsq .eq. zero) then hood = FLMAX else call daxpy( n, (-one), mu, 0, x, 1) temp = ddot( n, x, 1, x, 1) if (sigsq .lt. one .and. temp .ge. sigsq*FLMAX) then hood = FLMAX return end if temp = temp/sigsq hood = -(dn*(pi2log + log(sigsq)) + temp)/two end if if (pshrnk .gt. 
zero) then cmu = (pi2log-log(pshrnk))/two term = pdof/two cgam = term*log(pscale/two) - dlngam(term) temp = pmu - mu const = log(sigsq) rmu = -(const - (pshrnk/sigsq)*(temp*temp))/two rgam = -(term+one)*const - (pscale/sigsq)/two pdof = (cmu+rmu) + (cgam+rgam) else pdof = FLMAX end if return end subroutine mnxiip( x, n, p, pshrnk, pmu, pscale, pdof, * mu, sigsq, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision sigsq, hood c double precision x(n,p), mu(p) double precision x(n,*), mu(*) integer i, j double precision dnp, scl, temp, term, sum double precision dmudmu, pmupmu, cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot, dlngam external ddot, dlngam c------------------------------------------------------------------------------ dnp = dble(n*p) scl = one/dble(n) do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) end do sum = zero do i = 1, n do j = 1, p temp = x(i,j) - mu(j) sum = sum + temp*temp end do end do pmupmu = ddot(p,pmu,1,pmu,1) dmudmu = ddot(p,mu,1,mu,1) temp = dmudmu + pmupmu temp = temp - two*ddot(p,mu,1,pmu,1) term = pshrnk + dble(n) scl = (pshrnk*dble(n))/term sigsq = pscale + scl*temp + sum temp = pdof + dble(n*p) + two if (pshrnk .gt. zero) temp = temp + dble(p) sigsq = sigsq/temp call dscal( p, dble(n)/term, mu, 1) call daxpy( p, pshrnk/term, pmu, 1, mu, 1) if (sigsq .eq. 
zero) then hood = FLMAX else sum = zero do i = 1, n do j = 1, p temp = x(i,j) - mu(j) sum = sum + temp*temp end do end do hood = -(sum/sigsq + dnp*(pi2log + log(sigsq)))/two end if if (pshrnk .gt. zero) then dmudmu = ddot(p,mu,1,mu,1) cmu = dble(p)*(log(pshrnk)-pi2log)/two temp = (dmudmu+pmupmu) - two*ddot(p,pmu,1,mu,1) term = log(sigsq) rmu = -(dble(p)*term + (pshrnk*temp)/sigsq)/two temp = pdof/two cgam = temp*log(pscale/two) - dlngam(temp) rgam = -(temp+one)*term - pscale/(two*sigsq) pdof = (cmu+rmu) + (cgam+rgam) else pdof = FLMAX end if return end subroutine mvnxii( x, n, p, mu, sigsq, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p double precision sigsq, hood c double precision x(n,p), mu(p) double precision x(n,*), mu(*) integer j double precision dnp, scl double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot external ddot c------------------------------------------------------------------------------ dnp = dble(n*p) scl = one/dble(n) do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) end do sigsq = zero do j = 1, p call daxpy( n, (-one), mu(j), 0, x(1,j), 1) sigsq = sigsq + ddot( n, x(1,j), 1, x(1,j), 1) end do sigsq = sigsq/dnp if (sigsq .eq. 
zero) then hood = FLMAX else hood = -dnp*(pi2log + (one + log(sigsq)))/two end if return end subroutine mnxxip( x, n, p, pshrnk, pmu, pscale, pdof, * mu, scale, shape, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision scale, hood c double precision x(n,p), mu(p), shape(p) double precision x(n,*), mu(*), shape(*) integer i, j double precision sum, temp, smin, smax double precision term, const, scl double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ temp = one/dble(n) do j = 1, p mu(j) = ddot( n, temp, 0, x(1,j), 1) shape(j) = zero end do do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j) sum = sum + temp*temp end do shape(j) = shape(j) + sum end do term = pshrnk + dble(n) scl = (pshrnk*dble(n))/term do j = 1, p temp = pmu(j) - mu(j) shape(j) = shape(j) + scl*(temp*temp) + pscale end do call dscal( p, dble(n)/term, mu, 1) call daxpy( p, pshrnk/term, pmu, 1, mu, 1) call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = zero hood = FLMAX return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .ge. 
BIGLOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = FLMAX hood = FLMAX return end if if (temp .le. SMALOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = zero hood = FLMAX return end if temp = exp(temp) term = pdof + dble(n) + two if (pshrnk .gt. zero) term = term + one scale = temp/term if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) hood = FLMAX return end if call dscal( p, one/temp, shape, 1) const = dble(p)*(pi2log+log(scale)) hood = zero do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j) sum = sum + (temp*temp)/shape(j) end do hood = hood - (const+(sum/scale))/two end do c log posterior computation not yet available pdof = FLMAX return end subroutine mvnxxi( x, n, p, mu, scale, shape, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p double precision scale, hood c double precision x(n,p), mu(p), shape(p) double precision x(n,*), mu(*), shape(*) integer i, j double precision dn, scl, sum, temp, smin, smax double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ dn = dble(n) scl = one/dn do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) shape(j) = zero end do do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j) sum = sum + temp*temp end do shape(j) = shape(j) + sum end do 
call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = zero hood = FLMAX return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = FLMAX hood = FLMAX return end if if (temp .lt. SMALOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = zero hood = FLMAX return end if temp = exp(temp) scale = temp/dn if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) hood = FLMAX return end if call dscal( p, one/temp, shape, 1) hood = -dble(n*p)*(one + pi2log + log(scale))/two return end subroutine mnxxxp( x, n, p, w, * pshrnk, pmu, pscale, pdof, * mu, U, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof double precision hood c double precision x(n,p), w(p), mu(p), U(p,p) double precision x(n,*), w(*), mu(*), U(p,*) integer i, j, j1 double precision dnp, scl, detlog, sum, term, temp double precision umin, umax, cs, sn, const double precision cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision twolog parameter (twolog = 0.6931471805599453d0) double precision pilog parameter (pilog = 1.144729885849400d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot, dlngam external ddot, dlngam 
c------------------------------------------------------------------------------ dnp = dble(n*p) scl = one/dble(n) do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) call dcopy( p, pscale(1,j), 1, U(1,j), 1) end do c mu contains ybar; U contains Cholesky factor of inverse Wishart scale do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu, 1, w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu, 1, w, 1) term = (pshrnk*dble(n))/(pshrnk+dble(n)) call dscal( p, sqrt(term), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) scl = pdof + dble(n+p+1) if (pshrnk .gt. zero) scl = scl + one scl = one/sqrt(scl) do j = 1, p call dscal( j, scl, U(1,j), 1) end do term = pshrnk + dble(n) call dscal( p, dble(n)/term, mu, 1) call daxpy( p, pshrnk/term, pmu, 1, mu, 1) call absrng( p, U, p+1, umin, umax) c rcond = umin / (one + umax) if (umin .eq. zero) then hood = FLMAX return end if detlog = zero do j = 1, p detlog = detlog + log(abs(U(j,j))) end do const = dble(n)*(detlog + dble(p)*pi2log/two) sum = zero do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu, 1, w, 1) call dtrsv( 'U', 'T', 'N', p, U, p, w, 1) sum = sum + ddot(p, w, 1, w, 1) end do hood = -(const+sum/two) cmu = dble(p)*(log(pshrnk) - pi2log)/two call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu, 1, w, 1) call dtrsv( 'U', 'T', 'N', p, U, p, w, 1) temp = ddot( p, w, 1, w, 1) sum = zero term = zero do j = 1, p term = term + dlngam((pdof+dble(1-j))/two) call dcopy( p, pscale(j,1), p, pmu, 1) c call dtrsv('U','T','N', p, U, p, pmu, 1) i = p-j+1 c call dtrsv('U','T','N', i, U(j,j),i,pmu(j),1) call dtrsv('U','T','N', i, U(j,j),p,pmu(j),1) sum = sum + ddot(i, pmu(j), 1, pmu(j), 1) end do if (pshrnk .gt. 
zero) then rmu = -(detlog+pshrnk*temp/two) const = -dble(p)*(pdof*twolog+(dble(p)-one)*pilog/two) cgam = (const/two-pdof*detlog) - term rgam = -((pdof+dble(p)+one)*detlog + sum/two) pdof = (cmu+cgam) + (rmu+rgam) else pdof = FLMAX end if return end subroutine mvnxxx( x, n, p, mu, U, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p double precision hood c double precision x(n,p), mu(p), U(p,p) double precision x(n,*), mu(*), U(p,*) integer i, j, j1 double precision dn, dnp, scl double precision umin, umax, cs, sn double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ dn = dble(n) dnp = dble(n*p) scl = one/dn do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) c call dcopy( p, zero, 0, U(1,j), 1) dummy(1) = zero call dcopy( p, dummy, 0, U(1,j), 1) end do do i = 1, n call daxpy( p, (-one), mu, 1, x(i,1), n) j = 1 do j1 = 2, p call drotg( U(j,j), x(i,j), cs, sn) call drot( p-j, U(j,j1), p, x(i,j1), n, cs, sn) j = j1 end do call drotg( U(p,p), x(i,p), cs, sn) end do scl = sqrt(scl) do j = 1, p call dscal( j, scl, U(1,j), 1) end do call absrng( p, U, p+1, umin, umax) c rcond = umin / (one + umax) if (umin .eq. zero) then hood = FLMAX else hood = zero do j = 1, p hood = hood + log(abs(U(j,j))) end do hood = -dn*(hood + dble(p)*(pi2log + one)/two) end if c c do j = 1, p c do i = 1, j c x(i,j) = ddot(i,U(1,i),1,U(1,j),1) c if (i .ne. 
j) x(j,i) = x(i,j) c end do c end do c do j = 1, p c call dcopy( p, x(1,j), 1, U(1,j), 1) c end do return end c Luca: add to check if compile ok subroutine hceee ( x, n, p, ic, ng, ns, io, jo, v, s, u, r) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt c Gaussian model-based clustering algorithm in clusters share a common c variance (shape, volume, and orientation are the same for all clusters). implicit NONE integer n, p, ic(n), ng, ns, io(*), jo(*) c double precision x(n,p), v(p), s(p,p), u(p,p), r(p,p) double precision x(n,*), v(*), s(*), u(*), r(*) c------------------------------------------------------------------------------ c c x double (input/output) On input, the (n by p) matrix containing c the observations. On output, the first two columns c and ns rows contain the determinant and trace of the c sum of the sample cross product matrices. Columns 3 and 4 c contain the merge indices if p .ge. 4 c n integer (input) number of observations c p integer (input) dimension of the data c ic integer (input) (n) Initial partitioning of the data; groups must c be numbered consecutively. c ng integer (input) Number of groups in initial partition. c ns integer (input) Desired number of stages of clustering. c io,jo integer (output [p .le. 3]) If p .lt. 3, both io and jo must be of c length ns and contain the indices of the merged pairs on c output. If p .eq. 3, jo must be of length ns and contains c an index of each merged on output pair. Otherwise io and c jo are not used and can be of length 1. c v double (scratch/output) (p) On output, algorithm breakpoints; c tells where the algorithm switches from using trace c to trace + det, and from trace + det to det as criterion. 
c s double (scratch/output) (p,p) On output the first column contains c the initial trace and determinant of the sum of sample c cross product matrices. c u,r double (scratch) (p,p) integer q, i, j, k, l, m, i1, i2, l1, l2 integer ni, nj, nij, lw, ls, lg, ici, icj integer nopt, iopt, jopt, idet, jdet, ndet double precision DELOG double precision ri, rj, rij, dij, tij, zij double precision trc0, trc1, trcw, det0, det1, detw double precision si, sj, siop, sjop, sidt, sjdt double precision dopt, zopt, dijo, tijo, tdet double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision ddot, detmc2 external ddot, detmc2 double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision EPSMIN parameter (EPSMIN = 1.1102230246251565d-16) double precision dummy(1) c------------------------------------------------------------------------------ i1 = 0 i2 = 0 trcw = 0.d0 tijo = 0.d0 tdet = 0.d0 sjdt = 0.d0 sidt = 0.d0 dijo = 0.d0 ndet = 0 jdet = 0 idet = 0 iopt = 0 nopt = 0 jopt = 0 lw = p*p c call intpr('ic', -1, ic, n) c form scaled column sums call dscal( n*p, one/sqrt(dble(n)), x, 1) si = one/sqrt(dble(p)) sj = si / dble(n) c call dcopy( p, zero, 0, v, 1) dummy(1) = zero call dcopy( p, dummy, 0, v, 1) do k = 1, n call daxpy( p, sj, x(k,1), n, v, 1) end do trc0 = zero c call dcopy( lw, zero, 0, r, 1) dummy(1) = zero call dcopy( lw, dummy, 0, r, 1) do k = 1, n call dcopy( p, v, 1, s, 1) call daxpy( p, (-si), x(k,1), n, s, 1) trc0 = trc0 + ddot( p, s, 1, s, 1) call mclrup( (k+1), p, s, r, p) end do det0 = detmc2( p, r) DELOG = log(trc0+EPSMIN) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. ng) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. 
k) then k = k + 1 call dswap( p, x(k,1), n, x(j,1), n) ic(j) = ic(k) ic(k) = icj end if end do 3 continue c call intpr( 'ic', -1, ic, n) c call dcopy( lw, zero, 0, r, 1) dummy(1) = zero call dcopy( lw, dummy, 0, r, 1) q = 1 do j = 1, n i = ic(j) if (i .ne. j) then c update trace and Cholesky factor as if a merge q = q + 2 ni = ic(i) ri = dble(ni) rij = dble(ni+1) sj = sqrt(one/rij) si = sqrt(ri)*sj call dcopy( p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcw = trcw + ddot(p, v, 1, v, 1) call mclrup( q, p, v, r, p) ic(j) = 0 ic(i) = ic(i) + 1 call dscal( p, si, x(i,1), n) call daxpy( p, sj, x(j,1), n, x(i,1), n) c call dcopy( p, FLMAX, 0, x(j,1), n) c update column sum in jth row else ic(j) = 1 end if end do c call intpr('ic', -1, ic, n) trc1 = trcw if (q .lt. p) then detw = -FLMAX else detw = detmc2( p, r) end if det1 = detw ls = 1 lg = ng l1 = 0 l2 = 0 100 continue if (q .ge. p) then c if (.false.) c * call intpr('PART 2 --------------------------', -1, ls, 0) if (detw .lt. DELOG) then goto 200 else goto 300 end if end if dopt = FLMAX do j = 2, lg nj = ic(j) rj = dble(nj) do i = 1, (j-1) ni = ic(i) ri = dble(ni) nij = ni + nj rij = dble(nij) si = sqrt(ri/rij) sj = sqrt(rj/rij) call dcopy( p, x(i,1), n, s, 1) call dscal( p, sj, s, 1) call daxpy( p, (-si), x(j,1), n, s, 1) tij = trcw + ddot(p, s, 1, s, 1) zij = max(tij,EPSMIN) if (zij .le. dopt) then dopt = zij nopt = nij siop = si sjop = sj iopt = i jopt = j call dcopy( p, s, 1, v, 1) end if end do end do trcw = dopt if (ls .eq. ns) goto 900 call dscal( p, siop, x(iopt,1), n) call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) if (jopt .ne. lg) then call dcopy( p, x(lg,1), n, x(jopt,1), n) ic(jopt) = ic(lg) end if ic(iopt) = nopt x(lg,1) = detw x(lg,2) = trcw if (p .ge. 4) then x(lg,3) = dble(iopt) x(lg,4) = dble(jopt) else if (p .eq. 
3) then x(lg,3) = dble(iopt) jo(ls) = jopt else io(ls) = iopt jo(ls) = jopt end if c update the Cholesky factor q = q + 1 call mclrup( q, p, v, r, p) ls = ls + 1 lg = lg - 1 goto 100 200 continue q = q + 1 c call intpr('ic', -1, ic, n) dopt = FLMAX zopt = FLMAX do j = 2, lg nj = ic(j) rj = dble(nj) do i = 1, (j-1) ni = ic(i) ri = dble(ni) nij = ni + nj rij = dble(nij) si = sqrt(ri/rij) sj = sqrt(rj/rij) call dcopy( p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) tij = trcw + ddot(p, v, 1, v, 1) call dcopy( lw, r, 1, u, 1) call mclrup( q, p, v, u, p) dij = detmc2( p, u) if (dij .le. dopt) then dopt = dij tdet = tij ndet = nij sidt = si sjdt = sj idet = i jdet = j end if if (tij .eq. zero) then zij = -FLMAX else zij = max(tij,EPSMIN) if (dij .eq. (-FLMAX)) then zij = log(zij) else if (dij .le. zero) then zij = log(exp(dij) + zij) else zij = log(one + zij*exp(-dij)) + dij end if end if if (zij .le. zopt) then zopt = zij dijo = dij tijo = tij nopt = nij siop = si sjop = sj iopt = i jopt = j call dcopy( lw, u, 1, s, 1) end if end do end do if (dopt .lt. DELOG) then if (l1 .eq. 0) l1 = ls trcw = tijo detw = dijo call dcopy( lw, s, 1, r, 1) else l2 = ls trcw = tdet detw = dopt siop = sidt sjop = sjdt nopt = ndet iopt = idet jopt = jdet call dcopy( p, x(iopt,1), n, v, 1) call dscal( p, sjop, v, 1) call daxpy( p, (-siop), x(jopt,1), n, v, 1) call mclrup( q, p, v, r, p) end if if (ls .eq. ns) goto 900 call dscal( p, siop, x(iopt,1), n) call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) if (jopt .ne. lg) then call dcopy( p, x(lg,1), n, x(jopt,1), n) ic(jopt) = ic(lg) end if ic(iopt) = nopt x(lg,1) = detw x(lg,2) = trcw if (p .ge. 4) then x(lg,3) = dble(iopt) x(lg,4) = dble(jopt) else if (p .eq. 3) then x(lg,3) = dble(iopt) jo(ls) = jopt else io(ls) = iopt jo(ls) = jopt end if ls = ls + 1 lg = lg - 1 if (detw .ge. DELOG) then c if (.false.) 
c * call intpr('PART 3 --------------------------', -1, ls, 0) goto 300 end if goto 200 300 continue q = q + 1 detw = FLMAX do j = 2, lg nj = ic(j) rj = dble(nj) do i = 1, (j-1) ni = ic(i) ri = dble(ni) nij = ni + nj rij = dble(nij) si = sqrt(ri/rij) sj = sqrt(rj/rij) call dcopy( p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) call dcopy( lw, r, 1, u, 1) call mclrup( q, p, v, u, p) dij = detmc2( p, u) if (dij .le. detw) then detw = dij nopt = nij siop = si sjop = sj iopt = i jopt = j call dcopy( lw, u, 1, s, 1) end if end do end do c update the trace call dcopy( p, x(iopt,1), n, v, 1) call dscal( p, sjop, v, 1) call daxpy( p, (-siop), x(jopt,1), n, v, 1) trcw = trcw + ddot( p, v, 1, v, 1) if (ls .eq. ns) goto 900 call dcopy( lw, s, 1, r, 1) call dscal( p, siop, x(iopt,1), n) call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) if (jopt .ne. lg) then call dcopy( p, x(lg,1), n, x(jopt,1), n) ic(jopt) = ic(lg) end if ic(iopt) = nopt x(lg,1) = detw x(lg,2) = trcw if (p .ge. 4) then x(lg,3) = dble(iopt) x(lg,4) = dble(jopt) else if (p .eq. 3) then x(lg,3) = dble(iopt) jo(ls) = jopt else io(ls) = iopt jo(ls) = jopt end if ls = ls + 1 lg = lg - 1 goto 300 900 continue x(lg,1) = detw x(lg,2) = trcw if (p .ge. 4) then if (iopt .lt. jopt) then x(lg,3) = dble(iopt) x(lg,4) = dble(jopt) else x(lg,3) = dble(jopt) x(lg,4) = dble(iopt) end if else if (p .eq. 3) then if (iopt .lt. jopt) then x(lg,3) = dble(iopt) jo(ls) = jopt else x(lg,3) = dble(jopt) jo(ls) = iopt end if else if (iopt .lt. jopt) then io(ls) = iopt jo(ls) = jopt else io(ls) = jopt jo(ls) = iopt end if end if c decode do k = 1, ng ic(k) = k end do m = ng + 1 if (p .ge. 4) then l = m do k = 1, ns l = l - 1 i = int(x(l,3)) ici = ic(i) j = int(x(l,4)) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(m - k) if (ici .lt. icj) then x(l,3) = dble(ici) x(l,4) = dble(icj) else x(l,3) = dble(icj) x(l,4) = dble(ici) end if end do else if (p .eq. 
3) then l = m do k = 1, ns l = l - 1 i = int(x(l,3)) ici = ic(i) j = jo(k) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(m - k) if (ici .lt. icj) then x(l,3) = dble(ici) jo(k) = icj else x(l,3) = dble(icj) jo(k) = ici end if end do else do k = 1, ns i = io(k) ici = ic(i) j = jo(k) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(m - k) if (ici .lt. icj) then io(k) = ici jo(k) = icj else io(k) = icj jo(k) = ici end if end do end if l = 2 m = min(p,4) do k = ng, lg, -1 if (k .le. l) goto 950 call dswap( m, x(k,1), n, x(l,1), n) l = l + 1 end do 950 continue x(1,1) = det1 x(1,2) = trc1 v(1) = dble(l1) v(2) = dble(l2) s(1) = det0 s(2) = trc0 return end mclust/src/Makevars0000644000175000017500000000006013475427014014201 0ustar nileshnileshPKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) mclust/src/mclustaddson.f0000644000175000017500000021773013643634507015376 0ustar nileshnilesh* ===================================================================== subroutine transpose(X, p) * * Compute transpose of a matrix * * ===================================================================== implicit NONE integer :: p, i, j double precision :: X(p,p), temp do j = 2, p do i = 1, j-1 temp = X(i,j) X(i,j) = X(j,i) X(j,i) = temp end do end do return end * ===================================================================== subroutine crossprodf(X, Y, n, p, q, XTY) * * Given matrices X and Y of dimension (n x p) and (n x q) computes * the matrix of cross-product, i.e. 
X' Y * * ===================================================================== implicit NONE integer n, p, q double precision X(n,p), Y(n,q), XTY(p,q) * Compute X'Y using DGEMM blas subroutine call DGEMM('T', 'N', p, q, n, 1.d0, X, n, Y, n, 0.d0, XTY, p) end * ====================================================================== subroutine covwf ( X, Z, n, p, G, M, S, W ) * * Given data matrix X(n x p) and weight matrix Z(n x G) computes * weighted means M(p x G), weighted covariance matrices S(p x p x G) * and weighted scattering matrices W(p x p x G) * * ====================================================================== implicit none integer :: n, p, G double precision :: X(n,p), Z(n,G) double precision :: M(p,G), S(p,p,G), W(p,p,G) integer :: j, k double precision :: sumZ(G), temp(n,p) * compute X'Z using BLAS call dgemm('T', 'N', p, G, n, 1.d0, X, n, Z, n, 0.d0, M, p) * compute row sums of Z sumZ = sum(Z, DIM = 1) do k = 1,G * compute means call dscal(p, (1.d0/sumZ(k)), M(:,k), 1) do j = 1,p * compute sqrt(Z) * (X - M) temp(:,j) = sqrt(Z(:,k)) * (X(:,j) - M(j,k)) end do * compute scattering matrix call dgemm('T', 'N', p, p, n, 1.d0, temp, n, temp, n, * 0.d0, W(:,:,k), p) * compute covariance matrix S(:,:,k) = W(:,:,k)/sumZ(k) end do return end ************************************************************************ **** EVV model ************************************************************************ * ====================================================================== subroutine msevv (x,z, n,p,G, mu,O,U,scale,shape,pro, lwork,info, * eps) * Maximization step for model EEV * ====================================================================== implicit none integer :: n, p, G double precision :: x(n,p), z(n,G) double precision :: mu(p,G), O(p,p,*), U(p,p,*), pro(G) double precision :: scale(G), shape(p,G) double precision :: sumz(G) integer :: i, j, k, info, lwork, l double precision :: temp(p), wrk(lwork), eps, dummy(1) double precision :: FLMAX 
parameter (FLMAX = 1.7976931348623157d308) * * double precision :: BIGLOG * parameter (BIGLOG = 709.d0) * * double precision :: SMALOG * parameter (SMALOG = -708.d0) *----------------------------------------------------------------------- * colsums of z sumz = sum(z, dim = 1) * a priori probabilities pro = sumz / dble(n) * pro = sumz / sum(sumz) * if there is noise sum(sumz) does not sum to n. See help(mstep) do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) * U(:,:,k) = U(:,:,k) + * * spread(temp, dim = 2, ncopies = p)* * * spread(temp, dim = 1, ncopies = p) * outer product, Press et al. (1992), p. 970 call dger(p, p, 1.d0, temp, 1, temp, 1, U(:,:,k), p) * more efficient end do * U contains the weighted scattering matrix O(:,:,k) = U(:,:,k) * call dgesvd('O', 'N', p, p, O(:,:,k), p, shape(:,k), * * dummy, 1, dummy, 1, wrk, lwork, info) call dgesvd('N', 'O', p, p, O(:,:,k), p, shape(:,k), * dummy, 1, dummy, 1, wrk, lwork, info) * O now contains eigenvectors of the scattering matrix * ##### NOTE: O is transposed * shape contains the eigenvalues * check if dgesvd converged (info == 0) if (info .ne. 0) then l = info else scale(k) = exp( sum( log(shape(:,k)) ) )**(1.d0/p) call dscal(p*p, 1.d0/scale(k), U(:,:,k), 1) call dscal(p, 1.d0/scale(k), shape(:,k), 1) * now U is the matrix Ck (Celeux, Govaert 1995, p.787) * and shape is the proper scaled shape (matrix A) end if end do * check very small eigenvalues (singular covariance) if (minval(shape) .le. sqrt(eps) .or. * minval(scale) .le. 
sqrt(eps)) then shape = FLMAX scale = FLMAX return end if scale(1) = sum(scale) / sum(sumz) return end * ====================================================================== subroutine esevv (x,z, n,p,G,Gnoise, mu,O,scale,shape,pro, Vinv, * loglik, eps) * Expectation step for model EVV * ====================================================================== implicit none integer :: n, p, G, Gnoise double precision :: x(n,p), z(n,Gnoise) double precision :: mu(p,G), O(p,p,G), scale, shape(p,G) double precision :: Vinv, pro(Gnoise) double precision :: temp1(p), temp2(p), temp3 integer :: i, k, j double precision :: const, logdet, loglik, eps double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- * check very small eigenvalues (singular covariance) if (minval(shape) .le. sqrt(eps) .or. scale .le. sqrt(eps)) then loglik = FLMAX return end if const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O(:,:,k), p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/sqrt(scale*shape(:,k)) temp3 = ddot(p, temp2, 1, temp2, 1) * temp3 contains the mahalanobis distance * z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) z(i,k) = const - logdet/2.d0 - temp3/2.d0 * help(cdens) --> The densities are not scaled by mixing proportions end do * z contains the log-density log(N(x|theta_k)) end do if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then * call dcopy( n, log(Vinv), 0, z(:,Gnoise), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(:,Gnoise), 1) end if * now column Gnoise of z contains log(Vinv) do i = 1,n z(i,:) = z(i,:) + log( pro(:) ) * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do return end * ====================================================================== subroutine meevv (x,z, n,p,G,Gnoise, mu,O,U,scale,shape,pro,Vinv, * loglik, eqpro,itmax,tol,eps, * niterout,errout,lwork,info) * Maximization-expectation algorithm for model EVV * ====================================================================== implicit none logical :: eqpro integer :: n, p, G, Gnoise double precision :: x(n,p), z(n,Gnoise) double precision :: mu(p,G), O(p,p,G),scale(G),shape(p,G) double precision :: Vinv, pro(Gnoise) double precision :: U(p,p,G), sumz(Gnoise) double precision :: temp1(p), temp2(p), temp3, scsh(p) * double precision :: temp(*) integer :: i, j, k, info, lwork, l, itmax, niterout double precision :: tol, eps, errout, rteps double precision :: const, logdet, loglik, lkprev, wrk(lwork) double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- l = 0 rteps = sqrt(eps) niterout = 0 errout = FLMAX lkprev = FLMAX/2 loglik = FLMAX const = (-dble(p)/2.d0)*log2pi * WHILE loop using goto statement 100 continue niterout = niterout + 1 sumz = sum(z, dim = 1) if ( eqpro ) then if ( Vinv .gt. 
0 ) then pro(Gnoise) = sumz(Gnoise) / dble(n) pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) sumz = pro * dble(n) else pro = 1 / dble(G) sumz = pro * dble(n) end if else pro = sumz / dble(n) end if * re-initialise U * call dcopy(p*p*G, 0.d0, 0, U, 1) dummy(1) = 0.d0 call dcopy(p*p*G, dummy, 0, U, 1) * M step.......................................................... do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1, 1, temp1, 1, U(:,:,k), p) end do * U contains the weighted scattering matrix O(:,:,k) = U(:,:,k) call dgesvd('N', 'O', p, p, O(:,:,k), p, shape(:,k), * dummy, 1, dummy, 1, wrk, lwork, info) * O now contains eigenvectors of the scattering matrix * ##### NOTE: O is transposed * shape contains the eigenvalues * check if dgesvd converged (info == 0) if (info .ne. 0) then l = info return else scale(k) = exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) call dscal(p*p, 1.d0/scale(k), U(:,:,k), 1) call dscal(p, 1.d0/scale(k), shape(:,k), 1) * now U is the matrix Ck (Celeux, Govaert 1995, p.787) * and shape is the proper scaled shape (matrix A) end if end do if ( Vinv .gt. 0.d0 ) then scale(1) = sum(scale) / sum(sumz(1:G)) else scale(1) = sum(scale)/dble(n) end if * if noise lambda = num/sum_{k=1}^{G} n_k; pag. 787 Celeux, Govaert * ................................................................ * check very small eigenvalues (singular covariance) if (minval(shape) .le. rteps .or. minval(scale) .le. rteps) then loglik = FLMAX return end if * E step.......................................................... 
do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale(1)) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed scsh = sqrt(scale(1)*shape(:,k)) do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O(:,:,k), p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/scsh temp3 = ddot(p, temp2, 1, temp2, 1) * temp3 contains the mahalanobis distance z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) end do * z contains the log-density log(N(x|theta_k)) + log(p_k) end do * noise component if (Vinv .gt. 0.d0) then * call dcopy( n, log(Vinv) + log(pro(Gnoise)), 0, z(:,Gnoise), 1) z(:,Gnoise) = log(Vinv) + log( pro(Gnoise) ) end if * now column Gnoise of z contains log(Vinv) + log(p_0) * with p_0 the proportion of noise loglik = 0.d0 do i = 1,n * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do * ................................................................ errout = abs(loglik - lkprev)/(1.d0 + abs(loglik)) * errout = abs(loglik - lkprev) lkprev = loglik * temp(niterout) = loglik * Chris F (June 2015): pro should not be computed in the E-step * sumz = sum(z, dim = 1) * if ( eqpro ) then * if ( Vinv .gt. 0 ) then * pro(Gnoise) = sumz(Gnoise) / dble(n) * pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) * sumz = pro * dble(n) * else * pro = 1 / dble(G) * sumz = pro * dble(n) * end if * else * pro = sumz / dble(n) * end if * check if empty components * if ( minval(pro) .lt. rteps ) then if ( any(sumz .lt. rteps, 1) ) then loglik = -FLMAX return end if * WHILE condition if ( errout .gt. tol .and. niterout .lt. 
itmax ) goto 100 return end ************************************************************************ **** VEE model ************************************************************************ * ====================================================================== subroutine msvee (x,z, n,p,G, mu,U,C,scale,pro, lwork,info, * itmax,tol, niterin,errin) * Maximization step for model VEE * ====================================================================== implicit none integer :: n, p, G double precision :: x(n,p), z(n,G) double precision :: mu(p,G), U(p,p,G), C(p,p), pro(G) * ### NOTE: shape and orientation parameters are computed in R double precision :: scale(G) double precision :: sumz(G) integer :: i, j, k, info, lwork, l * integer :: dummy double precision :: temp1(p), temp2(p,p), temp3 double precision :: wrk(lwork), tol, errin, trgt, trgtprev integer :: itmax, niterin double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) *----------------------------------------------------------------------- * colsums of z sumz = sum(z, dim = 1) * a priori probabilities pro = sumz / dble(n) * pro = sumz / sum(sumz) * if there is noise sum(sumz) does not sum to n. See help(mstep) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1, 1, temp1, 1, U(:,:,k), p) end do * U contains the weighted scattering matrix * check if U is positive definite (see help of dpotrf) * (through Choleski is more efficient) temp2 = U(:,:,k) call dpotrf('U', p, temp2, p, info) if ( info .ne. 0 ) then if ( info .lt. 0) then l = info return else if ( info .gt. 
0 ) then info = 0 scale = FLMAX return end if end if end do * covariance matrix components estimation niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * WHILE loop using goto statement 100 continue niterin = niterin + 1 * initialise C * call dcopy(p*p, 0.d0, 0, C, 1) dummy(1) = 0.d0 call dcopy(p*p, dummy, 0, C, 1) * ### NOTE: scale is initialised in R do k = 1,G C = C + U(:,:,k)/scale(k) end do * C contains the numerator of matrix C in pag.785, Celeux, Govaert temp2 = C call dsyev('N', 'U', p, temp2, p, temp1, wrk, lwork, info) temp1 = temp1(p:1:-1) * temp1 contains the (decreasing) ordered eigenvalues of C * check if dsyev converged or illegal value if ( info .ne. 0 ) then l = info return end if temp3 = exp( sum(log(temp1)) )**(1/dble(p)) * temp3 is the denominator of C C = C/temp3 * C is now the actual matrix C of pag.785 * compute the inverse of C via Choleski temp2 = C call dpotrf('U', p, temp2, p, info) if ( info .ne. 0 ) then if ( info .lt. 0) then l = info return else if ( info .gt. 0 ) then info = 0 scale = FLMAX return end if end if call dpotri('U', p, temp2, p, info) if ( info .ne. 0 ) return do j = 2,p do k = 1,(j-1) temp2(j,k) = temp2(k,j) end do end do * temp2 is now the inverse of C scale = 0.d0 do k = 1,G do j = 1,p scale(k) = scale(k) + ddot(p, U(j,:,k), 1, temp2(:,j), 1) end do scale(k) = scale(k) / (dble(p)*sumz(k)) end do * scale contains now the lambdas (pag.784 of Celeux, Govaert) * evaluate target function * trgt = dble(n)*dble(p) + dble(p)*SUM(log(scale)*sumz) trgt = sum(sumz)*dble(p) + dble(p)*SUM(log(scale)*sumz) * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition if ( errin .gt. tol .and. niterin .lt. 
itmax ) goto 100 return end * ====================================================================== subroutine esvee (x,z, n,p,G,Gnoise, mu,O,scale,shape,pro, Vinv, * loglik, eps) * Expectation step for model VEE * ====================================================================== implicit none integer :: n,p,G,Gnoise double precision :: x(n,p), z(n,Gnoise), pro(Gnoise), Vinv double precision :: mu(p,G), O(p,p), scale(G), shape(p) double precision :: temp1(p), temp2(p), temp3 integer :: i, k, j double precision :: const, logdet, loglik, eps double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- * check very small eigenvalues (cannot compute E step) if ( minval(shape) .le. sqrt(eps) .or. * minval(scale) .le. sqrt(eps) ) then loglik = FLMAX return end if const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j)) + log(scale(k)) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O, p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/sqrt(scale(k)*shape) temp3 = ddot(p, temp2, 1, temp2, 1) * temp3 contains the mahalanobis distance * z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) z(i,k) = const - logdet/2.d0 - temp3/2.d0 * help(cdens) --> The densities are not scaled by mixing proportions end do * z contains the log-density log(N(x|theta_k)) end do if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then * call dcopy( n, log(Vinv), 0, z(:,Gnoise), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(:,Gnoise), 1) end if * now column Gnoise of z contains log(Vinv) do i = 1,n z(i,:) = z(i,:) + log( pro(:) ) * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do return end * ====================================================================== subroutine mevee ( x,z, n,p,G,Gnoise, mu,C,U,scale,shape,pro,Vinv, * loglik, eqpro,itmaxin,tolin,itmaxout,tolout,eps, * niterin,errin,niterout,errout,lwork,info ) * Maximization-expectation algorithm for model VEE * ====================================================================== implicit none logical :: eqpro integer :: n,p,G,Gnoise double precision :: x(n,p), z(n,Gnoise), pro(Gnoise), Vinv double precision :: mu(p,G), C(p,p), scale(G), shape(p) double precision :: U(p,p,G), sumz(Gnoise) double precision :: temp1(p), temp2(p,p), temp3, temp4(p) integer :: i, j, k, info, lwork, l * integer :: dummy integer :: itmaxin, itmaxout, niterin, niterout double precision :: tolin, tolout, errin, errout, eps, rteps double precision :: const, logdet, loglik, lkprev, wrk(lwork) double precision :: trgt, trgtprev double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- l = 0 rteps = sqrt(eps) niterout = 0 errout = FLMAX lkprev = FLMAX/2 loglik = FLMAX const = (-dble(p)/2.d0)*log2pi * WHILE loop for EM algorithm 100 continue niterout = niterout + 1 sumz = sum(z, dim = 1) if ( eqpro ) then if ( Vinv .gt. 
0 ) then pro(Gnoise) = sumz(Gnoise) / dble(n) pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) sumz = pro * dble(n) else pro = 1 / dble(G) sumz = pro * dble(n) end if else pro = sumz / dble(n) end if * re-initialise U * call dcopy(p*p*G, 0.d0, 0, U, 1) dummy(1) = 0.d0 call dcopy(p*p*G, dummy, 0, U, 1) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1, 1, temp1, 1, U(:,:,k), p) end do * U contains the weighted scattering matrix * check if U is positive definite (see help of dpotrf) * (through Choleski is more efficient) temp2 = U(:,:,k) call dpotrf('U', p, temp2, p, info) if ( info .ne. 0 ) then if ( info .lt. 0) then l = info return else if ( info .gt. 0 ) then info = 0 loglik = FLMAX return end if end if end do * M step.......................................................... * covariance matrix components estimation niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * initialise scale * call dcopy(G, 1.d0, 0, scale, 1) dummy(1) = 1.d0 call dcopy(G, dummy, 0, scale, 1) * WHILE loop for M step 110 continue niterin = niterin + 1 * initialise C * call dcopy(p*p, 0.d0, 0, C, 1) dummy(1) = 0.d0 call dcopy(p*p, dummy, 0, C, 1) do k = 1,G C = C + U(:,:,k)/scale(k) end do * C contains the numerator of matrix C in pag.785, Celeux, Govaert temp2 = C call dsyev('N', 'U', p, temp2, p, temp1, wrk, lwork, info) temp1 = temp1(p:1:-1) * temp1 contains the (decreasing) ordered eigenvalues of C * check if dsyev converged or illegal value if ( info .ne. 0 ) then l = info return end if temp3 = exp( sum(log(temp1)) )**(1/dble(p)) * temp3 is the denominator of C C = C/temp3 * C is now the actual matrix C of pag.785 * compute the inverse of C via Choleski temp2 = C call dpotrf('U', p, temp2, p, info) if ( info .ne. 0 ) then if ( info .lt. 0) then l = info return else if ( info .gt. 
0 ) then info = 0 loglik = FLMAX return end if end if call dpotri('U', p, temp2, p, info) if ( info .ne. 0 ) return do j = 2,p do k = 1,(j-1) temp2(j,k) = temp2(k,j) end do end do * temp2 is now the inverse of C scale = 0.d0 do k = 1,G do j = 1,p scale(k) = scale(k) + ddot(p, U(j,:,k), 1, temp2(:,j), 1) end do scale(k) = scale(k) / (dble(p)*sumz(k)) end do * scale contains now the lambdas (pag.784 of Celeux, Govaert) * evaluate target function * trgt = dble(n)*dble(p) + dble(p)*SUM(log(scale)*sumz) trgt = sum(sumz(1:G))*dble(p) + dble(p)*SUM(log(scale)*sumz(1:G)) * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition for M step if ( errin .gt. tolin .and. niterin .lt. itmaxin ) goto 110 * ................................................................ * eigenvalues of C shape = temp1 / temp3 * check very small eigenvalues (singular covariance) if (minval(shape) .le. rteps .or. minval(scale) .le. rteps) then loglik = FLMAX return end if * E step.......................................................... do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + log(shape(j)) + log(scale(k)) end do * compute mahalanobis distance for each observation do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp4, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp4, 1) call dgemv('N', p, p, 1.d0, * temp2, p, temp1, 1, 0.d0, temp4, 1) temp4 = temp4/scale(k) temp3 = ddot(p, temp4, 1, temp1, 1) * temp3 contains the mahalanobis distance z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) * z(i,k) = const - logdet/2.d0 - temp3/2.d0 end do * z contains the log-density log(N(x|theta_k)) + log(p_k) end do * if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then z(:,Gnoise) = log(Vinv) + log( pro(Gnoise) ) end if * now column Gnoise of z contains log(Vinv) + log(p_0) * with p_0 the proportion of noise loglik = 0.d0 do i = 1,n * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do * ................................................................ errout = abs(loglik - lkprev)/(1.d0 + abs(loglik)) * errout = abs(loglik - lkprev) lkprev = loglik * temp(niterout) = loglik * Chris F (June 2015): pro should not be computed in the E-step * sumz = sum(z, dim = 1) * if ( eqpro ) then * if ( Vinv .gt. 0 ) then * pro(Gnoise) = sumz(Gnoise) / dble(n) * pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) * sumz = pro * dble(n) * else * pro = 1 / dble(G) * sumz = pro * dble(n) * end if * else * pro = sumz / dble(n) * end if * check if empty components if ( minval(sumz) .lt. rteps ) then loglik = -FLMAX return end if * WHILE condition EM if ( errout .gt. tolout .and. niterout .lt. 
itmaxout ) goto 100 return end ************************************************************************ **** EVE model ************************************************************************ * ====================================================================== subroutine mseve (x,z, n,p,G, mu,U,O,scale,shape,pro, lwork,info, * itmax,tol, niterin,errin, eps) * Maximization step for model EVE * ====================================================================== implicit none integer :: n, p, G double precision :: x(n,p), z(n,G) double precision :: mu(p,G), U(p,p,G), pro(G), O(p,p) double precision :: scale, shape(p,G) double precision :: sumz(G), omega(G) integer :: i, j, k, info, lwork double precision :: temp1(p,p), temp2(p,p), temp3(p,p), temp4(p) double precision :: wrk(lwork), tol, errin, trgt, trgtprev, eps integer :: itmax, niterin double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot *----------------------------------------------------------------------- * colsums of z sumz = sum(z, dim = 1) * a priori probabilities pro = sumz / dble(n) * pro = sumz / sum(sumz) * if there is noise sum(sumz) does not sum to n. See help(mstep) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1(:,1), 1, temp1(:,1), 1, * U(:,:,k), p) end do * U contains the weighted scattering matrix * compute the eigenvalues of U to be stored in omega temp2 = U(:,:,k) call dsyev('N', 'U', p, temp2, p, temp1(:,1), wrk, lwork, info) * now temp1 contains all the eigenvalues of U * check if dsyev converged and positive definite if ( info .ne. 0 ) then return else if ( minval(temp1(:,1)) .lt. 
sqrt(eps) ) then info = 0 scale = FLMAX return end if end if omega(k) = temp1(p,1) end do * omega contains the largest eigenvalue of each scattering matrix niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * covariance matrix components estimation * we consider algorithm MM 1 and MM 2 of Browne, McNicholas 2013 * with a modification in computing the orientation matrix in the MM 2 step * shape (matrix A) and orientation (matrix D) initialised in R * shape = matrix(1, p,G) * O = diag(p) * WHILE loop using goto statement 100 continue * ### NOTE: O is transposed niterin = niterin + 1 temp2 = 0.d0 temp3 = 0.d0 * temp3 will contain matrix F * Algorithm MM 1 ...................................... do k = 1,G do j = 1,p * temp1(j,:) = O(:,j) / shape(j,k) temp1(j,:) = O(j,:) / shape(j,k) end do * temp1 contains inv(A)t(D) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, U(:,:,k),p, * 0.d0, temp2,p ) * temp2 contains inv(A) %*% t(D) %*% W temp1 = temp2 - omega(k)*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = t( R %*% t(P) ) = P %*% t(R) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * Algorithm MM 2 ...................................... 
* call dgemm( 'T','T', p,p,p, 1.d0, temp2,p, temp1,p, * * 0.d0, O,p ) call transpose(O, p) * O contains matrix D of Browne, McNicholas * Algorithm MM 2 temp1 = 0.d0 temp3 = 0.d0 do k = 1,G call dgemm( 'N','N', p,p,p, 1.d0, U(:,:,k),p, O,p, * 0.d0, temp1,p ) * temp1 contains W %*% D do j = 1,p temp2(:,j) = temp1(:,j) / shape(j,k) end do * temp2 contains W %*% D %*% inv(A) temp1 = temp2 - maxval( 1/shape(:,k) )*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = R %*% t(P) * call dgemm( 'T','T', p,p,p, 1.d0, temp2,p, temp1,p, * * 0.d0, O,p ) O = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * NOTE: we compute the TRANSPOSED of the matrix in the output in the paper call transpose(O, p) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * compute shape (matrix A) and target function trgt = 0.d0 do k = 1,G temp1 = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, O,p, U(:,:,k),p, * 0.d0, temp1,p ) * temp1 contains t(D) %*% W do j = 1,p shape(j,k) = ddot(p, temp1(j,:), 1, O(j,:), 1) end do shape(:,k) = shape(:,k)/ * exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) * now shape contains matrix A of Celeux, Govaert pag. 785 * check positive values if ( minval(shape(:,k)) .lt. sqrt(eps) ) then info = 0 shape = FLMAX return end if temp4(1) = 0.d0 do j = 1,p * temp2(:,j) = O(:,j) * 1.d0/shape(j,k) temp2(:,j) = O(j,:) * 1.d0/shape(j,k) temp4(1) = temp4(1) + ddot(p, temp1(j,:), 1, temp2(:,j), 1) end do trgt = trgt + temp4(1) end do * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition if ( errin .gt. tol .and. niterin .lt. 
itmax ) goto 100 scale = trgt / ( sum(sumz)*dble(p) ) return end * ====================================================================== subroutine eseve (x,z, n,p,G,Gnoise, mu,O,scale,shape,pro, Vinv, * loglik, eps) * Expectation step for model EVE * ====================================================================== implicit none integer :: n, p, G, Gnoise double precision :: x(n,p), z(n,Gnoise) double precision :: mu(p,G), O(p,p), scale, shape(p,G) double precision :: Vinv, pro(Gnoise) double precision :: temp1(p), temp2(p), temp3, temp4(n) integer :: i, k, j double precision :: const, logdet, loglik, eps double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- * check very small eigenvalues (singular covariance) if (minval(shape) .le. sqrt(eps) .or. scale .le. sqrt(eps)) then loglik = FLMAX return end if const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O, p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/sqrt(scale*shape(:,k)) temp3 = ddot(p, temp2, 1, temp2, 1) temp4(i) = temp3 * temp3 contains the mahalanobis distance * z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) z(i,k) = const - logdet/2.d0 - temp3/2.d0 * help(cdens) --> The densities are not scaled by mixing proportions end do * z contains the log-density log(N(x|theta_k)) end do if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then * call dcopy( n, log(Vinv), 0, z(:,Gnoise), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(:,Gnoise), 1) end if * now column Gnoise of z contains log(Vinv) do i = 1,n z(i,:) = z(i,:) + log( pro ) * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do return end * ====================================================================== subroutine meeve ( x,z, n,p,G,Gnoise, mu,O,U,scale,shape,pro,Vinv, * loglik, eqpro,itmaxin,tolin,itmaxout,tolout,eps, * niterin,errin,niterout,errout,lwork,info ) * Maximization-expectation algorithm for model EVE * ====================================================================== implicit none logical :: eqpro integer :: n,p,G,Gnoise double precision :: x(n,p), z(n,Gnoise), pro(Gnoise), Vinv double precision :: mu(p,G), O(p,p), scale, shape(p,G) double precision :: U(p,p,G), sumz(Gnoise), omega(G) double precision :: temp1(p,p), temp2(p,p), temp3(p,p), temp4(p) integer :: i, j, k, info, lwork integer :: itmaxin, itmaxout, niterin, niterout double precision :: tolin, tolout, errin, errout, eps, rteps double precision :: const, logdet, loglik, lkprev, wrk(lwork) double precision :: trgt, trgtprev double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- rteps = sqrt(eps) niterout = 0 errout = FLMAX lkprev = FLMAX/2 loglik = FLMAX const = (-dble(p)/2.d0)*log2pi * WHILE loop for EM algorithm 100 continue niterout = niterout + 1 sumz = sum(z, dim = 1) if ( eqpro ) then if ( 
Vinv .gt. 0 ) then pro(Gnoise) = sumz(Gnoise) / dble(n) pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) sumz = pro * dble(n) else pro = 1 / dble(G) sumz = pro * dble(n) end if else pro = sumz / dble(n) end if * re-initialise U * call dcopy(p*p*G, 0.d0, 0, U, 1) dummy(1) = 0.d0 call dcopy(p*p*G, dummy, 0, U, 1) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1(:,1), 1, temp1(:,1), 1, * U(:,:,k), p) end do * U contains the weighted scattering matrix * compute the eigenvalues of U to be stored in omega temp2 = U(:,:,k) call dsyev('N', 'U', p, temp2, p, temp1(:,1), wrk, lwork, info) * now temp1 contains all the eigenvalues of U * check if dsyev converged and positive definite if ( info .ne. 0 ) then return else if ( minval(temp1(:,1)) .lt. rteps ) then info = 0 scale = FLMAX return end if end if omega(k) = temp1(p,1) end do * omega contains the largest eigenvalue of each scattering matrix * M step.......................................................... niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * covariance matrix components estimation * we consider algorithm MM 1 and MM 2 of Browne, McNicholas 2013 * with a modification in computing the orientation matrix in the MM 2 step * shape (matrix A) and orientation (matrix D) initialised in R * shape = matrix(1, p,G) * O = diag(p) * ### NOTE: we don't re-initialize shape and orientation at each * outer iteration of the EM algorithm * WHILE loop for M step 110 continue * ### NOTE: O is transposed niterin = niterin + 1 temp2 = 0.d0 temp3 = 0.d0 * temp3 will contain matrix F * Algorithm MM 1 ...................................... 
do k = 1,G do j = 1,p * temp1(j,:) = O(:,j) / shape(j,k) temp1(j,:) = O(j,:) / shape(j,k) end do * temp1 contains inv(A)t(D) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, U(:,:,k),p, * 0.d0, temp2,p ) * temp2 contains inv(A) %*% t(D) %*% W temp1 = temp2 - omega(k)*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = t( R %*% t(P) ) = P %*% t(R) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * Algorithm MM 2 ...................................... * call dgemm( 'T','T', p,p,p, 1.d0, temp2,p, temp1,p, * * 0.d0, O,p ) call transpose(O, p) * O contains matrix D of Browne, McNicholas * Algorithm MM 2 temp1 = 0.d0 temp3 = 0.d0 do k = 1,G call dgemm( 'N','N', p,p,p, 1.d0, U(:,:,k),p, O,p, * 0.d0, temp1,p ) * temp1 contains W %*% D do j = 1,p temp2(:,j) = temp1(:,j) / shape(j,k) end do * temp2 contains W %*% D %*% inv(A) temp1 = temp2 - maxval( 1/shape(:,k) )*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 
0 ) return * NOTE: t(O) = R %*% t(P) * call dgemm( 'T','T', p,p,p, 1.d0, temp2,p, temp1,p, * * 0.d0, O,p ) O = 0.d0 * NOTE: we compute the TRANSPOSED of the matrix in the output in the paper call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) call transpose(O, p) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * compute shape (matrix A) and target function trgt = 0.d0 do k = 1,G temp1 = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, O,p, U(:,:,k),p, * 0.d0, temp1,p ) * temp1 contains t(D) %*% W do j = 1,p shape(j,k) = ddot(p, temp1(j,:), 1, O(j,:), 1) end do shape(:,k) = shape(:,k)/ * exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) * now shape contains matrix A of Celeux, Govaert pag. 785 * check positive values if ( minval(shape(:,k)) .lt. rteps ) then info = 0 loglik = FLMAX return end if temp4(1) = 0.d0 do j = 1,p * temp2(:,j) = O(:,j) * 1.d0/shape(j,k) temp2(:,j) = O(j,:) * 1.d0/shape(j,k) temp4(1) = temp4(1) + ddot(p, temp1(j,:), 1, temp2(:,j), 1) end do trgt = trgt + temp4(1) end do * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition M step if ( errin .gt. tolin .and. niterin .lt. itmaxin ) goto 110 scale = trgt / ( sum(sumz(1:G))*dble(p) ) * ................................................................ * E step.......................................................... 
const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2(:,1), 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2(:,1), 1) call dgemv('N', p, p, 1.d0, * O, p, temp1(:,1), 1, 0.d0, temp2(:,1), 1) temp2(:,1) = temp2(:,1)/sqrt(scale*shape(:,k)) temp3(1,1) = ddot(p, temp2(:,1), 1, temp2(:,1), 1) * temp3 contains the mahalanobis distance z(i,k) = const - logdet/2.d0 - temp3(1,1)/2.d0 + log(pro(k)) * z(i,k) = const - logdet/2.d0 - temp3(1,1)/2.d0 end do * z contains the log-density log(N(x|theta_k)) + log(p_k) end do * noise component if (Vinv .gt. 0.d0) then z(:,Gnoise) = log(Vinv) + log( pro(Gnoise) ) end if * now column Gnoise of z contains log(Vinv) + log(p_0) loglik = 0.d0 do i = 1,n * Numerical Recipes pag.844 temp3(1,1) = maxval(z(i,:)) temp1(1,1) = temp3(1,1) + log( sum(exp(z(i,:) - temp3(1,1))) ) loglik = loglik + temp1(1,1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1,1) ) * re-normalize probabilities temp3(1,1) = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3(1,1), z(i,:), 1 ) end do * ................................................................ errout = abs(loglik - lkprev)/(1.d0 + abs(loglik)) lkprev = loglik * Chris F (June 2015): pro should not be computed in the E-step * sumz = sum(z, dim = 1) * if ( eqpro ) then * if ( Vinv .gt. 0 ) then * pro(Gnoise) = sumz(Gnoise) / dble(n) * pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) * sumz = pro * dble(n) * else * pro = 1 / dble(G) * sumz = pro * dble(n) * end if * else * pro = sumz / dble(n) * end if * check if empty components if ( minval(sumz) .lt. rteps ) then loglik = -FLMAX return end if * WHILE condition EM if ( errout .gt. tolout .and. niterout .lt. 
itmaxout ) goto 100 return end ************************************************************************ **** VVE model ************************************************************************ * ====================================================================== subroutine msvve (x,z, n,p,G, mu,U,O,scale,shape,pro, lwork,info, * itmax,tol, niterin,errin, eps) * Maximization step for model VVE * ====================================================================== implicit none integer :: n, p, G double precision :: x(n,p), z(n,G) double precision :: mu(p,G), U(p,p,G), pro(G), O(p,p) double precision :: scale(G), shape(p,G) double precision :: sumz(G), omega(G) integer :: i, j, k, info, lwork double precision :: temp1(p,p), temp2(p,p), temp3(p,p), temp4(p) double precision :: wrk(lwork), tol, errin, trgt, trgtprev, eps integer :: itmax, niterin double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot *----------------------------------------------------------------------- * colsums of z sumz = sum(z, dim = 1) * a priori probabilities pro = sumz / dble(n) * pro = sumz / sum(sumz) * if there is noise sum(sumz) does not sum to n. See help(mstep) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1(:,1), 1, temp1(:,1), 1, * U(:,:,k), p) end do * U contains the weighted scattering matrix * compute the eigenvalues of U to be stored in omega temp2 = U(:,:,k) call dsyev('N', 'U', p, temp2, p, temp1(:,1), wrk, lwork, info) * now temp1 contains all the eigenvalues of U * check if dsyev converged and positive definite if ( info .ne. 0 ) then return else if ( minval(temp1(:,1)) .lt. 
sqrt(eps) ) then info = 0 scale = FLMAX return end if end if omega(k) = temp1(p,1) end do * omega contains the largest eigenvalue of each scattering matrix niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * covariance matrix components estimation * we consider algorithm MM 1 and MM 2 of Browne, McNicholas 2013 * with a modification in computing the orientation matrix in the MM 2 step * shape (matrix A) and orientation (matrix D) initialised in R * shape = matrix(1, p,G) * O = diag(p) * WHILE loop using goto statement 100 continue * ### NOTE: O is transposed niterin = niterin + 1 temp2 = 0.d0 temp3 = 0.d0 * temp3 will contain matrix F * Algorithm MM 1 ...................................... do k = 1,G do j = 1,p * temp1(j,:) = O(:,j) / shape(j,k) temp1(j,:) = O(j,:) / shape(j,k) end do * temp1 contains inv(A)t(D) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, U(:,:,k),p, * 0.d0, temp2,p ) * temp2 contains inv(A) %*% t(D) %*% W temp1 = temp2 - omega(k)*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(P %*% t(R)) = R %*% t(P) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * Algorithm MM 2 ...................................... 
call transpose(O, p) * O contains matrix D of Browne, McNicholas * Algorithm MM 2 temp1 = 0.d0 temp3 = 0.d0 do k = 1,G call dgemm( 'N','N', p,p,p, 1.d0, U(:,:,k),p, O,p, * 0.d0, temp1,p ) * temp1 contains W %*% D do j = 1,p temp2(:,j) = temp1(:,j) / shape(j,k) end do * temp2 contains W %*% D %*% inv(A) temp1 = temp2 - maxval( 1/shape(:,k) )*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = R %*% t(P) O = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) call transpose(O, p) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * compute shape (matrix A) and target function trgt = 0.d0 do k = 1,G temp1 = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, O,p, U(:,:,k),p, * 0.d0, temp1,p ) * temp1 contains t(D) %*% W do j = 1,p shape(j,k) = ddot(p, temp1(j,:), 1, O(j,:), 1) end do * shape(:,k) = shape(:,k)/ * * exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) shape(:,k) = shape(:,k)/sumz(k) * now shape contains matrix A (scale*A) of Celeux, Govaert pag. 785 * compute scale parameter and shape matrix A scale(k) = exp( sum( log(shape(:,k)) ) )**(1/dble(p)) shape(:,k) = shape(:,k)/scale(k) * check positive values if ( minval(shape(:,k)) .lt. sqrt(eps) ) then info = 0 shape = FLMAX return end if temp4(1) = 0.d0 do j = 1,p * temp2(:,j) = O(:,j) * 1.d0/shape(j,k) temp2(:,j) = O(j,:) * 1.d0/shape(j,k) temp4(1) = temp4(1) + ddot(p, temp1(j,:), 1, temp2(:,j), 1) end do trgt = trgt + temp4(1) end do * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition if ( errin .gt. tol .and. niterin .lt. 
itmax ) goto 100 return end * ====================================================================== subroutine esvve (x,z, n,p,G,Gnoise, mu,O,scale,shape,pro, Vinv, * loglik, eps) * Expectation step for model VVE * ====================================================================== implicit none integer :: n, p, G, Gnoise double precision :: x(n,p), z(n,Gnoise) double precision :: mu(p,G), O(p,p), scale(G), shape(p,G) double precision :: Vinv, pro(Gnoise) double precision :: temp1(p), temp2(p), temp3, temp4(n) integer :: i, k, j double precision :: const, logdet, loglik, eps double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- * check very small eigenvalues (singular covariance) if ( minval(shape) .le. sqrt(eps) .or. * minval(scale) .le. sqrt(eps) ) then loglik = FLMAX return end if const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale(k)) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O, p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/sqrt(scale(k)*shape(:,k)) temp3 = ddot(p, temp2, 1, temp2, 1) temp4(i) = temp3 * temp3 contains the mahalanobis distance * z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) z(i,k) = const - logdet/2.d0 - temp3/2.d0 * help(cdens) --> The densities are not scaled by mixing proportions end do * z contains the log-density log(N(x|theta_k)) end do if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then * call dcopy( n, log(Vinv), 0, z(:,Gnoise), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(:,Gnoise), 1) end if * now column Gnoise of z contains log(Vinv) do i = 1,n z(i,:) = z(i,:) + log( pro ) * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do return end * ====================================================================== subroutine mevve ( x,z, n,p,G,Gnoise, mu,O,U,scale,shape,pro,Vinv, * loglik, eqpro,itmaxin,tolin,itmaxout,tolout,eps, * niterin,errin,niterout,errout,lwork,info) * Maximization-expectation algorithm for model VVE * ====================================================================== implicit none logical :: eqpro integer :: n,p,G,Gnoise double precision :: x(n,p), z(n,Gnoise), pro(Gnoise), Vinv double precision :: mu(p,G), O(p,p), scale(G), shape(p,G) double precision :: U(p,p,G), sumz(Gnoise), omega(G) double precision :: temp1(p,p), temp2(p,p), temp3(p,p), temp4(p) integer :: i, j, k, info, lwork integer :: itmaxin, itmaxout, niterin, niterout double precision :: tolin, tolout, errin, errout, eps, rteps double precision :: const, logdet, loglik, lkprev, wrk(lwork) double precision :: trgt, trgtprev double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- rteps = sqrt(eps) niterout = 0 errout = FLMAX lkprev = FLMAX/2 loglik = FLMAX const = (-dble(p)/2.d0)*log2pi * WHILE loop for EM algorithm 100 continue niterout = niterout + 1 sumz = sum(z, dim = 1) if ( eqpro ) then if ( 
Vinv .gt. 0 ) then pro(Gnoise) = sumz(Gnoise) / dble(n) pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) sumz = pro * dble(n) else pro = 1 / dble(G) sumz = pro * dble(n) end if else pro = sumz / dble(n) end if * re-initialise U * call dcopy(p*p*G, 0.d0, 0, U, 1) dummy(1) = 0.d0 call dcopy(p*p*G, dummy, 0, U, 1) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1(:,1), 1, temp1(:,1), 1, * U(:,:,k), p) end do * U contains the weighted scattering matrix * compute the eigenvalues of U to be stored in omega temp2 = U(:,:,k) call dsyev('N', 'U', p, temp2, p, temp1(:,1), wrk, lwork, info) * now temp1 contains all the eigenvalues of U * check if dsyev converged and positive definite if ( info .ne. 0 ) then return else if ( minval(temp1(:,1)) .lt. rteps ) then info = 0 scale = FLMAX return end if end if omega(k) = temp1(p,1) end do * omega contains the largest eigenvalue of each scattering matrix * M step.......................................................... niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * covariance matrix components estimation * we consider algorithm MM 1 and MM 2 of Browne, McNicholas 2013 * with a modification in computing the orientation matrix in the MM 2 step * shape (matrix A) and orientation (matrix D) initialised in R * shape = matrix(1, p,G) * O = diag(p) * ### NOTE: we don't re-initialize shape and orientation at each * outer iteration of the EM algorithm * WHILE loop for M step 110 continue * ### NOTE: O is transposed niterin = niterin + 1 temp2 = 0.d0 temp3 = 0.d0 * temp3 will contain matrix F * Algorithm MM 1 ...................................... 
do k = 1,G do j = 1,p * temp1(j,:) = O(:,j) / shape(j,k) temp1(j,:) = O(j,:) / shape(j,k) end do * temp1 contains inv(A)t(D) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, U(:,:,k),p, * 0.d0, temp2,p ) * temp2 contains inv(A) %*% t(D) %*% W temp1 = temp2 - omega(k)*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(P %*% t(R)) = R %*% t(P) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * O contains TRANSPOSED orientation (matrix D of Browne, McNicholas) * ..................................................... * Algorithm MM 2 ...................................... call transpose(O, p) * O contains matrix D of Browne, McNicholas * Algorithm MM 2 temp1 = 0.d0 temp3 = 0.d0 do k = 1,G call dgemm( 'N','N', p,p,p, 1.d0, U(:,:,k),p, O,p, * 0.d0, temp1,p ) * temp1 contains W %*% D do j = 1,p temp2(:,j) = temp1(:,j) / shape(j,k) end do * temp2 contains W %*% D %*% inv(A) temp1 = temp2 - maxval( 1/shape(:,k) )*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = R %*% t(P) O = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) call transpose(O, p) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... 
* compute shape (matrix A) and target function trgt = 0.d0 do k = 1,G temp1 = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, O,p, U(:,:,k),p, * 0.d0, temp1,p ) * temp1 contains t(D) %*% W do j = 1,p shape(j,k) = ddot(p, temp1(j,:), 1, O(j,:), 1) end do * shape(:,k) = shape(:,k)/ * * exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) shape(:,k) = shape(:,k)/sumz(k) * now shape contains matrix A (scale*A) of Celeux, Govaert pag. 785 * compute scale parameter and shape matrix A scale(k) = exp( sum( log(shape(:,k)) ) )**(1/dble(p)) shape(:,k) = shape(:,k)/scale(k) * check positive values if (minval(shape(:,k)) .lt. rteps .or. * scale(k) .lt. rteps) then info = 0 loglik = FLMAX return end if temp4(1) = 0.d0 do j = 1,p * temp2(:,j) = O(:,j) * 1.d0/shape(j,k) temp2(:,j) = O(j,:) * 1.d0/shape(j,k) temp4(1) = temp4(1) + ddot(p, temp1(j,:), 1, temp2(:,j), 1) end do trgt = trgt + temp4(1) end do * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition M step if ( errin .gt. tolin .and. niterin .lt. itmaxin ) goto 110 * do k = 1,G * scale(k) = exp( sum( log(shape(:,k)) ) )**(1/dble(p)) * shape(:,k) = shape(:,k)/scale(k) * end do * ................................................................ * E step.......................................................... 
const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale(k)) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2(:,1), 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2(:,1), 1) call dgemv('N', p, p, 1.d0, * O, p, temp1(:,1), 1, 0.d0, temp2(:,1), 1) temp2(:,1) = temp2(:,1)/sqrt(scale(k)*shape(:,k)) temp3(1,1) = ddot(p, temp2(:,1), 1, temp2(:,1), 1) * temp3 contains the mahalanobis distance z(i,k) = const - logdet/2.d0 - temp3(1,1)/2.d0 + log(pro(k)) * z(i,k) = const - logdet/2.d0 - temp3(1,1)/2.d0 end do * z contains the log-density log(N(x|theta_k)) + log(p_k) end do * noise component if (Vinv .gt. 0.d0) then z(:,Gnoise) = log(Vinv) + log( pro(Gnoise) ) end if * now column Gnoise of z contains log(Vinv) + log(p_0) loglik = 0.d0 do i = 1,n * Numerical Recipes pag.844 temp3(1,1) = maxval(z(i,:)) temp1(1,1) = temp3(1,1) + log( sum(exp(z(i,:) - temp3(1,1))) ) loglik = loglik + temp1(1,1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1,1) ) * re-normalize probabilities temp3(1,1) = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3(1,1), z(i,:), 1 ) end do * ................................................................ errout = abs(loglik - lkprev)/(1.d0 + abs(loglik)) lkprev = loglik * Chris F (June 2015): pro should not be computed in the E-step * sumz = sum(z, dim = 1) * if ( eqpro ) then * if ( Vinv .gt. 0 ) then * pro(Gnoise) = sumz(Gnoise) / dble(n) * pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) * sumz = pro * dble(n) * else * pro = 1 / dble(G) * sumz = pro * dble(n) * end if * else * pro = sumz / dble(n) * end if * check if empty components if ( minval(sumz) .lt. rteps ) then loglik = -FLMAX return end if * WHILE condition EM if ( errout .gt. tolout .and. niterout .lt. 
itmaxout ) goto 100 return end mclust/src/init.c0000644000175000017500000004331613507677506013640 0ustar nileshnilesh#include #include // for NULL #include /* Routines registration obtained with tools::package_native_routine_registration_skeleton("~/R/mclust") FIXME: Check these declarations against the C/Fortran source code. */ /* .Fortran calls */ extern void F77_NAME(dmvnorm)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(covwf)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(crossprodf)(void *, void *, void *, void *, void *, void *); extern void F77_NAME(d2norm)(void *, void *, void *, void *); extern void F77_NAME(es1e)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(es1v)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseei)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esevi)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esevv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvei)(void *, void *, void *, void *, void *, void *, void *, 
void *, void *, void *, void *); extern void F77_NAME(esvev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvvi)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvvv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hc1e)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hc1v)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hceee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hceii)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hcvii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hcvvv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mcltrw)(void *, void *, void *, void *, void *); extern void F77_NAME(me1e)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(me1ep)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(me1v)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(me1vp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeee)(void *, void *, void 
*, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeeep)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeei)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeeip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeevp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeiip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meevi)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meevip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meevv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, 
void *, void *); extern void F77_NAME(mevee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevei)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meveip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevevp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meviip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevvi)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevvip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevvv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 
extern void F77_NAME(mevvvp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mnxiip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mnxxip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mnxxxp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ms1e)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ms1ep)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ms1v)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ms1vp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseee)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseeep)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseei)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseeip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseevp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseii)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseiip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseve)(void *, void *, void *, void *, void *, void *, void *, void *, void 
*, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msevi)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msevip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msevv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvei)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msveip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvevp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvii)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msviip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvvi)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvvip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvvv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void 
F77_NAME(msvvvp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mvn1d)(void *, void *, void *, void *, void *); extern void F77_NAME(mvn1p)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mvnxii)(void *, void *, void *, void *, void *, void *); extern void F77_NAME(mvnxxi)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mvnxxx)(void *, void *, void *, void *, void *, void *); extern void F77_NAME(shapeo)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(uncholf)(void *, void *, void *, void *, void *); static const R_FortranMethodDef FortranEntries[] = { {"dmvnorm", (DL_FUNC) &F77_NAME(dmvnorm), 8}, {"covwf", (DL_FUNC) &F77_NAME(covwf), 8}, {"crossprodf", (DL_FUNC) &F77_NAME(crossprodf), 6}, {"d2norm", (DL_FUNC) &F77_NAME(d2norm), 4}, {"es1e", (DL_FUNC) &F77_NAME(es1e), 9}, {"es1v", (DL_FUNC) &F77_NAME(es1v), 9}, {"eseee", (DL_FUNC) &F77_NAME(eseee), 12}, {"eseei", (DL_FUNC) &F77_NAME(eseei), 11}, {"eseev", (DL_FUNC) &F77_NAME(eseev), 14}, {"eseii", (DL_FUNC) &F77_NAME(eseii), 10}, {"eseve", (DL_FUNC) &F77_NAME(eseve), 14}, {"esevi", (DL_FUNC) &F77_NAME(esevi), 11}, {"esevv", (DL_FUNC) &F77_NAME(esevv), 14}, {"esvee", (DL_FUNC) &F77_NAME(esvee), 14}, {"esvei", (DL_FUNC) &F77_NAME(esvei), 11}, {"esvev", (DL_FUNC) &F77_NAME(esvev), 14}, {"esvii", (DL_FUNC) &F77_NAME(esvii), 10}, {"esvve", (DL_FUNC) &F77_NAME(esvve), 14}, {"esvvi", (DL_FUNC) &F77_NAME(esvvi), 11}, {"esvvv", (DL_FUNC) &F77_NAME(esvvv), 12}, {"hc1e", (DL_FUNC) &F77_NAME(hc1e), 7}, {"hc1v", (DL_FUNC) &F77_NAME(hc1v), 8}, {"hceee", (DL_FUNC) &F77_NAME(hceee), 12}, {"hceii", (DL_FUNC) &F77_NAME(hceii), 9}, {"hcvii", (DL_FUNC) &F77_NAME(hcvii), 10}, {"hcvvv", (DL_FUNC) &F77_NAME(hcvvv), 14}, {"mcltrw", (DL_FUNC) &F77_NAME(mcltrw), 5}, {"me1e", (DL_FUNC) &F77_NAME(me1e), 12}, {"me1ep", (DL_FUNC) &F77_NAME(me1ep), 16}, 
{"me1v", (DL_FUNC) &F77_NAME(me1v), 12}, {"me1vp", (DL_FUNC) &F77_NAME(me1vp), 16}, {"meeee", (DL_FUNC) &F77_NAME(meeee), 14}, {"meeeep", (DL_FUNC) &F77_NAME(meeeep), 18}, {"meeei", (DL_FUNC) &F77_NAME(meeei), 14}, {"meeeip", (DL_FUNC) &F77_NAME(meeeip), 18}, {"meeev", (DL_FUNC) &F77_NAME(meeev), 18}, {"meeevp", (DL_FUNC) &F77_NAME(meeevp), 22}, {"meeii", (DL_FUNC) &F77_NAME(meeii), 13}, {"meeiip", (DL_FUNC) &F77_NAME(meeiip), 17}, {"meeve", (DL_FUNC) &F77_NAME(meeve), 26}, {"meevi", (DL_FUNC) &F77_NAME(meevi), 14}, {"meevip", (DL_FUNC) &F77_NAME(meevip), 18}, {"meevv", (DL_FUNC) &F77_NAME(meevv), 22}, {"mevee", (DL_FUNC) &F77_NAME(mevee), 26}, {"mevei", (DL_FUNC) &F77_NAME(mevei), 17}, {"meveip", (DL_FUNC) &F77_NAME(meveip), 21}, {"mevev", (DL_FUNC) &F77_NAME(mevev), 18}, {"mevevp", (DL_FUNC) &F77_NAME(mevevp), 22}, {"mevii", (DL_FUNC) &F77_NAME(mevii), 13}, {"meviip", (DL_FUNC) &F77_NAME(meviip), 17}, {"mevve", (DL_FUNC) &F77_NAME(mevve), 26}, {"mevvi", (DL_FUNC) &F77_NAME(mevvi), 14}, {"mevvip", (DL_FUNC) &F77_NAME(mevvip), 18}, {"mevvv", (DL_FUNC) &F77_NAME(mevvv), 15}, {"mevvvp", (DL_FUNC) &F77_NAME(mevvvp), 19}, {"mnxiip", (DL_FUNC) &F77_NAME(mnxiip), 10}, {"mnxxip", (DL_FUNC) &F77_NAME(mnxxip), 11}, {"mnxxxp", (DL_FUNC) &F77_NAME(mnxxxp), 11}, {"ms1e", (DL_FUNC) &F77_NAME(ms1e), 7}, {"ms1ep", (DL_FUNC) &F77_NAME(ms1ep), 11}, {"ms1v", (DL_FUNC) &F77_NAME(ms1v), 7}, {"ms1vp", (DL_FUNC) &F77_NAME(ms1vp), 11}, {"mseee", (DL_FUNC) &F77_NAME(mseee), 9}, {"mseeep", (DL_FUNC) &F77_NAME(mseeep), 13}, {"mseei", (DL_FUNC) &F77_NAME(mseei), 9}, {"mseeip", (DL_FUNC) &F77_NAME(mseeip), 13}, {"mseev", (DL_FUNC) &F77_NAME(mseev), 12}, {"mseevp", (DL_FUNC) &F77_NAME(mseevp), 16}, {"mseii", (DL_FUNC) &F77_NAME(mseii), 8}, {"mseiip", (DL_FUNC) &F77_NAME(mseiip), 12}, {"mseve", (DL_FUNC) &F77_NAME(mseve), 18}, {"msevi", (DL_FUNC) &F77_NAME(msevi), 9}, {"msevip", (DL_FUNC) &F77_NAME(msevip), 13}, {"msevv", (DL_FUNC) &F77_NAME(msevv), 14}, {"msvee", (DL_FUNC) &F77_NAME(msvee), 
16}, {"msvei", (DL_FUNC) &F77_NAME(msvei), 14}, {"msveip", (DL_FUNC) &F77_NAME(msveip), 18}, {"msvev", (DL_FUNC) &F77_NAME(msvev), 14}, {"msvevp", (DL_FUNC) &F77_NAME(msvevp), 18}, {"msvii", (DL_FUNC) &F77_NAME(msvii), 8}, {"msviip", (DL_FUNC) &F77_NAME(msviip), 12}, {"msvve", (DL_FUNC) &F77_NAME(msvve), 18}, {"msvvi", (DL_FUNC) &F77_NAME(msvvi), 9}, {"msvvip", (DL_FUNC) &F77_NAME(msvvip), 13}, {"msvvv", (DL_FUNC) &F77_NAME(msvvv), 10}, {"msvvvp", (DL_FUNC) &F77_NAME(msvvvp), 14}, {"mvn1d", (DL_FUNC) &F77_NAME(mvn1d), 5}, {"mvn1p", (DL_FUNC) &F77_NAME(mvn1p), 9}, {"mvnxii", (DL_FUNC) &F77_NAME(mvnxii), 6}, {"mvnxxi", (DL_FUNC) &F77_NAME(mvnxxi), 7}, {"mvnxxx", (DL_FUNC) &F77_NAME(mvnxxx), 6}, {"shapeo", (DL_FUNC) &F77_NAME(shapeo), 7}, {"uncholf", (DL_FUNC) &F77_NAME(uncholf), 5}, {NULL, NULL, 0} }; void R_init_mclust(DllInfo *dll) { R_registerRoutines(dll, NULL, NULL, FortranEntries, NULL); R_useDynamicSymbols(dll, FALSE); } mclust/src/dmvnorm.f0000644000175000017500000000377113463252677014362 0ustar nileshnilesh* ===================================================================== subroutine dmvnorm ( x, mu, Sigma, n, p, w, hood, logdens) * * Compute log-density of multivariate Gaussian * * ===================================================================== implicit NONE integer n, p double precision hood double precision x(n,p), w(*), logdens(n) double precision mu(p), Sigma(p,p) integer info, i, j double precision detlog, umin, umax, const, temp double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision ddot external ddot * --------------------------------------------------------------------------- * Cholesky factorization call dpotrf('U', p, Sigma, p, info) if (info 
.ne. 0) then w(1) = dble(info) hood = FLMAX return end if call absrng( p, Sigma, (p+1), umin, umax) if (umax .le. one .and. umax .ge. umin*RTMAX) then w(1) = zero hood = FLMAX return end if if (umax .ge. one .and. umin .le. umax*RTMIN) then w(1) = zero hood = FLMAX return end if detlog = zero do j = 1, p detlog = detlog + log(abs(Sigma(j,j))) end do const = dble(p)*pi2log/two + detlog do i = 1, n call dcopy(p, x(i,1), n, w, 1) call daxpy(p, (-one), mu(1), 1, w, 1) call dtrsv('U', 'T', 'N', p, Sigma, p, w, 1) temp = ddot(p, w, 1, w, 1)/two logdens(i) = -(const+temp) end do w(1) = zero return end mclust/vignettes/0000755000175000017500000000000014157117042013725 5ustar nileshnileshmclust/vignettes/mclust.Rmd0000644000175000017500000001777714141212044015712 0ustar nileshnilesh--- title: "A quick tour of mclust" author: "Luca Scrucca" date: "`r format(Sys.time(), '%d %b %Y')`" output: rmarkdown::html_vignette: toc: true number_sections: false css: "vignette.css" vignette: > %\VignetteIndexEntry{A quick tour of mclust} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} library(knitr) opts_chunk$set(fig.align = "center", out.width = "90%", fig.width = 6, fig.height = 5, dev.args = list(pointsize=10), par = TRUE, # needed for setting hook collapse = TRUE, # collapse input & output code in chunks warning = FALSE) knit_hooks$set(par = function(before, options, envir) { if(before && options$fig.show != "none") par(family = "sans", mar=c(4.1,4.1,1.1,1.1), mgp=c(3,1,0), tcl=-0.5) }) set.seed(1) # for exact reproducibility ``` # Introduction **mclust** is a contributed R package for model-based clustering, classification, and density estimation based on finite normal mixture modelling. It provides functions for parameter estimation via the EM algorithm for normal mixture models with a variety of covariance structures, and functions for simulation from these models. 
Also included are functions that combine model-based hierarchical clustering, EM for mixture estimation and the Bayesian Information Criterion (BIC) in comprehensive strategies for clustering, density estimation and discriminant analysis. Additional functionalities are available for displaying and visualizing fitted models along with clustering, classification, and density estimation results. This document gives a quick tour of **mclust** (version `r packageVersion("mclust")`) functionalities. It was written in R Markdown, using the [knitr](https://cran.r-project.org/package=knitr) package for production. See `help(package="mclust")` for further details and references provided by `citation("mclust")`. ```{r, message = FALSE, echo=-2} library(mclust) cat(mclust:::mclustStartupMessage(), sep="") ``` # Clustering ```{r} data(diabetes) class <- diabetes$class table(class) X <- diabetes[,-1] head(X) clPairs(X, class) BIC <- mclustBIC(X) plot(BIC) summary(BIC) mod1 <- Mclust(X, x = BIC) summary(mod1, parameters = TRUE) plot(mod1, what = "classification") table(class, mod1$classification) plot(mod1, what = "uncertainty") ICL <- mclustICL(X) summary(ICL) plot(ICL) LRT <- mclustBootstrapLRT(X, modelName = "VVV") LRT ``` ## Initialisation EM algorithm is used by **mclust** for maximum likelihood estimation. Initialisation of EM is performed using the partitions obtained from agglomerative hierarchical clustering. For details see `help(mclustBIC)` or `help(Mclust)`, and `help(hc)`. 
```{r} (hc1 <- hc(X, modelName = "VVV", use = "SVD")) BIC1 <- mclustBIC(X, initialization = list(hcPairs = hc1)) # default summary(BIC1) (hc2 <- hc(X, modelName = "VVV", use = "VARS")) BIC2 <- mclustBIC(X, initialization = list(hcPairs = hc2)) summary(BIC2) (hc3 <- hc(X, modelName = "EEE", use = "SVD")) BIC3 <- mclustBIC(X, initialization = list(hcPairs = hc3)) summary(BIC3) ``` Update BIC by merging the best results: ```{r} BIC <- mclustBICupdate(BIC1, BIC2, BIC3) summary(BIC) plot(BIC) ``` Univariate fit using random starting points obtained by creating random agglomerations (see `help(hcRandomPairs)`) and merging best results: ```{r, echo=-1} set.seed(20181116) data(galaxies, package = "MASS") galaxies <- galaxies / 1000 BIC <- NULL for(j in 1:20) { rBIC <- mclustBIC(galaxies, verbose = FALSE, initialization = list(hcPairs = hcRandomPairs(galaxies))) BIC <- mclustBICupdate(BIC, rBIC) } summary(BIC) plot(BIC) mod <- Mclust(galaxies, x = BIC) summary(mod) ``` # Classification ## EDDA ```{r} data(iris) class <- iris$Species table(class) X <- iris[,1:4] head(X) mod2 <- MclustDA(X, class, modelType = "EDDA") summary(mod2) plot(mod2, what = "scatterplot") plot(mod2, what = "classification") ``` ## MclustDA ```{r} data(banknote) class <- banknote$Status table(class) X <- banknote[,-1] head(X) mod3 <- MclustDA(X, class) summary(mod3) plot(mod3, what = "scatterplot") plot(mod3, what = "classification") ``` ## Cross-validation error ```{r} cv <- cvMclustDA(mod2, nfold = 10) str(cv) unlist(cv[3:6]) cv <- cvMclustDA(mod3, nfold = 10) str(cv) unlist(cv[3:6]) ``` # Density estimation ## Univariate ```{r} data(acidity) mod4 <- densityMclust(acidity) summary(mod4) plot(mod4, what = "BIC") plot(mod4, what = "density", data = acidity, breaks = 15) plot(mod4, what = "diagnostic", type = "cdf") plot(mod4, what = "diagnostic", type = "qq") ``` ## Multivariate ```{r} data(faithful) mod5 <- densityMclust(faithful) summary(mod5) plot(mod5, what = "BIC") plot(mod5, what = "density", 
type = "hdr", data = faithful, points.cex = 0.5) plot(mod5, what = "density", type = "persp") ``` # Bootstrap inference ```{r} boot1 <- MclustBootstrap(mod1, nboot = 999, type = "bs") summary(boot1, what = "se") summary(boot1, what = "ci") ``` ```{r, echo=-1, fig.width=6, fig.height=7} par(mfrow=c(4,3)) plot(boot1, what = "pro") plot(boot1, what = "mean") ``` ```{r} boot4 <- MclustBootstrap(mod4, nboot = 999, type = "bs") summary(boot4, what = "se") summary(boot4, what = "ci") ``` ```{r, echo=-1} par(mfrow=c(2,2)) plot(boot4, what = "pro") plot(boot4, what = "mean") ``` # Dimension reduction ## Clustering ```{r} mod1dr <- MclustDR(mod1) summary(mod1dr) plot(mod1dr, what = "pairs") plot(mod1dr, what = "boundaries", ngrid = 200) mod1dr <- MclustDR(mod1, lambda = 1) summary(mod1dr) plot(mod1dr, what = "scatterplot") plot(mod1dr, what = "boundaries", ngrid = 200) ``` ## Classification ```{r} mod2dr <- MclustDR(mod2) summary(mod2dr) plot(mod2dr, what = "scatterplot") plot(mod2dr, what = "boundaries", ngrid = 200) mod3dr <- MclustDR(mod3) summary(mod3dr) plot(mod3dr, what = "scatterplot") plot(mod3dr, what = "boundaries", ngrid = 200) ```
# Using colorblind-friendly palettes Most of the graphs produced by **mclust** use colors that by default are defined in the following options: ```{r} mclust.options("bicPlotColors") mclust.options("classPlotColors") ``` The first option controls colors used for plotting BIC, ICL, etc. curves, whereas the second option is used to assign colors for indicating clusters or classes when plotting data. Starting with R version 4.0, the function \code{palette.colors()} can be used for retrieving colors from some pre-defined palettes. For instance ```{r, eval=FALSE} palette.colors(palette = "Okabe-Ito") ``` returns a color-blind-friendly palette for individuals suffering from protanopia or deuteranopia, the two most common forms of inherited color blindness. For earlier versions of R such palette can be defined as: ```{r} cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999") ``` and then assigned to the **mclust** options as follows: ```{r} bicPlotColors <- mclust.options("bicPlotColors") bicPlotColors[1:14] <- c(cbPalette, cbPalette[1:5]) mclust.options("bicPlotColors" = bicPlotColors) mclust.options("classPlotColors" = cbPalette[-1]) clPairs(iris[,-5], iris$Species) mod <- Mclust(iris[,-5]) plot(mod, what = "BIC") plot(mod, what = "classification") ``` If needed, users can easily define their own palettes following the same procedure outlined above.

# References Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, *The R Journal*, 8/1, pp. 205-233. https://journal.r-project.org/archive/2016/RJ-2016-021/RJ-2016-021.pdf Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, *Journal of the American Statistical Association*, 97/458, pp. 611-631. Fraley C., Raftery A. E., Murphy T. B. and Scrucca L. (2012) mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. *Technical Report* No. 597, Department of Statistics, University of Washington. ---- ```{r} sessionInfo() ```mclust/vignettes/vignette.css0000644000175000017500000001152413766666463016314 0ustar nileshnilesh@charset "UTF-8"; body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", Helvetica, sans-serif; font-size: 14px; line-height: 1.35; } #header { text-align: center; } #TOC { clear: both; margin: 10px 10px 20px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 12px; line-height: 1.3; } #TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; } #TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; } #TOC ul ul { margin-left: -2em; } #TOC li { line-height: 16px; list-style: square outside; } table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-top: 1px solid #111; border-bottom: 1px solid #111; } th { border-bottom: 1px solid #111; } table th { border-width: 1px; padding: 5px; } table td { border-width: 1px; line-height: 16px; padding: 5px 5px; } table thead, table tr.even { background-color: #f7f7f7; } p { margin: 0.5em 0; } blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; } hr { border-style: solid; 
border: none; border-top: 1px solid #777; margin: 28px 0; } dl { margin-left: 0; } dl dd { margin-bottom: 13px; margin-left: 13px; } dl dt { font-weight: bold; } ul { margin-top: 0; } ul li { list-style: circle outside; } ul ul { margin-bottom: 0; } figure { margin: 0; text-align: center; } div.figure { text-align: center; } img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; /* border: 1px solid #CCCCCC; */ margin: 0 5px; } h1 { padding-top: 10px; padding-bottom: 10px; border-bottom: 3px solid #f7f7f7; margin-top: 0; font-size: 120%; line-height: 10px; color: rgb(33,33,33); } h1.title { font-size: 200%; line-height: 40px; } h2 { padding-top: 10px; padding-bottom: 5px; border-bottom: 3px solid #f7f7f7; margin-left: 4px; font-size: 110%; color: rgb(33,33,33); } h2.title { font-size: 110%; line-height: 10px; } h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; margin-left: 8px; font-size: 105%; color: rgb(33,33,33); } h4 { border-bottom: 1px solid #f7f7f7; margin-left: 12px; font-size: 100%; color: rgb(33,33,33); } h4.author { border-bottom: 0; color: rgb(77,77,77); } h4.date { border-bottom: 1px solid #f7f7f7; font-size: 100%; color: rgb(77,77,77); } h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; color: rgb(33,33,33); } address{ font-weight: bold; color: rgb(77,77,77); margin-left: 8px; font-size: 100%; } a { color: rgb(24,116,205); text-decoration: none; } a:hover { color: rgb(28,134,238); } a:visited { color: rgb(24,116,205); } a:visited:hover { color: rgb(28,134,238); } a[href^="http:"] { text-decoration: underline; } a[href^="https:"] { text-decoration: underline; } pre, code { background-color: #F8F8F8; border-radius: 3px; color: #333333; white-space: pre-wrap; /* Wrap long lines */ } pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; } pre:not([class]) { background-color: #F8F8F8; } code { font-family: Consolas, Monaco, monospace; color: rgb(0,0,0); font-size: 85%; } p > code, li > code 
{ padding: 2px 0; } strong { font-weight: bold; } hi { font-weight: bold; color: rgb(28,134,238); } /* Class described in https://benjeffrey.com/posts/pandoc-syntax-highlighting-css Colours from https://gist.github.com/robsimmons/1172277 */ code span.co { color: rgb(112,112,112); font-style: normal; } /* Comment */ code span.do { color: rgb(50,50,50); font-style: normal; } /* Documentation */ code span.an { font-style: italic; } /* Annotation */ code span.cf { font-weight: bold; } /* ControlFlow */ code span.cv { font-style: italic; } /* CommentVar */ code span.dt { color: #4075AD; } /* DataType */ code span.at { color: #4075AD; } /* Function args */ code span.dv { color: rgb(85,85,85); } /* DecVal (decimal values) */ code span.er { color: rgb(166,23,23); font-weight: bold; } /* Error */ code span.in { font-style: italic; } /* Information */ code span.kw { color: rgb(23,74,133); font-weight: bold; } /* Keyword */ code span.fu { color: rgb(23,74,133); font-weight: bold; } /* Function calls */ code span.al { color: rgb(255,255,255); font-weight: bold; } /* Alert */ code span.pp { font-weight: bold; } /* Preprocessor */ code span.cn { color: rgb(0,0,0); font-weight: normal; } /* Logical */ code span.st { color: rgb(85,85,85); font-style: italic; } /* String */ code span.ot { color: rgb(0,0,0); font-style: normal; } /* R code */ code span.wa { font-style: italic; } /* Warning */ mclust/build/0000755000175000017500000000000014157117041013013 5ustar nileshnileshmclust/build/vignette.rds0000644000175000017500000000032014157117041015345 0ustar nileshnileshb```b`a@&0`b fd`a\9%zA)h2b %E i h3JrsФ8`FC䁀 Ik^bnj1fvԂԼ?ZY_Ӄ -3'foHf e2|s  =XQĒD"~smclust/R/0000755000175000017500000000000014156710710012116 5ustar nileshnileshmclust/R/mclustssc.R0000644000175000017500000002453614053123501014263 0ustar nileshnilesh# Semi-Supervised Classification MclustSSC <- function(data, class, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), warn = mclust.options("warn"), verbose = 
interactive(), ...) { call <- match.call() data <- data.matrix(data) n <- nrow(data) d <- ncol(data) oneD <- if(d==1) TRUE else FALSE # class <- factor(class, exclude = NA) nclass <- nlevels(class) # if(is.null(G)) G <- nclass if(any(G < nclass)) stop("G cannot be smaller than the number of classes") G <- G[G >= nclass][1] # if(is.null(modelNames)) { modelNames <- if(oneD) c("E", "V") else mclust.options("emModelNames") } # if(n <= d) { m <- match(c("EEE","EEV","VEV","VVV"), mclust.options("emModelNames"), nomatch=0) modelNames <- modelNames[-m] } nModelNames <- length(modelNames) if(verbose) { cat("fitting ...\n") flush.console() pbar <- txtProgressBar(min = 0, max = nModelNames, style = 3) on.exit(close(pbar)) ipbar <- 0 } args <- list(data = data, class = class, G = G, verbose = FALSE, ...) Model <- NULL BIC <- rep(as.double(NA), length(modelNames)) for(m in seq(nModelNames)) { mod <- try(do.call("MclustSSC.fit", c(args, list(modelName = modelNames[m]))), silent = TRUE) if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } if(class(mod) == "try-error") next() BIC[m] <- mod$bic if(!is.na(BIC[m]) && BIC[m] >= max(BIC, na.rm = TRUE)) Model <- mod } if(all(is.na(BIC))) { warning("No model(s) can be estimated!!") return() } BIC <- matrix(BIC, nrow = 1, dimnames = list(G, modelNames)) out <- c(list(call = call, data = data, class = class, BIC = BIC, control = control), Model) orderedNames <- c("call", "data", "class", "modelName", "G", "n", "d", "BIC", "loglik", "df", "bic", "parameters", "z", "classification", "prior", "control") out <- structure(out[orderedNames], class = "MclustSSC") return(out) } print.MclustSSC <- function(x, ...) { cat("\'", class(x)[1], "\' model object:\n", sep = "") cat("\n") catwrap("\nAvailable components:\n") print(names(x)) # str(x, max.level = 2, give.attr = FALSE, strict.width = "wrap") invisible(x) } summary.MclustSSC <- function(object, parameters = FALSE, ...) 
{ # collect info nclass <- nlevels(object$class) classes <- levels(object$class) G <- object$G printParameters <- parameters class <- object$class classif <- object$classification err <- classError(class[!is.na(class)], classif[!is.na(class)])$errorRate # n <- c(table(class, useNA = "always")) n <- tabulate(class, nbins = G) names(n) <- levels(object$classification) if(any(is.na(class))) n <- c(n, "" = sum(is.na(class))) tab <- table("Class" = class, "Predicted" = classif, useNA = "ifany") noise <- FALSE # todo: # noise <- if(is.na(object$hypvol)) FALSE else object$hypvol pro <- object$parameters$pro if(is.null(pro)) pro <- 1 names(pro) <- if(noise) c(classes,0) else classes mean <- object$parameters$mean colnames(mean) <- names(pro) if(object$d > 1) { sigma <- object$parameters$variance$sigma } else { sigma <- rep(object$parameters$variance$sigmasq, object$G)[1:object$G] names(sigma) <- names(mean) } obj <- list(n = n, d = object$d, loglik = object$loglik, df = object$df, bic = object$bic, nclass = nclass, classes = classes, G = object$G, modelName = object$modelName, pro = pro, mean = mean, variance = sigma, noise = noise, prior = object$prior, tab = tab, err = err, printParameters = printParameters) class(obj) <- "summary.MclustSSC" return(obj) } print.summary.MclustSSC <- function(x, digits = getOption("digits"), ...) 
{ title <- paste("Gaussian finite mixture model for semi-supervised classification") txt <- paste(rep("-", min(nchar(title), getOption("width"))), collapse = "") catwrap(txt) catwrap(title) catwrap(txt) cat("\n") tab <- data.frame("log-likelihood" = x$loglik, "n" = sum(x$n), "df" = x$df, "BIC" = x$bic, row.names = "", check.names = FALSE) print(tab, digits = digits) tab <- data.frame("n" = x$n, "%" = round(x$n/sum(x$n)*100,2), "Model" = c(rep(x$modelName, x$G), ""), "G" = c(rep(1, x$G), ""), check.names = FALSE, row.names = ifelse(is.na(names(x$n)), "", names(x$n))) tab <- as.matrix(tab) names(dimnames(tab)) <- c("Classes", "") print(tab, quote = FALSE, right = TRUE) if(!is.null(x$prior)) { cat("\nPrior: ") cat(x$prior$functionName, "(", paste(names(x$prior[-1]), x$prior[-1], sep = " = ", collapse = ", "), ")", sep = "") cat("\n") } if(x$printParameters) { cat("\nMixing probabilities:\n") print(x$pro, digits = digits) cat("\nMeans:\n") print(x$mean, digits = digits) cat("\nVariances:\n") if(x$d > 1) { for(g in 1:x$G) { cat(names(x$pro)[g], "\n") print(x$variance[,,g], digits = digits) } } else print(x$variance, digits = digits) if(x$noise) { cat("\nHypervolume of noise component:\n") cat(signif(x$noise, digits = digits), "\n") } } cat("\nClassification summary:\n") print(x$tab) invisible(x) } MclustSSC.fit <- function(data, class, G = NULL, modelName = NULL, prior = NULL, control = emControl(), warn = NULL, verbose = FALSE, ...) 
{ 
  data <- data.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  # observations with NA class are the unlabelled ones
  class <- factor(class, exclude = NA)
  nclass <- nlevels(class)
  known.class <- which(!is.na(class))
  unknown.class <- which(is.na(class))
  if(is.null(G)) G <- nclass
  if(is.null(modelName)) stop("modelName must be specified!")
  # create z matrix by filling with 0/1 for known labels and 1/G for unlabelled data
  z <- matrix(0.0, nrow = n, ncol = G)
  for(k in 1:nclass)
    z[class == levels(class)[k], k] <- 1
  z[unknown.class,] <- 1/G
  # fixed responsibilities for the labelled observations
  z0 <- z[known.class,,drop=FALSE]
  loglik0 <- -Inf
  criterion <- TRUE
  iter <- 0
  if(verbose) cat("modelName =", modelName, "\n")
  # EM loop: M-step on all data, E-step, then clamp labelled rows back to z0
  while(criterion)
  { 
    iter <- iter + 1
    fit.m <- do.call("mstep", list(data = data, z = z, 
                                   modelName = modelName, 
                                   prior = prior, 
                                   control = control, 
                                   warn = warn))
    fit.e <- do.call("estep", c(list(data = data, 
                                     control = control, 
                                     warn = warn), 
                                fit.m))
    z <- fit.e$z
    z[known.class,] <- z0
    # log-likelihood: mixture density for unlabelled rows plus
    # component densities (weighted by z0) for labelled rows
    ldens <- do.call("dens", c(list(data = data[-known.class,,drop=FALSE], 
                                    logarithm = TRUE), fit.m))
    lcdens <- do.call("cdens", c(list(data = data[known.class,,drop=FALSE], 
                                      logarithm = TRUE), fit.m))
    lcdens <- sweep(lcdens, MARGIN = 2, FUN = "+", 
                    STATS = log(fit.m$parameters$pro))
    loglik <- sum(ldens) + sum(lcdens * z0)
    # stop on max iterations or when the log-likelihood gain drops below tol
    criterion <- ( iter < control$itmax[1] & 
                   (loglik - loglik0) > control$tol[1] )
    loglik0 <- loglik
    if(verbose) cat("iter =", iter, " loglik =", loglik0, "\n")
  }
  fit <- fit.m
  fit$loglik <- loglik
  fitclass <- map(fit$z, warn = FALSE)
  # assign labels of known classes
  fitclass <- factor(fitclass)
  labels <- levels(class)
  if(G > nclass)
    # extra (unlabelled-only) components get synthetic names classK+1, ...
    labels <- c(labels, paste0("class", seq(nclass+1,G)))
  levels(fitclass) <- labels
  fit$classification <- fitclass
  # degrees of freedom: mixing proportions + means + covariance parameters
  fit$df <- (G-1) + p*nclass + nVarParams(fit$modelName, d = p, G = nclass)
  fit$bic <- 2*fit$loglik - fit$df*log(n)
  #
  return(fit)
}

# Plot method for 'MclustSSC' objects: dispatches to a BIC dotchart or to the
# Mclust classification/uncertainty plots (interactive menu when possible).
plot.MclustSSC <- function(x, what = c("BIC", "classification", "uncertainty"), ...) 
{
  object <- x # Argh. Really want to use object anyway
  if(!inherits(object, "MclustSSC")) 
    stop("object not of class 'MclustSSC'")
  # reuse plot.Mclust methods by extending the class attribute
  class(object) <- c(class(object), "Mclust")
  what <- match.arg(what, several.ok = TRUE)
  oldpar <- par(no.readonly = TRUE)
  plot.MclustSSC.bic <- function(...)
  {
    dotchart(rev(object$BIC[1,]), pch = 19, 
             xlab = paste("BIC for G =", object$G), ...)
  }
  if(interactive() & length(what) > 1)
  {
    title <- "Model-based semi-supervised classification plots:"
    # present menu waiting user choice
    choice <- menu(what, graphics = FALSE, title = title)
    while(choice != 0)
    {
      if(what[choice] == "BIC")            plot.MclustSSC.bic(...)
      if(what[choice] == "classification") plot.Mclust(object, what = "classification", ...)
      if(what[choice] == "uncertainty")    plot.Mclust(object, what = "uncertainty", ...)
      # re-present menu waiting user choice
      choice <- menu(what, graphics = FALSE, title = title)
    }
  }
  else
  {
    if(any(what == "BIC"))            plot.MclustSSC.bic(...)
    if(any(what == "classification")) plot.Mclust(object, what = "classification", ...)
    if(any(what == "uncertainty"))    plot.Mclust(object, what = "uncertainty", ...)
  }
  invisible()
}

# Predict method for 'MclustSSC' objects: returns the MAP classification and
# the matrix of posterior class probabilities for 'newdata' (training data
# when 'newdata' is missing).
predict.MclustSSC <- function(object, newdata, ...) 
{
  if(!inherits(object, "MclustSSC")) 
    stop("object not of class 'MclustSSC'")
  if(missing(newdata))
  { newdata <- object$data }
  newdata <- as.matrix(newdata)
  if(ncol(object$data) != ncol(newdata))
  { stop("newdata must match ncol of object data") }
  #
  object$data <- newdata
  # component log-densities for each observation
  z <- do.call("cdens", c(object, list(logarithm = TRUE)))
  pro <- object$parameters$pro
  logpro <- log(pro) - log(sum(pro))
  noise <- FALSE # (!is.na(object$hypvol))
  z <- if(noise) cbind(z, log(object$parameters$Vinv)) else cbind(z) # drop redundant attributes
  # posterior probabilities via log-sum-exp normalisation
  z <- sweep(z, MARGIN = 2, FUN = "+", STATS = logpro)
  z <- sweep(z, MARGIN = 1, FUN = "-", STATS = apply(z, 1, logsumexp))
  z <- exp(z)
  cl <- c(levels(object$classification), if(noise) 0)
  colnames(z) <- cl
  cl <- factor(cl[apply(z, 1, which.max)], levels = cl)
  out <- list(classification = cl, z = z)
  return(out)
}

mclust/R/options.R0000644000175000017500000000513013735311265013737 0ustar  nileshnilesh#############################################################################

# Package-wide option defaults, stored in the namespace and modifiable at
# run time via mclust.options() (the binding is unlocked in .onAttach).
.mclust <- structure(list(
  emModelNames = c("EII", "VII", "EEI", "VEI", "EVI", "VVI", 
                   "EEE", "VEE", "EVE", "VVE", "EEV", "VEV", "EVV", "VVV"),
  # in mclust version <= 4.x
  # emModelNames = c("EII", "VII", "EEI", "VEI", "EVI", "VVI", "EEE", "EEV", "VEV", "VVV"),
  hcModelName = "VVV", 
  hcUse = "SVD", 
  subset = 2000, 
  fillEllipses = FALSE, 
  bicPlotSymbols = structure(c(17, 2, 16, 10, 13, 1, 15, 8, 5, 9, 12, 7, 14, 0, 17, 2), 
                             .Names = c("EII", "VII", "EEI", "EVI", "VEI", "VVI", 
                                        "EEE", "VEE", "EVE", "VVE", "EEV", "VEV", 
                                        "EVV", "VVV", "E", "V")), 
  bicPlotColors = structure({ 
    pal <- grDevices::colorRampPalette(c("forestgreen", "royalblue1", "red3"), 
                                       space = "Lab")
    c("gray", "black", pal(12), "gray", "black") }, 
    .Names = c("EII", "VII", "EEI", "EVI", "VEI", "VVI", 
               "EEE", "VEE", "EVE", "VVE", "EEV", "VEV", 
               "EVV", "VVV", "E", "V")), 
  classPlotSymbols = c(16, 0, 17, 3, 15, 4, 1, 8, 2, 7, 5, 9, 6, 10, 11, 18, 12, 13, 14), 
  classPlotColors = c("dodgerblue2", "red3", "green3", "slateblue", "darkorange",
                      "skyblue1", "violetred4", "forestgreen", "steelblue4", 
                      "slategrey", "brown", "black", "darkseagreen", "darkgoldenrod3", 
                      "olivedrab", "royalblue", "tomato4", "cyan2", "springgreen2"), 
  warn = FALSE))

# Get or set mclust package options.
# Called with no arguments it returns the full option list; with a single
# character argument it returns that option's value; with name = value pairs
# (or a single named list) it updates the stored options and invisibly
# returns the updated list.
mclust.options <- function(...)
{
  current <- get(".mclust", envir = asNamespace("mclust"))
  if(nargs() == 0) return(current)
  args <- list(...)
  if(length(args) == 1 && is.null(names(args)))
  {
    arg <- args[[1]]
    switch(mode(arg),
           list = args <- arg,
           character = return(.mclust[[arg]]),
           stop("invalid argument: ", dQuote(arg)))
  }
  if(length(args) == 0) return(current)
  n <- names(args)
  if (is.null(n)) stop("options must be given by name")
  # changed <- current[n]
  current[n] <- args
  # write the updated list back into the (unlocked) namespace binding
  assign(".mclust", current, envir = asNamespace("mclust"))
  # to be tried as an alternative:
  # assignInNamespace(".mclust", current, ns = asNamespace("mclust"))
  invisible(current)
}

mclust/R/icl.R0000644000175000017500000000625013656733162013015 0ustar  nileshnilesh##
## Integrated Complete-data Likelihood (ICL) Criterion
##

# Generic for the ICL criterion.
icl <- function(object, ...) UseMethod("icl")

# ICL for a fitted 'Mclust' model: BIC plus twice the estimated mean entropy
# term, computed from the hard (MAP) assignment matrix C.
icl.Mclust <- function(object, ...)
{
  n <- object$n
  # G <- object$G + ifelse(is.na(object$hypvol),0,1)
  z <- object$z
  if(is.null(z)) z <- matrix(1, nrow = n, ncol = 1)
  # C = one-hot MAP classification matrix
  C <- matrix(0, n, ncol(z))
  for(i in 1:n) 
    C[i, which.max(z[i,])] <- 1
  object$bic + 2*sum(C * ifelse(z > 0, log(z), 0))
}

# ICL for a fitted 'MclustDA' model, using predicted posterior probabilities.
icl.MclustDA <- function(object, ...)
{
  n <- object$n
  z <- predict(object)$z
  # NOTE(review): 'df' is retrieved here but never used below
  df <- object$df
  if(is.null(z)) z <- matrix(1, nrow = n, ncol = 1)
  C <- matrix(0, n, ncol(z))
  for(i in 1:n) 
    C[i, which.max(z[i,])] <- 1
  object$bic + 2*sum(C * ifelse(z > 0, log(z), 0))
}

# Compute the ICL criterion over a grid of numbers of components G and
# model names, by first running mclustBIC and then converting each fitted
# model's BIC to ICL. A previous 'mclustICL' object may be supplied as 'x'
# to avoid refitting already-computed entries.
mclustICL <- function(data, G = NULL, modelNames = NULL, 
                      initialization = list(hcPairs=NULL, subset=NULL, noise=NULL), 
                      x = NULL, ...)
{ 
  call <- match.call()
  data <- data.matrix(data)
  n <- nrow(data)
  d <- ncol(data)
  # re-dispatch this call to mclustBIC() with the same arguments
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name("mclustBIC")
  mc[[2]] <- data
  BIC <- eval(mc, parent.frame())
  class(BIC) <- "mclustBIC"
  G <- attr(BIC, "G")
  modelNames <- attr(BIC, "modelNames")
  ICL <- matrix(NA, nrow = length(G), ncol = length(modelNames))
  mostattributes(ICL) <- attributes(BIC)
  if(!is.null(x))
  {
    # reuse ICL values already computed in the previously returned object 'x'
    r <- match(as.character(G), rownames(x), nomatch = 0)
    c <- match(modelNames, colnames(x), nomatch = 0)
    # FIX: previously this copied BIC[r,c], which stored BIC values in the
    # ICL table and then skipped recomputing them below; the values to be
    # reused are the ICL entries of 'x'.
    ICL[r,c] <- x[r,c]
  }
  for(i in 1:nrow(ICL))
  {
    for(j in 1:ncol(ICL))
    {
      if(is.na(BIC[i,j]))  next() # not fitted
      if(!is.na(ICL[i,j])) next() # already available
      # refit the (G, model) combination and convert its BIC to ICL
      Sumry <- summary(BIC, data, G = G[i], modelNames = modelNames[j])
      ICL[i,j] <- icl.Mclust(Sumry)
    }
  }
  class(ICL) <- "mclustICL"
  attr(ICL, "criterion") <- "ICL"
  return(ICL)
}

# Print method for 'mclustICL' objects: strips bookkeeping attributes, prints
# the ICL table, then the top 'pick' models.
print.mclustICL <- function (x, pick = 3, ...) 
{
  subset <- !is.null(attr(x, "subset"))
  oldClass(x) <- attr(x, "args") <- NULL
  attr(x, "criterion") <- NULL
  attr(x, "control") <- attr(x, "initialization") <- NULL
  attr(x, "oneD") <- attr(x, "warn") <- attr(x, "Vinv") <- NULL
  attr(x, "prior") <- attr(x, "G") <- attr(x, "modelNames") <- NULL
  ret <- attr(x, "returnCodes") == -3
  n <- attr(x, "n")
  d <- attr(x, "d")
  attr(x, "returnCodes") <- attr(x, "n") <- attr(x, "d") <- NULL
  oldClass(x) <- attr(x, "args") <- attr(x, "criterion") <- NULL
  catwrap("Integrated Complete-data Likelihood (ICL) criterion:")
  print(x, ...)
  cat("\n")
  catwrap(paste("Top", pick, "models based on the ICL criterion:"))
  print(pickBIC(x, pick), ...)
  invisible()
}

# Summary method: optionally subset by G and/or model names, then report the
# best entries via pickBIC.
summary.mclustICL <- function(object, G, modelNames, ...)
{
  if(!missing(G)) 
    object <- object[rownames(object) %in% G,,drop=FALSE]
  if(!missing(modelNames)) 
    object <- object[,colnames(object) %in% modelNames,drop=FALSE]
  structure(pickBIC(object, ...), 
            class = "summary.mclustICL")
}

# Print method for 'summary.mclustICL': best ICL values and their differences
# from the maximum.
print.summary.mclustICL <- function(x, digits = getOption("digits"), ...) 
{
  cat("Best ICL values:\n")
  x <- drop(as.matrix(x))
  x <- rbind(ICL = x, "ICL diff" = x - max(x))
  print(x, digits = digits)
  invisible()
}

# Plot method: delegates to the BIC plotting machinery with an ICL label.
plot.mclustICL <- function(x, ylab = "ICL", ...)  
{
  plot.mclustBIC(x, ylab = ylab, ...) 
}

mclust/R/zzz.R0000644000175000017500000000156713377256700013117 0ustar  nileshnilesh# .onLoad <- function(libname, pkgname)
# {
#   library.dynam("mclust", pkgname, libname)
# }

# Build the package startup message (figlet-style banner + citation hint).
mclustStartupMessage <- function()
{
  # Startup message obtained as
  # > figlet -f slant MCLUST
  msg <- c(paste0(
" __ ___________ __ _____________ / |/ / ____/ / / / / / ___/_ __/ / /|_/ / / / / / / / /\\__ \\ / / / / / / /___/ /___/ /_/ /___/ // / /_/ /_/\\____/_____/\\____//____//_/ version ", 
    packageVersion("mclust")),
    "\nType 'citation(\"mclust\")' for citing this R package in publications.")
  return(msg)
}

# Package attach hook: unlock the options binding and show the banner.
.onAttach <- function(lib, pkg)
{
  # unlock .mclust variable allowing its modification
  unlockBinding(".mclust", asNamespace("mclust"))
  # startup message
  msg <- mclustStartupMessage()
  if(!interactive())
    msg[1] <- paste("Package 'mclust' version", packageVersion("mclust"))
  packageStartupMessage(msg)
  invisible()
}

mclust/R/mclustda.R0000644000175000017500000011775414156711630014072 0ustar  nileshnilesh
# Discriminant analysis based on Gaussian finite mixture modelling.
# modelType = "EDDA" fits a single common covariance-structure model with one
# component per class; modelType = "MclustDA" fits a separate finite mixture
# (possibly different G and model) within each class.
MclustDA <- function(data, class, G = NULL, modelNames = NULL, 
                     modelType = c("MclustDA", "EDDA"), 
                     prior = NULL, control = emControl(), 
                     initialization = NULL, 
                     warn = mclust.options("warn"), 
                     verbose = interactive(), 
                     ...)
{ 
  call <- match.call()
  mc <- match.call(expand.dots = TRUE)
  #
  if(missing(data)) 
    stop("no training data provided!")
  data <- data.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  oneD <- if(p==1) TRUE else FALSE
  #
  if(missing(class)) 
    stop("class labels for training data must be provided!")
  class <- as.factor(class)
  classLabel <- levels(class)
  ncl <- nlevels(class)
  if(ncl == 1) G <- 1
  # empirical class proportions
  prop <- as.vector(table(class))/n
  names(prop) <- classLabel
  #
  modelType <- match.arg(modelType, 
                         choices = eval(formals(MclustDA)$modelType), 
                         several.ok = FALSE)
  # normalise G to a per-class list of sorted candidate component numbers
  if(is.null(G))
  { G <- rep(list(1:5), ncl) }
  else if(is.list(G))
  { G <- lapply(G, sort) }
  else
  { G <- rep(list(sort(G)), ncl) }
  if(any(unlist(G) <= 0)) 
    stop("G must be positive")
  #
  if(is.null(modelNames))
  {
    if(oneD) modelNames <- c("E", "V")
    else     modelNames <- mclust.options("emModelNames")
  }
  # with n <= p the full-covariance models are not estimable: drop them
  if(n <= p)
  {
    m <- match(c("EEE","EEV","VEV","VVV"), 
               mclust.options("emModelNames"), nomatch=0)
    modelNames <- modelNames[-m]
  }
  if(!is.list(modelNames))
  { modelNames <- rep(list(modelNames), ncl) }
  #
  # hcUse <- mclust.options("hcUse")
  # mclust.options("hcUse" = "VARS")
  # on.exit(mclust.options("hcUse" = hcUse))
  #
  if(modelType == "EDDA")
  {
    # EDDA: one M-step on the class-indicator matrix per candidate model,
    # keeping the model with the highest (equal-proportion) BIC
    mc[[1]] <- as.name("mstep")
    mc$class <- mc$G <- mc$modelNames <- mc$modelType <- NULL
    mc$warn <- FALSE
    mc$z <- unmap(as.numeric(class))
    G <- 1
    modelNames <- unique(unlist(modelNames))
    BIC <- rep(NA, length(modelNames))
    Model <- NULL
    if(verbose)
    {
      cat("fitting ...\n")
      flush.console()
      pbar <- txtProgressBar(min = 0, max = length(modelNames), style = 3)
      on.exit(close(pbar))
      ipbar <- 0
    }
    for(i in seq(modelNames))
    {
      mc$modelName <- as.character(modelNames[i])
      mStep <- eval(mc, parent.frame())
      eStep <- do.call("estep", c(mStep, list(data = data, warn = FALSE)))
      BIC[i] <- do.call("bic", c(eStep, list(equalPro = TRUE)))
      if(!is.na(BIC[i]) && BIC[i] >= max(BIC, na.rm = TRUE)) 
        Model <- eStep
      if(verbose)
      { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) }
    }
    if(all(is.na(BIC)))
    {
      warning("No model(s) can be estimated!!")
      return()
    }
    names(BIC) <- modelNames
    bic <- max(BIC, na.rm = TRUE)
    loglik <- Model$loglik
    df <- (2*loglik - bic)/log(Model$n)
    # there are (nclass-1) more df than really needed
    # equal to logLik(object) but faster
    Model <- c(Model, list("BIC" = BIC))
    # replicate the joint fit, then slice out per-class parameter subsets
    Models <- rep(list(Model), ncl)
    names(Models) <- classLabel
    for(l in 1:ncl)
    {
      I <- (class == classLabel[l])
      Models[[l]]$n <- sum(I)
      Models[[l]]$G <- 1
      Models[[l]]$bic <- Models[[l]]$loglik <- NULL
      par <- Models[[l]]$parameters
      par$pro <- 1
      par$mean <- if(oneD) par$mean[l] else par$mean[,l,drop=FALSE]
      par$variance$G <- 1
      if(oneD)
      {
        # par$variance$sigma <- par$variance$sigma[l]
        if(length(par$variance$sigmasq) > 1)
          par$variance$sigmasq <- par$variance$sigmasq[l]
        else 
          par$variance$sigmasq <- par$variance$sigmasq
      } else
      {
        # keep only the covariance pieces belonging to class l
        par$variance$sigma <- par$variance$sigma[,,l,drop=FALSE]
        if(length(par$variance$sigmasq) > 1)
          par$variance$sigmasq <- par$variance$sigmasq[l]
        if(length(par$variance$scale) > 1)
          par$variance$scale <- par$variance$scale[l]
        if(length(dim(par$variance$shape)) > 1)
          par$variance$shape <- par$variance$shape[,l]
        if(length(dim(par$variance$orientation)) > 2) # LS was > 1
          par$variance$orientation <- par$variance$orientation[,,l,drop=FALSE]
        if(length(dim(par$variance$cholSigma)) > 2)
          par$variance$cholSigma <- par$variance$cholSigma[,,l,drop=FALSE]
        if(length(dim(par$variance$cholsigma)) > 2)
          par$variance$cholsigma <- par$variance$cholsigma[,,l,drop=FALSE]
      }
      Models[[l]]$parameters <- par
      Models[[l]]$z <- NULL # z[I,,drop=FALSE]
      Models[[l]]$classification <- rep(1, sum(I)) # apply(z[I,,drop=FALSE], 1, which.max)
      Models[[l]]$uncertainty <- NULL # 1 - apply(z[I,], 1, max)
      Models[[l]]$observations <- which(I)
    }
  } else
  {
    # modelType == "MclustDA" i.e. different covariance structures for each class
    Models <- rep(list(NULL), ncl)
    mc[[1]] <- as.name("mclustBIC")
    mc$class <- NULL
    for(l in 1:ncl)
    {
      I <- (class == classLabel[l])
      mc[[2]] <- data[I,]
      mc$G <- G[[l]]
      mc$modelNames <- as.character(modelNames[[l]])
      if(verbose) 
        cat(paste0("Class ", classLabel[l], ": "))
      BIC <- eval(mc, parent.frame())
      # slightly adjust parameters if none of the models can be fitted
      while(all(is.na(BIC)))
      {
        if(length(mc$modelNames) == 1)
        {
          # single model requested: fall back to a simpler model, or fewer
          # components when already at the simplest one
          j <- which(mc$modelNames == mclust.options("emModelNames"))
          if(j == 1) mc$G <- mc$G - 1
          else       mc$modelNames <- mclust.options("emModelNames")[j-1]
        } else
        {
          mc$G <- mc$G - 1
        }
        BIC <- eval(mc, parent.frame())
      }
      SUMMARY <- summary(BIC, data[I,])
      SUMMARY$bic <- BIC
      names(SUMMARY)[which(names(SUMMARY) == "bic")] <- "BIC"
      Models[[l]] <- c(SUMMARY, list(observations = which(I)))
    }
    bic <- loglik <- df <- NULL
  }
  names(Models) <- classLabel
  Models$Vinv <- NULL
  out <- list(call = call, data = data, class = class, 
              type = modelType, n = n, d = p, 
              prop = prop, models = Models, 
              bic = bic, loglik = loglik, df = df)
  out <- structure(out, 
                   prior = prior, control = control, 
                   class = "MclustDA")
  if(modelType == "MclustDA")
  {
    # overall log-likelihood, df and BIC computed from the per-class fits
    l <- logLik.MclustDA(out, data)
    out$loglik <- as.numeric(l)
    out$df <- attr(l, "df")
    out$bic <- 2*out$loglik - log(n)*out$df
  }
  return(out)
}

# Print method for 'MclustDA' objects: per-class model table and the names of
# the available components.
print.MclustDA <- function(x, ...)
{ 
  cat("\'", class(x)[1], "\' model object:\n", sep = "")
  models <- x$models
  nclass <- length(models)
  n <- sapply(1:nclass, function(i) models[[i]]$n)
  M <- sapply(1:nclass, function(i) models[[i]]$modelName)
  G <- sapply(1:nclass, function(i) models[[i]]$G)
  out <- data.frame(n = n, Model = M, G = G)
  rownames(out) <- names(models)
  out <- as.matrix(out)
  names(dimnames(out)) <- c("Classes", "")
  print(out, quote = FALSE, right = TRUE)
  cat("\n")
  catwrap("\nAvailable components:\n")
  print(names(x))
  # str(x, max.level = 2, give.attr = FALSE, strict.width = "wrap")
  invisible(x)
}

# Summary method for 'MclustDA' objects.
# Computes training confusion matrix, classification error and Brier score;
# when 'newdata' (and optionally 'newclass') are supplied, the same test-set
# quantities are computed as well. Set parameters = TRUE to have the printed
# summary include the estimated parameters.
summary.MclustDA <- function(object, parameters = FALSE, newdata, newclass, ...)
{
  # collect info
  models <- object$models
  nclass <- length(models)
  classes <- names(models)
  n <- sapply(1:nclass, function(i) models[[i]]$n)
  G <- sapply(1:nclass, function(i) models[[i]]$G)
  modelName <- sapply(1:nclass, function(i) models[[i]]$modelName)
  prior <- attr(object, "prior")
  printParameters <- parameters
  par <- getParameters.MclustDA(object)
  class <- object$class
  data <- object$data
  # training-set predictions and accuracy measures
  pred <- predict(object, newdata = data, ...)
  ce <- mean(class != pred$classification)
  brier <- BrierScore(pred$z, class)
  tab <- try(table(class, pred$classification))
  # FIX: use inherits() rather than comparing class(tab) == "try-error",
  # which is fragile when an object carries more than one class
  if(inherits(tab, "try-error"))
  {
    ce <- tab <- NA
  } else
  {
    names(dimnames(tab)) <- c("Class", "Predicted")
  }
  tab.newdata <- ce.newdata <- brier.newdata <- NULL
  if(!missing(newdata) & !missing(newclass))
  {
    # test-set predictions and accuracy measures
    pred.newdata <- predict(object, newdata = newdata, ...)
    if(missing(newclass))
    {
      tab.newdata <- table(pred.newdata$classification)
      names(dimnames(tab.newdata)) <- "Predicted"
    } else
    {
      tab.newdata <- table(newclass, pred.newdata$classification)
      names(dimnames(tab.newdata)) <- c("Class", "Predicted")
      ce.newdata <- mean(newclass != pred.newdata$classification)
      brier.newdata <- BrierScore(pred.newdata$z, newclass)
    }
  }
  obj <- list(type = object$type, n = n, d = object$d, 
              loglik = object$loglik, df = object$df, bic = object$bic, 
              nclass = nclass, classes = classes, 
              G = G, modelName = modelName, 
              prop = object$prop, parameters = par, prior = prior, 
              tab = tab, ce = ce, brier = brier, 
              tab.newdata = tab.newdata, ce.newdata = ce.newdata, 
              brier.newdata = brier.newdata, 
              printParameters = printParameters)
  class(obj) <- "summary.MclustDA"
  return(obj)
}

# Print method for 'summary.MclustDA' objects (continues past this chunk).
print.summary.MclustDA <- function(x, digits = getOption("digits"), ...)
{
  title <- paste("Gaussian finite mixture model for classification")
  txt <- paste(rep("-", min(nchar(title), getOption("width"))), collapse = "")
  catwrap(txt)
  catwrap(title)
  catwrap(txt)
  cat("\n")
  catwrap(paste(x$type, "model summary:"))
  cat("\n")
  # overall fit statistics
  tab <- data.frame("log-likelihood" = x$loglik, "n" = sum(x$n), 
                    "df" = x$df, "BIC" = x$bic, 
                    row.names = "", check.names = FALSE)
  print(tab, digits = digits)
  # per-class sample sizes and selected models
  tab <- data.frame("n" = x$n, "%" = round(x$n/sum(x$n)*100,2),
                    "Model" = x$modelName, "G" = x$G,
                    check.names = FALSE, row.names = x$classes)
  tab <- as.matrix(tab)
  names(dimnames(tab)) <- c("Classes", "")
  print(tab, digits = digits, quote = FALSE, right = TRUE)
  if(!is.null(x$prior))
  {
    cat("\nPrior: ")
    cat(x$prior$functionName, "(", 
        paste(names(x$prior[-1]), x$prior[-1], sep = " = ", collapse = ", "), 
        ")", sep = "")
    cat("\n")
  }
  if(x$printParameters)
  {
    cat("\nClass prior probabilities:\n")
    print(x$prop, digits = digits)
    for(i in seq(x$nclass))
    {
      cat("\nClass = ", x$class[i], "\n", sep = "")
      par <- x$parameters[[i]]
      if(x$type == "MclustDA")
      {
        cat("\nMixing probabilities: ")
        cat(round(par$pro, digits = digits), "\n")
      }
cat("\nMeans:\n") print(par$mean, digits = digits) cat("\nVariances:\n") if(x$d > 1) { for(g in seq(x$G[i])) { cat("[,,", g, "]\n", sep = "") print(par$variance[,,g], digits = digits) } } else print(par$variance, digits = digits) } } cat("\nTraining confusion matrix:\n") print(x$tab) cat("Classification error =", round(x$ce, min(digits,4)), "\n") cat("Brier score =", round(x$brier, min(digits,4)), "\n") if(!is.null(x$tab.newdata)) { cat("\nTest confusion matrix:\n") print(x$tab.newdata) if(!is.null(x$ce.newdata)) { cat("Classification error =", round(x$ce.newdata, min(digits,4)), "\n") cat("Brier score =", round(x$brier.newdata, min(digits,4)), "\n") } } invisible(x) } getParameters.MclustDA <- function(object) { # collect info models <- object$models nclass <- length(models) classes <- names(models) n <- sapply(1:nclass, function(i) models[[i]]$n) G <- sapply(1:nclass, function(i) models[[i]]$G) modelName <- sapply(1:nclass, function(i) models[[i]]$modelName) # prior <- attr(object, "prior") par <- vector(mode = "list", length = nclass) for(i in seq(nclass)) { par[[i]] <- models[[i]]$parameters if(is.null(par[[i]]$pro)) par$pro <- 1 if(par[[i]]$variance$d < 2) { sigma <- rep(par[[i]]$variance$sigma, models[[i]]$G)[1:models[[i]]$G] names(sigma) <- names(par[[i]]$mean) par[[i]]$variance$sigma <- sigma } par[[i]]$variance <- par[[i]]$variance$sigma } return(par) } logLik.MclustDA <- function (object, data, ...) 
{ if(missing(data)) data <- object$data n <- object$n d <- object$d par <- getParameters.MclustDA(object) nclass <- length(par) fclass <- sapply(object$models, function(m) m$n)/n logfclass <- log(fclass) G <- sapply(par, function(x) length(x$pro)) if(object$type == "EDDA") { df <- d * nclass + nVarParams(object$models[[1]]$modelName, d = d, G = nclass) } else { df <- sum(sapply(object$models, function(mod) with(mod, (G - 1) + G * d + nVarParams(modelName, d = d, G = G)))) } # ll <- sapply(object$models, function(mod) # { do.call("dens", c(list(data = data, logarithm = FALSE), mod)) }) # l <- sum(log(apply(ll, 1, function(l) sum(fclass*l)))) ll <- sapply(object$models, function(mod) { do.call("dens", c(list(data = data, logarithm = TRUE), mod)) }) l <- sum(apply(ll, 1, function(l) logsumexp(logfclass+l))) attr(l, "nobs") <- n attr(l, "df") <- df class(l) <- "logLik" return(l) } predict.MclustDA <- function(object, newdata, prop = object$prop, ...) { if(!inherits(object, "MclustDA")) stop("object not of class 'MclustDA'") models <- object$models nclass <- length(models) classNames <- if(is.null(object$class)) names(models) else levels(object$class) n <- sapply(1:nclass, function(i) models[[i]]$n) if(missing(newdata)) { newdata <- object$data } # if(object$d == 1) newdata <- as.vector(newdata) if(is.numeric(prop)) { if(any(prop < 0)) stop("'prop' must be nonnegative") if(length(prop) != nclass) stop("'prop' is of incorrect length") prop <- prop/sum(prop) } else { prop <- n/sum(n) } # class density computed on log scale densfun <- function(mod, data) { do.call("dens", c(list(data = data, logarithm = TRUE), mod)) } # z <- matrix(as.double(NA), nrow = NROW(newdata), ncol = nclass) for(j in 1:nclass) z[,j] <- densfun(models[[j]], data = newdata) z <- sweep(z, MARGIN = 2, FUN = "+", STATS = log(prop)) z <- sweep(z, MARGIN = 1, FUN = "-", STATS = apply(z, 1, logsumexp)) z <- exp(z) colnames(z) <- classNames cl <- apply(z, 1, which.max) class <- factor(classNames[cl], levels 
= classNames) # out <- list(classification = class, z = z) return(out) } plot.MclustDA <- function(x, what = c("scatterplot", "classification", "train&test", "error"), newdata, newclass, dimens = NULL, symbols, colors, main = NULL, ...) { object <- x # Argh. Really want to use object anyway if(!inherits(object, "MclustDA")) stop("object not of class 'MclustDA'") data <- object$data if(object$d > 1) dataNames <- colnames(data) else dataNames <- deparse(object$call$data) n <- nrow(data) p <- ncol(data) dimens <- if(is.null(dimens)) seq(p) else dimens[dimens <= p] d <- length(dimens) if(d == 0) { warning("dimens larger than data dimensionality...") return(invisible()) } if(missing(newdata)) { newdata <- matrix(as.double(NA), 0, p) } else { newdata <- as.matrix(newdata) } if(ncol(newdata) != p) stop("incompatible newdata dimensionality") if(missing(newclass)) { newclass <- vector(length = 0) } else { if(nrow(newdata) != length(newclass)) stop("incompatible newdata and newclass") } if(object$d > 1) newdataNames <- colnames(newdata) else newdataNames <- deparse(match.call()$newdata) what <- match.arg(what, several.ok = TRUE) models <- object$models M <- length(models) if(missing(dimens)) dimens <- seq_len(p) trainClass <- object$class nclass <- length(unique(trainClass)) Data <- rbind(data, newdata) predClass <- predict(object, Data)$classification if(missing(symbols)) { if(M <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols") } else if(M <= 26) { symbols <- LETTERS } } if(length(symbols) == 1) symbols <- rep(symbols,M) if(length(symbols) < M & !any(what == "train&test")) { warning("more symbols needed to show classification") symbols <- rep(16, M) } if(missing(colors)) { colors <- mclust.options("classPlotColors") } if(length(colors) == 1) colors <- rep(colors,M) if(length(colors) < M & !any(what == "train&test")) { warning("more colors needed to show classification") colors <- rep("black", M) } oldpar <- par(no.readonly = 
TRUE)
  # Scatterplot(s) of the data with per-class covariance ellipses; 1-D, 2-D
  # and pairs-matrix layouts depending on the number of plotted dimensions.
  plot.MclustDA.scatterplot <- function(...)
  {
    if(d == 1)
    {
      mclust1Dplot(data = if(nrow(newdata) == 0) data[,dimens[1],drop=FALSE] else newdata[,dimens[1],drop=FALSE], 
                   what = "classification", 
                   classification = if(nrow(newdata) == 0) trainClass else newclass, 
                   xlab = if(nrow(newdata) == 0) dataNames[dimens] else newdataNames[dimens], 
                   ylab = "Classes", 
                   main = NULL, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) 
                if(nrow(newdata) == 0) "Training data" else "Test data" 
              else NULL, 
              cex.main = oldpar$cex.lab)
    }
    # helper: scatterplot of two dimensions with mixture component ellipses
    scatellipses <- function(data, dimens, nclass, symbols, colors, ...)
    {
      m <- lapply(models, function(m)
      {
        m$parameters$mean <- array(m$parameters$mean[dimens,], c(2,m$G))
        m$parameters$variance$sigma <- array(m$parameters$variance$sigma[dimens,dimens,], c(2,2,m$G))
        m
      })
      plot(data[,dimens], type = "n", ...)
      for(l in 1:nclass)
      {
        I <- m[[l]]$observations
        points(data[I,dimens[1]], data[I,dimens[2]], 
               pch = symbols[l], col = colors[l])
        for(g in 1:(m[[l]]$G))
        {
          mvn2plot(mu = m[[l]]$parameters$mean[,g], 
                   sigma = m[[l]]$parameters$variance$sigma[,,g], 
                   k = 15, 
                   fillEllipse = mclust.options("fillEllipses"), 
                   col = if(mclust.options("fillEllipses")) colors[l] 
                         else rep("grey30",3))
        }
      }
    }
    if(d == 2)
    {
      scatellipses(if(nrow(newdata) == 0) data else newdata, 
                   dimens = dimens[1:2], 
                   nclass = nclass, symbols = symbols, colors = colors, 
                   xlab = if(nrow(newdata) == 0) dataNames[dimens[1]] else newdataNames[dimens[1]], 
                   ylab = if(nrow(newdata) == 0) dataNames[dimens[2]] else newdataNames[dimens[2]], 
                   ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) 
                if(nrow(newdata) == 0) "Training data" else "Test data" 
              else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(d > 2)
    {
      # pairs-style matrix: variable names on the diagonal, ellipse plots
      # off-diagonal, axes alternating around the outside
      on.exit(par(oldpar))
      par(mfrow = c(d, d), 
          mar = rep(0.2/2,4), 
          oma = rep(4,4)+c(0,0,1*(!is.null(main)),0))
      for(i in seq(d))
      {
        for(j in seq(d))
        {
          if(i == j)
          {
            plot(if(nrow(newdata) == 0) data[,dimens[c(i,j)]] else newdata[,dimens[c(i,j)]], 
                 type="n", xlab = "", ylab = "", axes=FALSE)
            text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), 
                 labels = if(nrow(newdata) == 0) dataNames[dimens][i] else newdataNames[dimens][i], 
                 cex = 1.5, adj = 0.5)
            box()
          } else
          {
            scatellipses(if(nrow(newdata) == 0) data else newdata, 
                         dimens = dimens[c(j,i)], 
                         nclass = nclass, symbols = symbols, colors = colors, 
                         xaxt = "n", yaxt = "n")
          }
          if(i == 1 && (!(j%%2))) axis(3)
          if(i == d && (j%%2)) axis(1)
          if(j == 1 && (!(i%%2))) axis(2)
          if(j == d && (i%%2)) axis(4)
        }
      }
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) 
                if(nrow(newdata) == 0) "Training data" else "Test data" 
              else NULL, 
              cex.main = 1.2*oldpar$cex.main, outer = TRUE, line = 3)
    }
  }
  # Plots of the predicted classification for training or test data.
  plot.MclustDA.classification <- function(...)
  {
    if(nrow(newdata) == 0 && d == 1)
    {
      mclust1Dplot(data = data[,dimens[1],drop=FALSE], 
                   what = "classification", 
                   classification = predClass[1:n], 
                   colors = colors[1:nclass], 
                   xlab = dataNames[dimens], 
                   main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Training data classification" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) == 0 && d == 2)
    {
      coordProj(data = data[,dimens], 
                what = "classification", 
                classification = predClass[1:n], 
                main = FALSE, 
                colors = colors[1:nclass], 
                symbols = symbols[1:nclass], ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Training data classification" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) == 0 && d > 2)
    {
      clPairs(data[,dimens], 
              classification = predClass[1:n], 
              colors = colors[1:nclass], 
              symbols = symbols[1:nclass], 
              cex.labels = 1.5, 
              main = if(!is.null(main)) if(is.character(main)) main else if(as.logical(main)) "Training data classification" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) > 0 && d == 1)
    {
      mclust1Dplot(data = newdata[,dimens], 
                   what = "classification", 
                   classification = predClass[-(1:n)], 
                   xlab = newdataNames[dimens], 
                   main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Test data classification" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) > 0 && d == 2)
    {
      coordProj(data = newdata[,dimens], 
                what ="classification", 
                classification = predClass[-(1:n)], 
                colors = colors[1:nclass], 
                symbols = symbols[1:nclass], 
                main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Test data classification" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) > 0 & length(dimens) > 2)
    {
      on.exit(par(oldpar))
      # par(oma = c(0,0,10,0))
      clPairs(data = newdata[,dimens], 
              classification = predClass[-(1:n)], 
              colors = colors[1:nclass], 
              symbols = symbols[1:nclass], 
              cex.labels = 1.5, 
              main = if(!is.null(main)) if(is.character(main)) main else if(as.logical(main)) "Test data classification" else NULL, 
              cex.main = oldpar$cex.lab)
    }
  }
  # Plot distinguishing training from test observations.
  plot.MclustDA.traintest <- function(...)
  {
    cl <- factor(rep(c("Train","Test"), times = c(nrow(data), nrow(newdata))), 
                 levels = c("Train", "Test"))
    if(d == 1)
    {
      mclust1Dplot(data = Data[,dimens], 
                   what = "classification", 
                   classification = cl, 
                   xlab = dataNames[dimens], ylab = "", 
                   colors = c("grey20", "grey80"), 
                   main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Training and Test data" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(d == 2)
    {
      coordProj(Data, dimens = dimens[1:2], 
                what = "classification", 
                classification = cl, 
                cex = 0.8, 
                symbols = c(19,3), 
                colors = c("grey80", "grey20"), 
                main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Training (o) and Test (+) data" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(d > 2)
    {
      clPairs(Data[,dimens], 
              classification = cl, 
              symbols = c(19,3), 
              colors = c("grey80", "grey20"), 
              main = if(!is.null(main)) if(is.character(main)) main else if(as.logical(main)) "Training (o) and Test (+) data" else NULL, 
              cex.main = oldpar$cex.lab)
    }
  }
  # Plots of misclassified observations (predicted vs true labels).
  plot.MclustDA.error <- function(...)
  {
    if(nrow(newdata) == 0 && d == 1)
    {
      mclust1Dplot(data = data[,dimens], 
                   what = "error", 
                   classification = predClass[1:n], 
                   truth = trainClass, 
                   xlab = dataNames[dimens], 
                   main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Training data error" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) == 0 && d == 2)
    {
      coordProj(data = data[,dimens[1:2]], 
                what = "error", 
                classification = predClass[1:n], 
                truth = trainClass, 
                main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Training data error" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) == 0 && d > 2)
    {
      on.exit(par(oldpar))
      par(mfrow = c(d, d), 
          mar = rep(0.2/2,4), 
          oma = rep(4,4)+c(0,0,1*(!is.null(main)),0))
      for(i in seq(d))
      {
        for(j in seq(d))
        {
          if(i == j)
          {
            plot(data[,dimens[c(i,j)]], 
                 type="n", xlab = "", ylab = "", axes=FALSE)
            text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), 
                 dataNames[dimens][i], cex = 1.5, adj = 0.5)
            box()
          } else
          {
            coordProj(data = data[,dimens[c(j,i)]], 
                      what = "error", 
                      classification = predClass[1:n], 
                      truth = trainClass, 
                      main = FALSE, 
                      xaxt = "n", yaxt = "n")
          }
          if(i == 1 && (!(j%%2))) axis(3)
          if(i == d && (j%%2)) axis(1)
          if(j == 1 && (!(i%%2))) axis(2)
          if(j == d && (i%%2)) axis(4)
        }
      }
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Training data error" else NULL, 
              cex.main = 1.2*oldpar$cex.main, outer = TRUE, line = 3)
    }
    if(nrow(newdata) > 0 && d == 1)
    {
      mclust1Dplot(data = newdata[,dimens], 
                   what = "error", 
                   classification = predClass[-(1:n)], 
                   truth = newclass, 
                   xlab = newdataNames[dimens], 
                   main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Test data error" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) > 0 && d == 2)
    {
      coordProj(data = newdata[,dimens[1:2]], 
                what = "error", 
                classification = predClass[-(1:n)], 
                truth = newclass, 
                main = FALSE, ...)
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Test data error" else NULL, 
              cex.main = oldpar$cex.lab)
    }
    if(nrow(newdata) > 0 && d > 2)
    {
      on.exit(par(oldpar))
      par(mfrow = c(d, d), 
          mar = rep(0.2/2,4), 
          oma = rep(4,4)+c(0,0,1*(!is.null(main)),0))
      for(i in seq(d))
      {
        for(j in seq(d))
        {
          if(i == j)
          {
            plot(newdata[,dimens[c(i,j)]], 
                 type="n", xlab = "", ylab = "", axes=FALSE)
            text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), 
                 newdataNames[dimens][i], cex = 1.5, adj = 0.5)
            box()
          } else
          {
            coordProj(data = newdata[,dimens[c(j,i)]], 
                      what = "error", 
                      classification = predClass[-(1:n)], 
                      truth = newclass, 
                      main = FALSE, 
                      xaxt = "n", yaxt = "n")
          }
          if(i == 1 && (!(j%%2))) axis(3)
          if(i == d && (j%%2)) axis(1)
          if(j == 1 && (!(i%%2))) axis(2)
          if(j == d && (i%%2)) axis(4)
        }
      }
      if(!is.null(main))
        title(if(is.character(main)) main else if(as.logical(main)) "Test data error" else NULL, 
              cex.main = 1.2*oldpar$cex.main, outer = TRUE, line = 3)
    }
  }
  # dispatch: interactive menu when more than one plot is requested
  if(interactive() & length(what) > 1)
  {
    title <- "Model-based discriminant analysis plots:"
    # present menu waiting user choice
    choice <- menu(what, graphics = FALSE, title = title)
    while(choice != 0)
    {
      if(what[choice] == "scatterplot")    plot.MclustDA.scatterplot(...)
      if(what[choice] == "classification") plot.MclustDA.classification(...)
      if(what[choice] == "train&test")     plot.MclustDA.traintest(...)
      if(what[choice] == "error")          plot.MclustDA.error(...)
      # re-present menu waiting user choice
      choice <- menu(what, graphics = FALSE, title = title)
    }
  }
  else
  {
    if(any(what == "scatterplot"))    plot.MclustDA.scatterplot(...)
    if(any(what == "classification")) plot.MclustDA.classification(...)
    if(any(what == "train&test"))     plot.MclustDA.traintest(...)
    if(any(what == "error"))          plot.MclustDA.error(...)
  }
  invisible()
}

# TODO: old version to be deleted at a certain point
# cvMclustDA <- function(object, nfold = 10, 
#                        metric = c("error", "brier"), 
#                        prop = object$prop, 
#                        verbose = interactive(), ...)
cvMclustDA <- function(object, nfold = 10,
                       prop = object$prop,
                       verbose = interactive(), ...)
{
  # Cross-validation of a fitted 'MclustDA' classifier.
  #
  # The model specification stored in object$call is re-fitted on each of
  # 'nfold' class-balanced training folds (leave-one-out when nfold == n)
  # and evaluated on the corresponding held-out fold. Both the
  # misclassification error and the Brier score are accumulated, together
  # with the cross-validated class predictions and posterior probabilities.
  #
  # Returns a list with components:
  #   classification, z, ce, se.ce, brier, se.brier
  if(!is.null(match.call(expand.dots = TRUE)$metric))
    warning("'metric' argument is deprecated! \nIgnored.")
  #
  call <- object$call
  nfold <- as.numeric(nfold)
  data <- object$data
  class <- as.factor(object$class)
  n <- length(class)
  # per-class number of components and model names from the fitted model,
  # reused when refitting on each training fold
  G <- lapply(object$models, function(mod) mod$G)
  modelName <- lapply(object$models, function(mod) mod$modelName)
  #
  # misclassification rate
  ce <- function(pred, class)
  {
    1 - sum(class == pred, na.rm = TRUE)/length(class)
  }
  #
  # leave-one-out when nfold == n, otherwise class-balanced folds
  folds <- if(nfold == n) lapply(1:n, function(x) x) else
             balancedFolds(class, nfolds = nfold)
  nfold <- length(folds)
  folds.size <- sapply(folds, length)
  #
  # column 1 = classification error, column 2 = Brier score
  metric.cv <- matrix(as.double(NA), nrow = nfold, ncol = 2)
  class.cv <- factor(rep(NA, n), levels = levels(class))
  prob.cv <- matrix(as.double(NA), nrow = n, ncol = nlevels(class),
                    dimnames = list(NULL, levels(class)))

  if(verbose)
  {
    cat("cross-validating ...\n")
    flush.console()
    pbar <- txtProgressBar(min = 0, max = nfold, style = 3)
    on.exit(close(pbar))
  }

  for(fold in seq(nfold))
  {
    testIdx <- folds[[fold]]
    # refit the stored model specification on the training portion only
    call$data <- data[-testIdx,,drop=FALSE]
    call$class <- class[-testIdx]
    call$G <- G
    call$modelNames <- modelName
    call$verbose <- FALSE
    fit <- eval(call, parent.frame())
    # evaluate on the held-out portion
    testPred <- predict(fit, data[testIdx,,drop=FALSE], prop = prop)
    metric.cv[fold,1] <- ce(testPred$classification, class[testIdx])
    metric.cv[fold,2] <- BrierScore(testPred$z, class[testIdx])
    class.cv[testIdx] <- testPred$classification
    prob.cv[testIdx,] <- testPred$z
    #
    if(verbose)
      setTxtProgressBar(pbar, fold)
  }

  # fold-size weighted averages and their standard errors
  cv <- apply(metric.cv, 2, function(m) sum(m*folds.size)/sum(folds.size))
  se <- apply(metric.cv, 2, function(m) sqrt(var(m)/nfold))

  list(classification = class.cv,
       z = prob.cv,
       ce = cv[1],
       se.ce = se[1],
       brier = cv[2],
       se.brier = se[2])
}
Tibshirani, # Balasubramanian Narasimhan, Gil Chu. totals <- table(y) fmax <- max(totals) nfolds <- min(nfolds, fmax) # ensure number of folds not larger than the max class size folds <- as.list(seq(nfolds)) yids <- split(seq(y), y) # get the ids in a list, split by class ## create a big matrix, with enough rows to get in all the folds per class bigmat <- matrix(as.double(NA), nrow = ceiling(fmax/nfolds) * nfolds, ncol = length(totals)) for(i in seq(totals)) { bigmat[seq(totals[i]), i] <- if (totals[i]==1) yids[[i]] else sample(yids[[i]]) } smallmat <- matrix(bigmat, nrow = nfolds) # reshape the matrix ## clever sort to mix up the NAs smallmat <- permuteRows(t(smallmat)) res <-vector("list", nfolds) for(j in 1:nfolds) { jj <- !is.na(smallmat[, j]) res[[j]] <- smallmat[jj, j] } return(res) } permuteRows <- function(x) { dd <- dim(x) n <- dd[1] p <- dd[2] mm <- runif(length(x)) + rep(seq(n) * 10, rep(p, n)) matrix(t(x)[order(mm)], n, p, byrow = TRUE) } # Deprecated functions cv1EMtrain <- function(data, labels, modelNames=NULL) { .Deprecated("cvMclustDA", package = "mclust") z <- unmap(as.numeric(labels)) G <- ncol(z) dimDataset <- dim(data) oneD <- is.null(dimDataset) || length(dimDataset[dimDataset > 1]) == 1 if (oneD || length(dimDataset) != 2) { if (is.null(modelNames)) modelNames <- c("E", "V") if (any(!match(modelNames, c("E", "V"), nomatch = 0))) stop("modelNames E or V for one-dimensional data") n <- length(data) cv <- matrix(1, nrow = n, ncol = length(modelNames)) dimnames(cv) <- list(NULL, modelNames) for (m in modelNames) { for (i in 1:n) { mStep <- mstep(modelName = m, data = data[-i], z = z[-i,], warn = FALSE) eStep <- do.call("estep", c(mStep, list(data = data[i], warn = FALSE))) if (is.null(attr(eStep, "warn"))) { k <- (1:G)[eStep$z == max(eStep$z)] l <- (1:G)[z[i,] == max(z[i,])] cv[i, m] <- as.numeric(!any(k == l)) } } } } else { if (is.null(modelNames)) modelNames <- mclust.options("emModelNames") n <- nrow(data) cv <- matrix(1, nrow = n, ncol = 
length(modelNames)) dimnames(cv) <- list(NULL, modelNames) for (m in modelNames) { for (i in 1:n) { mStep <- mstep(modelName = m, data = data[-i,], z = z[-i,], warn = FALSE) eStep <- do.call("estep", c(mStep, list(data = data[i, , drop = FALSE], warn = FALSE))) if (is.null(attr(eStep, "warn"))) { k <- (1:G)[eStep$z == max(eStep$z)] l <- (1:G)[z[i,] == max(z[i,])] cv[i, m] <- as.numeric(!any(k == l)) } } } } errorRate <- apply(cv, 2, sum) errorRate/n } bicEMtrain <- function(data, labels, modelNames=NULL) { .Deprecated("MclustDA", package = "mclust") z <- unmap(as.numeric(labels)) G <- ncol(z) dimData <- dim(data) oneD <- is.null(dimData) || length(dimData[dimData > 1]) == 1 if (oneD || length(dimData) != 2) { if (is.null(modelNames)) modelNames <- c("E", "V") if (any(!match(modelNames, c("E", "V"), nomatch = 0))) stop("modelNames E or V for one-dimensional data") } else { if (is.null(modelNames)) modelNames <- mclust.options("emModelNames") } BIC <- rep(NA, length(modelNames)) names(BIC) <- modelNames for (m in modelNames) { mStep <- mstep(modelName = m, data = data, z = z, warn = FALSE) eStep <- do.call("estep", c(mStep, list(data=data, warn=FALSE))) if (is.null(attr(eStep, "warn"))) BIC[m] <- do.call("bic", eStep) } BIC } cv.MclustDA <- function(...) { .Deprecated("cvMclustDA", package = "mclust") cvMclustDA(...) 
} # "[.mclustDAtest" <- function (x, i, j, drop = FALSE) # { # clx <- oldClass(x) # oldClass(x) <- NULL # NextMethod("[") # } classPriorProbs <- function(object, newdata = object$data, itmax = 1e3, eps = sqrt(.Machine$double.eps)) { if(!inherits(object, "MclustDA")) stop("object not of class 'MclustDA'") z <- predict(object, newdata = newdata)$z prop <- object$prop p <- colMeans(z) p0 <- p+1 it <- 0 # while(max(abs(p-p0)/abs(p)) > eps & it < itmax) while(any(abs(p-p0) > eps*(1+abs(p))) & it < itmax) { it <- it+1 p0 <- p # z_upd <- t(apply(z, 1, function(z) { z <- z*p/prop; z/sum(z) })) z_upd <- sweep(z, 2, FUN = "*", STATS = p/prop) z_upd <- sweep(z_upd, MARGIN = 1, FUN = "/", STATS = rowSums(z_upd)) p <- colMeans(z_upd) } return(p) } mclust/R/gmmhd.R0000644000175000017500000004377614053123510013346 0ustar nileshnilesh###################################################### ## ## ## Identifying Connected Components in Gaussian ## ## Finite Mixture Models for Clustering ## ## ## ## Author: Luca Scrucca ## ###################################################### gmmhd <- function(object, ngrid = min(round((log(nrow(data)))*10), nrow(data)), dr = list(d = 3, lambda = 1, cumEvalues = NULL, mindir = 2), classify = list(G = 1:5, modelNames = mclust.options("emModelNames")[-c(8,10)]), ...) { if(!inherits(object, "Mclust")) stop("first argument must be an object of class 'Mclust'") if(!requireNamespace("geometry", quietly = TRUE)) stop("Package 'geometry' is required. 
Please install it.") data <- object$data n <- nrow(data) if(ngrid > n) { warning("ngrid too large, set equal to n") n.grid <- n } mNames <- attr(object$BIC, "modelNames") if(is.null(dr$d)) dr$d <- 2 if(is.null(dr$lambda)) dr$lambda <- 1 if(is.null(classify$G)) classify$G <- 1:5 if(is.null(classify$modelNames)) classify$modelNames <- mNames classify$modelNames <- intersect(classify$modelNames, mNames) if(is.null(dr$mindir)) dr$mindir <- 2 if(ncol(data) >= dr$d) { # compute GMMDR directions DR <- MclustDR(object, lambda = dr$lambda) # subset selection of GMMDR directions evalues <- DR$evalues[seq(DR$numdir)] if(is.null(dr$cumEvalues)) { # if dr$cumEvalues not provided # perform suset selection of GMMDR directions DR <- MclustDRsubsel(DR, G = attr(object$BIC, "G"), modelNames = mNames, mindir = dr$mindir, verbose = FALSE) dims <- seq(DR$numdir) } else { # select the smallest subset with cumsum eigenvalues > dr$cumEvalues dims <- min(which(cumsum(evalues/sum(evalues)) > dr$cumEvalues)) dims <- seq(min(dr$mindir, dims)) } # estimate the density from Mclust model on the selected directions x <- DR$dir[,dims,drop=FALSE] colnames(x) <- paste("GMMDR dir", 1:ncol(x), sep = "") mc <- object$call mc$data <- x mc$modelNames <- mNames mc$verbose <- FALSE obj <- eval(mc, parent.frame()) DR$parameters <- obj$parameters fdens <- dens(data = x, modelName = obj$modelName, parameters = obj$parameters) } else { x <- data DR <- NULL fdens <- dens(data = x, modelName = object$modelName, parameters = object$parameters) } p <- ncol(x) xscaled <- scale(x, colMeans(x), apply(x,2,sd)) # if to add vertices of convex envelope # xrange <- apply(x, 2, range) # xbound <- do.call("expand.grid", matrix2list(xrange)) # x <- rbind(as.matrix(x), as.matrix(xbound*1.1)) # fdens <- c(fdens, rep(0,nrow(xbound))) # uniform grid of proportions for which quantiles are calculated pn <- seq(0, 1, length = ngrid) qn <- as.numeric(quantile(fdens[1:n], 1-pn)) nc <- pc <- rep(0, length(qn)) con <- vector("list", 
length = length(qn)) # Delaunay triangulation matrix of dim (m x p+1), where each row provides a # set of indices to the points describing a simplex of dimension p mode(xscaled) <- "double" # delaunayn requires a real matrix DT <- suppressMessages(geometry::delaunayn(xscaled, options="QJ")) # plot(x); for(l in 1:nrow(DT)) polygon(x[DT[l,],], border = grey(.8)) on.exit(unlink("qhull_out.txt")) # Graph of neighborhood for each point NB <- vector(mode = "list", length = n) for(i in seq(n)) { NB[[i]] <- sort(unique(as.vector(DT[rowSums(DT==i)>0,]))) } for(i in seq(length(qn))) { c <- qn[i] Sc <- which(fdens[1:n] > c); names(Sc) <- NULL if(length(Sc) < 1) next() pc[i] <- length(Sc)/n # select neighborhoods of edges with density > c level nb <- NB[Sc] # select within neighborhoods those edges whose density > c level nb <- lapply(nb, function(nb) sort(intersect(nb, Sc))) nb <- nb[!duplicated(nb)] # table(sapply(nb,length)) # remove neighborhoods which do not share any facet, i.e. having # less than p edges/obs # nb <- nb[sapply(nb, length) >= p] # remove neighborhoods which are not simplices of dim (p+1) nb <- nb[sapply(nb, length) > p] # get connected components ConComp <- ConnectComp(nb) # sapply(ConComp,length); ConComp if(length(ConComp) < 1) next() nc[i] <- length(ConComp) con[[i]] <- ConComp # lapply(ConComp, sort) } # obj <- list(Mclust = object, MclustDA = NULL, MclustDR = DR, x = x, # i.e. 
the input data or GMMDR directions density = fdens[1:n], con = con, nc = structure(nc, names = format(pn, digit = 3)), pc = pc, pn = pn, qn = structure(qn, names = format(pn, digit = 3)), clusterCores = NULL, cluster = NULL, numClusters = NULL) class(obj) <- "gmmhd" # cluster cores obj$clusterCores <- gmmhdClusterCores(obj) # semi-supervised classification modClass <- gmmhdClassify(obj, G = classify$G, modelNames = classify$modelNames, verbose = FALSE) obj$MclustDA <- modClass$model obj$cluster <- modClass$cluster obj$numClusters <- length(tabulate(obj$cluster)) return(obj) } print.gmmhd <- function(x, digits = getOption("digits"), ...) { cat("\'", class(x)[1], "\' model object:\n", sep = "") cat(paste0(" Mclust initial model = (", x$Mclust$modelName, ",", x$Mclust$G, ")\n")) if(!is.null(x$MclustDR)) cat(paste0(" MclustDR projection = (", x$MclustDR$modelName, ",", x$MclustDR$G, ")\n")) cat(paste0(" GMMHD final number of clusters = ", x$numClusters, "\n")) invisible() } summary.gmmhd <- function(object, ...) { title <- paste("GMM with high-density connected components for clustering") out <- with(object, list(title = title, "Mclust" = list("G" = Mclust$G, "modelName" = Mclust$modelName), "MclustDR" = list("G" = MclustDR$G, "modelName" = MclustDR$modelName), "clusterCores" = table(clusterCores, useNA = "ifany", dnn = NULL), "cluster" = table(cluster, useNA = "ifany", dnn = NULL))) if(is.null(object$MclustDR)) out$MclustDR <- NULL class(out) <- "summary.gmmhd" return(out) } print.summary.gmmhd <- function(x, digits = getOption("digits"), ...) 
{ cat(rep("-", nchar(x$title)),"\n",sep="") cat(x$title, "\n") cat(rep("-", nchar(x$title)),"\n",sep="") # cat("\nInitial model: Mclust (", x$Mclust$modelName, ",", x$Mclust$G, ")", "\n", sep = "") # if(!is.null(x$MclustDR)) cat("\nModel on projection subspace: (", x$MclustDR$modelName, ",", x$MclustDR$G, ")", "\n", sep = "") # cat("\nCluster cores:\n") print(x$clusterCores) # cat("\nFinal clustering:\n") print(x$cluster) # invisible() } plot.gmmhd <- function(x, what = c("mode", "cores", "clusters"), ...) { object <- x what <- match.arg(what, choices = eval(formals(plot.gmmhd)$what), several.ok = TRUE) if(interactive() & length(what) > 1) { title <- "GMM high-density connected components:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "mode") plot.gmmhd.mode(object, ...) if(what[choice] == "cores") plot.gmmhd.cores(object, ...) if(what[choice] == "clusters") plot.gmmhd.clusters(object, ...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "mode")) plot.gmmhd.mode(object, ...) if(any(what == "cores")) plot.gmmhd.cores(object, ...) if(any(what == "clusters")) plot.gmmhd.clusters(object, ...) } invisible() } plot.gmmhd.mode <- function(object, ...) { plot(c(object$pc,1), c(object$nc,0), type = "S", xlab = "Proportion of observed data", ylab = "Mode function", yaxt = "n") axis(side = 2, at = seq(0, max(object$nc, na.rm = TRUE))) } plot.gmmhd.cores <- function(object, col = c("grey50", mclust.options("classPlotColors")), pch = c(1, mclust.options("classPlotSymbols")), ...) 
{ x <- object$x p <- ncol(x) n <- nrow(x) clCores <- object$clusterCores numClusters <- object$numClusters colCores <- col[1] col <- col[-1] col <- col[clCores] col[is.na(col)] <- colCores pch <- unique(pch) pchCores <- pch[1] pch <- pch[-1] pch <- pch[clCores] pch[is.na(pch)] <- pchCores cex <- rep(par("cex"), length(pch)) cex[is.na(clCores)] <- par("cex")/2 if(p == 1) { plot(x, object$density, col = col, pch = pch, cex = cex, ylim = range(0,object$density), xlab = colnames(x)[1], ylab = "Density", ...) } else if(p == 2) { plot(x[,1:2,drop=FALSE], col = col, pch = pch, cex = cex, ...) } else if(p > 2) { pairs(x, col = col, pch = pch, cex = cex, gap = 0, ...) } invisible() } plot.gmmhd.clusters <- function(object, col = mclust.options("classPlotColors"), pch = mclust.options("classPlotSymbols"), ...) { x <- object$x p <- ncol(x) n <- nrow(x) cluster <- object$cluster numClusters <- object$numClusters col <- col[cluster] pch <- setdiff(pch,22)[cluster] if(p == 1) { plot(x, object$density, col = col, pch = pch, ylim = range(0,object$density), xlab = colnames(x)[1], ylab = "Density", ...) } else if(p == 2) { plot(x[,1:2,drop=FALSE], col = col, pch = pch, ...) } else if(p > 2) { pairs(x, col = col, pch = pch, cex = 0.8, gap = 0, ...) } invisible() } gmmhdClusterCores <- function(object, tails = FALSE, ...) { # Identify cluster cores as the first subset of connected components # corresponding to the largest local mode n <- nrow(object$x) nc <- object$nc pc <- object$pc conComp <- object$con # select the subset with largest number of modes ... i <- which(diff(c(nc,0)) < 0) # i <- i[which(nc[i] == max(nc[i]))] # no to consider only the highest mode # remove spurius local modes, i.e. those not identified by at least # two consecutive density level # LS:20150107 okmode <- which(nc[i] == nc[i-1])[1] # LS:20150107 i <- if(length(okmode) > 0) i[okmode] else length(nc) # plot(pc, nc); abline(v = pc[i]) # ... 
and consider multiplicity of modes # LS: 20150107 i <- which(nc == max(nc[i])) # cc <- conComp[i] clusterCores <- matrix(as.double(NA), n, length(i)) for(j in 1:ncol(clusterCores)) for(cl in 1:length(cc[[j]])) { clusterCores[cc[[j]][[cl]],j] <- cl } while(ncol(clusterCores) > 1) { ncl <- length(unique(na.omit(clusterCores[,2]))) tmp <- rep(NA, n) for(cl in 1:ncl) { l <- which(clusterCores[,2] == cl) if(all(is.na(clusterCores[l,1]))) { tmp[l] <- paste(clusterCores[l,2],"*",sep="") } else { if(length(unique(na.omit(clusterCores[l,1]))) > 1) tmp[l] <- clusterCores[l,1] else tmp[l] <- paste(clusterCores[l,2],"*",sep="") } } clusterCores[,2] <- unclass(as.factor(tmp)) clusterCores <- clusterCores[,-1,drop=FALSE] } clusterCores <- as.vector(clusterCores) return(clusterCores) # select the last subset with largest number of modes # i <- max(which(nc == max(nc))) # select the first subset with largest number of modes i <- which(diff(c(nc,0)) < 0) i <- i[which(nc[i] == max(nc[i]))[1]] # select the largest subset with the largest number of modes # i <- i[max(which(nc[i] == max(nc[i])))] conComp <- object$con[[i]] clusterCores <- rep(NA, n) for(cl in 1:length(conComp)) { clusterCores[conComp[[cl]]] <- cl } return(clusterCores) } gmmhdClassify <- function(object, G = 1:5, modelNames = mclust.options("emModelNames"), verbose = TRUE, ...) { if(!inherits(object, "gmmhd")) stop("object is not of class 'gmmhd'") x <- object$x n <- nrow(x) p <- ncol(x) if(p == 1) modelNames <- unique(substr(modelNames, 1, 1)) clusterCores <- object$clusterCores numClusters <- length(tabulate(clusterCores)) con <- object$con # classify unclustered obs based on training cluster cores isCore <- (!is.na(clusterCores)) logRatio <- function(p) { p <- pmax(pmin(p, 1-sqrt(.Machine$double.eps)),sqrt(.Machine$double.eps)) log(p)-log(1-p) } # select num. 
components G to guarantee at least minSize obs per class numCompClass <- function(class, G, minSize = 10) { classSize <- tabulate(class) Gin <- as.vector(G) maxG <- classSize %/% minSize maxG <- pmin(maxG, max(G)) G <- vector(length = length(maxG), mode = "list") for(k in 1:length(G)) { G[[k]] <- intersect(Gin, seq(maxG[k])) } return(G) } inc <- isCore cluster <- clusterCores while(sum(inc) < n) { mod <- MclustDA(data = x[inc,,drop=FALSE], class = as.character(cluster[inc]), G = numCompClass(cluster[inc], G), modelNames = modelNames, verbose = verbose) unallocated <- which(!inc) # remove those obs with density ~ 0 dens <- density.MclustDA(mod, newdata=x[unallocated,,drop=FALSE]) dens <- pmax(dens, .Machine$double.eps) i <- (dens/max(dens) > sqrt(.Machine$double.eps)) if(sum(i) > 0) unallocated <- unallocated[i] # pred <- predict(mod, newdata = x[unallocated,,drop=FALSE]) # questa versione puo' non allocare obs ai clusterCores piccoli # zmax <- apply(pred$z,1,max) # zclass <- apply(pred$z,1,which.max) # log.ratio <- logRatio(zmax) # alloc <- (log.ratio >= quantile(log.ratio, prob = sum(inc)/n)) # questa versione cerca di ctr per dim clusters e alloca alla classe # predicted iff logRatio is larger than sqrt(sum(inc)/n) quantile z <- pred$z zclass <- apply(z,1,which.max) alloc <- matrix(NA, nrow(z), ncol(z)) for(k in seq(ncol(z))) { log.ratio <- logRatio(z[,k]) alloc[,k] <- (log.ratio >= quantile(log.ratio, prob = sqrt(sum(inc)/n))) & (zclass == k) } alloc <- apply(alloc, 1, any) toclass <- unallocated[alloc] cluster[toclass] <- zclass[alloc] inc <- (!is.na(cluster)) } mod <- MclustDA(data = x, class = cluster, G = numCompClass(cluster[inc], G), modelNames = modelNames, verbose = verbose) cluster <- predict(mod, x)$classification out <- list(model = mod, clusterCores = clusterCores, cluster = cluster) return(out) } density.MclustDA <- function(object, newdata, prior, logarithm = FALSE, ...) 
ConnectComp <- function(nb)
{
  # Compute the connected components of the graph implied by a list of
  # neighborhoods: each element of 'nb' is a vector of vertex indices that
  # are mutually connected. Overlapping neighborhoods are iteratively
  # merged until the configuration is stable, so the result is a list of
  # disjoint, sorted integer vectors, one per connected component.
  #
  # Example:
  #   nb <- list(c(1,2,3), c(2,3,4), c(9,10,11), c(9,11,12), c(1,6,5))
  #   ConnectComp(nb)   # => list(1:6, c(9L,10L,11L,12L))
  #
  # Returns NULL for non-list or empty input.
  #
  # Changes vs. previous version: short-circuit '||' in the scalar guard
  # (the old scalar '|' evaluated both operands needlessly) and removal of
  # the unused locals n, u, nu.
  if(!is.list(nb) || length(nb) < 1)
    return(NULL)
  cnb <- cnb.old <- lapply(nb, as.integer)
  stable <- FALSE
  # merge the neighborhoods until the configuration is stable
  while(!stable)
  {
    i <- 0
    while(i < length(cnb))
    {
      i <- i + 1
      # indices of all current sets sharing at least one vertex with set i ...
      j <- which(sapply(cnb, function(nbb) any(is.element(cnb[[i]], nbb))))
      # ... which are merged into set i and then dropped
      cnb[[i]] <- sort(unique(unlist(cnb[j])))
      cnb[setdiff(j, i)] <- NULL
    }
    if(identical(cnb, cnb.old)) stable <- TRUE
    cnb.old <- cnb
  }
  return(cnb)
}
classification") symbols <- rep("|", L) } if(is.null(colors)) { colors <- mclust.options("classPlotColors")[1:L] } else if(length(colors) == 1) { colors <- rep(colors, L) } else if(length(colors) < L) { warning("more colors needed to show classification") colors <- rep("black", L) } } main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) what <- match.arg(what, choices = eval(formals(mclust1Dplot)$what)) bad <- what == "classification" && is.null(classification) bad <- bad || (what == "uncertainty" && is.null(uncertainty)) bad <- bad || (what == "error" && (is.null(classification) || is.null(truth))) if(bad) stop("insufficient input for specified plot") M <- L switch(what, "classification" = { plot(data, seq(from = 0, to = M, length = n), type = "n", xlab = if(is.null(xlab)) "" else xlab, ylab = if(is.null(ylab)) "Classification" else ylab, xlim = xlim, ylim = if(is.null(ylim)) grDevices::extendrange(r = c(0,M), f = 0.1) else ylim, yaxt = "n", main = "", ...) axis(side = 2, at = 0:M, labels = c("", sort(unique(classification)))) if(main) title("Classification") for(k in 1:L) { I <- classification == U[k] if(symbols[k] == "|") { vpoints(data[I], rep(0, length(data[I])), cex = cex) vpoints(data[I], rep(k, length(data[I])), col = colors[k], cex = cex) } else { points(data[I], rep(0, length(data[I])), pch = symbols[k], cex = cex) points(data[I], rep(k, length(data[I])), pch = symbols[k], col = colors[k], cex = cex) } } }, "error" = { ERRORS <- classError(classification, truth)$misclassified plot(data, seq(from = 0, to = M, length = n), type = "n", xlab = xlab, ylab = if(is.null(ylab)) "Class errors" else ylab, xlim = xlim, ylim = if(is.null(ylim)) grDevices::extendrange(r = c(0,M), f = 0.1) else ylim, yaxt = "n", ...) 
axis(side = 2, at = 0:M, labels = c("", unique(classification))) if(main) title("Classification error") good <- rep(TRUE, length(classification)) good[ERRORS] <- FALSE sym <- "|" for(k in 1:L) { K <- classification == U[k] I <- (K & good) if(any(I)) { if(FALSE) { sym <- if (L > 4) 1 else if (k == 4) 5 else k - 1 } l <- sum(as.numeric(I)) if(sym == "|") vpoints(data[I], rep(0, l), col = colors[k], cex = cex) else points(data[I], rep(0, l), pch = sym, col = colors[k], cex = cex) } I <- K & !good if(any(I)) { if(FALSE) { sym <- if (L > 5) 16 else k + 14 } l <- sum(as.numeric(I)) if(sym == "|") vpoints(data[I], rep(k, l), col = colors[k], cex = cex) else points(data[I], rep(k, l), pch = sym, col = colors[k], cex = cex) } } }, "uncertainty" = { u <- (uncertainty - min(uncertainty))/ (max(uncertainty) - min(uncertainty) + sqrt(.Machine$double.eps)) b <- bubble(u, cex = cex*c(0.3, 2), alpha = c(0.3, 1)) if(is.null(classification)) { classification <- rep(1, length(u)) U <- 1 } if(is.null(colors)) colors <- palette()[1] cl <- sapply(classification, function(cl) which(cl == U)) plot(data, uncertainty, type = "h", xlab = xlab, ylab = if(is.null(ylab)) "Uncertainty" else ylab, xlim = xlim, ylim = if(is.null(ylim)) c(0,1) else ylim, col = mapply(adjustcolor, col = colors[cl], alpha.f = b$alpha), ...) rug(data, lwd = 1, col = adjustcolor(par("fg"), alpha.f = 0.8)) if(main) title("Uncertainty") }, "density" = { if(is.null(parameters$pro) && parameters$variance$G != 1) stop("mixing proportions missing") x <- grid1(n = ngrid, range = xlim, edge = TRUE) plot(x, dens(data = x, modelName = "V", parameters = parameters), xlab = xlab, ylab = if(is.null(ylab)) "Density" else ylab, xlim = xlim, type = "l", main = "", ...) if(main) title("Density") }, { plot(data, rep(0, n), type = "n", xlab = "", ylab = "", xlim = xlim, main = "", ...) 
vpoints(data, rep(0, n), cex = cex) if(main) title("Point Plot") } ) invisible() } mclust2Dplot <- function(data, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "uncertainty", "error"), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, scale = FALSE, cex = 1, PCH = ".", main = FALSE, swapAxes = FALSE, ...) { if(dim(data)[2] != 2) stop("data must be two dimensional") if(is.null(classification) && !is.null(z)) classification <- map(z) if(is.null(uncertainty) && !is.null(z)) uncertainty <- 1 - apply(z, 1, max) if(!is.null(parameters)) { mu <- parameters$mean L <- ncol(mu) sigma <- parameters$variance$sigma haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && !any(is.na(sigma)) } else haveParams <- FALSE main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) if(is.null(xlim)) xlim <- range(data[, 1]) if(is.null(ylim)) ylim <- range(data[, 2]) if(scale) { par(pty = "s") d <- diff(xlim) - diff(ylim) if(d > 0) { ylim <- c(ylim[1] - d/2, ylim[2] + d/2.) 
} else { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) } } dnames <- dimnames(data)[[2]] if(is.null(xlab)) { xlab <- if(is.null(dnames)) "" else dnames[1] } if(is.null(ylab)) { ylab <- if(is.null(dnames)) "" else dnames[2] } if(haveParams) { G <- ncol(mu) dimpar <- dim(sigma) if(length(dimpar) != 3) { haveParams <- FALSE warning("covariance must be a 3D matrix") } if(G != dimpar[3]) { haveParams <- FALSE warning("means and variance parameters are incompatible") } mu <- array(mu, c(2, G)) sigma <- array(sigma, c(2, 2, G)) } if(swapAxes) { if(haveParams) { mu <- mu[2:1,] sigma <- sigma[2:1, 2:1,] } data <- data[, 2:1] } if(!is.null(truth)) { if(is.null(classification)) { classification <- truth truth <- NULL } else { if(length(unique(truth)) != length(unique(classification))) truth <- NULL else truth <- as.character(truth) } } if(charmatch("classification", what, nomatch = 0) && is.null(classification) && !is.null(z)) { classification <- map(z) } if(!is.null(classification)) { classification <- as.character(classification) U <- sort(unique(classification)) L <- length(U) noise <- (U[1] == "0") if(is.null(symbols)) { if(L <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols")[1:L] if(noise) { symbols <- c(16,symbols)[1:L] } } else if(L <= 9) { symbols <- as.character(1:9) } else if(L <= 26) { symbols <- LETTERS } } if(is.null(colors)) { if(L <= length(mclust.options("classPlotColors"))) { colors <- mclust.options("classPlotColors")[1:L] if(noise) { colors <- unique(c("black", colors))[1:L] } } } else if(length(colors) == 1) colors <- rep(colors, L) if(length(symbols) < L) { warning("more symbols needed to show classification ") symbols <- rep(16,L) } if(length(colors) < L) { warning("more colors needed to show classification ") colors <- rep("black",L) } } what <- match.arg(what, choices = eval(formals(mclust2Dplot)$what)) bad <- what == "classification" && is.null(classification) bad <- bad || (what == "uncertainty" && 
is.null(uncertainty)) bad <- bad || (what == "error" && (is.null(classification) || is.null(truth))) if(bad) stop("insufficient input for specified plot") switch(EXPR = what, "classification" = { plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) title("Classification") for(k in 1:L) { I <- classification == U[k] points(data[I, 1], data[I, 2], pch = symbols[k], col = colors[k], cex = if(U[k] == "0") cex/2 else cex) } }, "error" = { ERRORS <- classError(classification, truth)$misclassified plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) title("Classification Errors") CLASSES <- unique(as.character(truth)) symOpen <- c(2, 0, 1, 5) symFill <- c(17, 15, 16, 18) good <- rep(TRUE,length(classification)) good[ERRORS] <- FALSE if(L > 4) { points(data[good, 1], data[good, 2], pch = 1, col = colors, cex = cex) points(data[!good, 1], data[!good, 2], pch = 16, cex = cex) } else { for(k in 1:L) { K <- truth == CLASSES[k] points(data[K, 1], data[K, 2], pch = symOpen[k], col = colors[k], cex = cex) if(any(I <- (K & !good))) { points(data[I, 1], data[I, 2], pch = symFill[k], cex = cex) } } } }, "uncertainty" = { u <- (uncertainty - min(uncertainty))/ (max(uncertainty) - min(uncertainty) + sqrt(.Machine$double.eps)) b <- bubble(u, cex = cex*c(0.3, 2), alpha = c(0.3, 0.9)) cl <- sapply(classification, function(cl) which(cl == U)) plot(data[, 1], data[, 2], pch = 19, xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", cex = b$cex, col = mapply(adjustcolor, col = colors[cl], alpha.f = b$alpha), ...) if(main) title("Uncertainty") fillEllipses <- FALSE }, { plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) 
if(main) title("Point Plot") points(data[, 1], data[, 2], pch = PCH, cex = cex) } ) if(haveParams && addEllipses) { ## plot ellipsoids for(g in 1:G) mvn2plot(mu = mu[,g], sigma = sigma[,,g], k = 15, fillEllipse = fillEllipses, col = if(fillEllipses) colors[g] else rep("grey30",3)) } invisible() } # old version mvn2plot <- function(mu, sigma, k = 15, alone = FALSE, col = rep("grey30",3), pch = 8, lty = c(1,2), lwd = c(1,1)) { p <- length(mu) if (p != 2) stop("only two-dimensional case is available") if (any(unique(dim(sigma)) != p)) stop("mu and sigma are incompatible") ev <- eigen(sigma, symmetric = TRUE) s <- sqrt(rev(sort(ev$values))) V <- t(ev$vectors[, rev(order(ev$values))]) theta <- (0:k) * (pi/(2 * k)) x <- s[1] * cos(theta) y <- s[2] * sin(theta) xy <- cbind(c(x, -x, -x, x), c(y, y, -y, -y)) xy <- xy %*% V xy <- sweep(xy, MARGIN = 2, STATS = mu, FUN = "+") if(alone) { xymin <- apply(xy, 2, FUN = "min") xymax <- apply(xy, 2, FUN = "max") r <- ceiling(max(xymax - xymin)/2) xymid <- (xymin + xymax)/2 plot(xy[, 1], xy[, 2], type = "n", xlab = "x", ylab = "y", xlim = c(-r, r) + xymid[1], ylim = c(-r, r) + xymid[2]) } l <- length(x) i <- 1:l for(k in 1:4) { lines(xy[i,], col = col[1], lty = lty[1], lwd = lwd[1]) i <- i + l } x <- s[1] y <- s[2] xy <- cbind(c(x, -x, 0, 0), c(0, 0, y, -y)) xy <- xy %*% V xy <- sweep(xy, MARGIN = 2, STATS = mu, FUN = "+") lines(xy[1:2,], col = col[2], lty = lty[2], lwd = lwd[2]) lines(xy[3:4,], col = col[2], lty = lty[2], lwd = lwd[2]) points(mu[1], mu[2], col = col[3], pch = pch) invisible() } mvn2plot <- function(mu, sigma, k = 15, alone = FALSE, fillEllipse = FALSE, alpha = 0.3, col = rep("grey30", 3), pch = 8, lty = c(1,2), lwd = c(1,1), ...) 
{ p <- length(mu) if(p != 2) stop("only two-dimensional case is available") if(any(unique(dim(sigma)) != p)) stop("mu and sigma are incompatible") ev <- eigen(sigma, symmetric = TRUE) s <- sqrt(rev(sort(ev$values))) V <- t(ev$vectors[, rev(order(ev$values))]) theta <- (0:k) * (pi/(2 * k)) x <- s[1] * cos(theta) y <- s[2] * sin(theta) xy <- cbind(c(x, -x, -x, x), c(y, y, -y, -y)) xy <- xy %*% V xy <- sweep(xy, MARGIN = 2, STATS = mu, FUN = "+") # if(alone) { xymin <- apply(xy, 2, FUN = "min") xymax <- apply(xy, 2, FUN = "max") r <- ceiling(max(xymax - xymin)/2) xymid <- (xymin + xymax)/2 plot(xy[, 1], xy[, 2], type = "n", xlab = "x", ylab = "y", xlim = c(-r, r) + xymid[1], ylim = c(-r, r) + xymid[2]) } # draw ellipses if(fillEllipse) { col <- rep(col, 3) polygon(xy[chull(xy),], border = NA, col = adjustcolor(col[1], alpha.f = alpha)) } else { l <- length(x) i <- 1:l for(k in 1:4) { lines(xy[i,], col = col[1], lty = lty[1], lwd = lwd[1]) i <- i + l } } # draw principal axes and centroid x <- s[1] y <- s[2] xy <- cbind(c(x, -x, 0, 0), c(0, 0, y, -y)) xy <- xy %*% V xy <- sweep(xy, MARGIN = 2, STATS = mu, FUN = "+") lines(xy[1:2,], col = col[2], lty = lty[2], lwd = lwd[2]) lines(xy[3:4,], col = col[2], lty = lty[2], lwd = lwd[2]) points(mu[1], mu[2], col = col[3], pch = pch) # invisible() } clPairs <- function (data, classification, symbols = NULL, colors = NULL, cex = NULL, labels = dimnames(data)[[2]], cex.labels = 1.5, gap = 0.2, grid = FALSE, ...) { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) if(missing(classification)) classification <- rep(1, n) if(!is.factor(classification)) classification <- as.factor(classification) l <- length(levels(classification)) if(length(classification) != n) stop("classification variable must have the same length as nrows of data!") if(is.null(symbols)) { if(l == 1) { symbols <- "." 
} if(l <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols") } else { if(l <= 9) { symbols <- as.character(1:9) } else if(l <= 26) { symbols <- LETTERS[1:l] } else symbols <- rep(16,l) } } if(length(symbols) == 1) symbols <- rep(symbols, l) if(length(symbols) < l) { symbols <- rep(16, l) warning("more symbols needed") } if(is.null(colors)) { if(l <= length(mclust.options("classPlotColors"))) colors <- mclust.options("classPlotColors")[1:l] } if(length(colors) == 1) colors <- rep(colors, l) if(length(colors) < l) { colors <- rep( "black", l) warning("more colors needed") } if(is.null(cex)) cex <- rep(1, l) grid <- isTRUE(as.logical(grid)) if(d > 2) { pairs(x = data, labels = labels, panel = function(...) { if(grid) grid() points(...) }, pch = symbols[classification], col = colors[classification], cex = cex[classification], gap = gap, cex.labels = cex.labels, ...) } else if(d == 2) { plot(data, cex = cex[classification], pch = symbols[classification], col = colors[classification], panel.first = if(grid) grid(), ...) } invisible(list(d = d, class = levels(classification), col = colors, pch = symbols[seq(l)], cex = cex)) } clPairsLegend <- function(x, y, class, col, pch, cex, box = TRUE, ...) { usr <- par("usr") if(box & all(usr == c(0,1,0,1))) { oldpar <- par(mar = rep(0.2, 4), no.readonly = TRUE) on.exit(par(oldpar)) box(which = "plot", lwd = 0.8) } if(!all(usr == c(0,1,0,1))) { x <- x*(usr[2]-usr[1])+usr[1] y <- y*(usr[4]-usr[3])+usr[3] } dots <- list(...) 
dots$x <- x dots$y <- y dots$legend <- class dots$text.width <- max(strwidth(dots$title, units = "user"), strwidth(dots$legend, units = "user")) dots$col <- if(missing(col)) 1 else col dots$text.col <- if(missing(col)) 1 else col dots$pch <- if(missing(pch)) 1 else pch dots$cex <- if(missing(cex)) 1 else cex dots$title.col <- par("fg") dots$title.adj <- 0.1 dots$xpd <- NA do.call("legend", dots) } coordProj <- function(data, dimens = c(1,2), parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "error", "uncertainty"), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, scale = FALSE, xlim = NULL, ylim = NULL, cex = 1, PCH = ".", main = FALSE, ...) { if(is.null(dimens)) dimens <- c(1, 2) if(is.null(classification) && !is.null(z)) classification <- map(z) if(is.null(uncertainty) && !is.null(z)) uncertainty <- 1 - apply(z, 1, max) if(!is.null(parameters)) { mu <- parameters$mean L <- ncol(mu) sigma <- parameters$variance$sigma haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && !any( is.na(sigma)) } else haveParams <- FALSE data <- data[, dimens, drop = FALSE] if(dim(data)[2] != 2) stop("need two dimensions") if(is.null(xlim)) xlim <- range(data[, 1]) if(is.null(ylim)) ylim <- range(data[, 2]) if(scale) { oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) par(pty = "s") d <- diff(xlim) - diff(ylim) if(d > 0) { ylim <- c(ylim[1] - d/2, ylim[2] + d/2.) 
} else { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) } } if(is.null(dnames <- dimnames(data)[[2]])) xlab <- ylab <- "" else { xlab <- dnames[1] ylab <- dnames[2] } main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) if(haveParams) { G <- ncol(mu) dimpar <- dim(sigma) if(length(dimpar) != 3) { haveParams <- FALSE warning("covariance must be a 3D matrix") } if(G != dimpar[3]) { haveParams <- FALSE warning("means and variance parameters are incompatible") } mu <- array(mu[dimens, ], c(2, G)) sigma <- array(sigma[dimens, dimens, ], c(2, 2, G)) } if(!is.null(truth)) { truth <- as.factor(truth) if(is.null(classification)) { classification <- truth truth <- NULL } } if(!is.null(classification)) { classification <- as.factor(classification) U <- levels(classification) L <- nlevels(classification) noise <- (U[1] == "0") if(is.null(symbols)) { if(L <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols")[1:L] if(noise) { symbols <- c(16,symbols)[1:L] } } else if(L <= 9) { symbols <- as.character(1:9) } else if(L <= 26) { symbols <- LETTERS } } else if(length(symbols) == 1) symbols <- rep(symbols, L) if(is.null(colors)) { if(L <= length(mclust.options("classPlotColors"))) { colors <- mclust.options("classPlotColors")[1:L] if(noise) { colors <- unique(c("black", colors))[1:L] } } } else if(length(colors) == 1) colors <- rep(colors, L) if(length(symbols) < L) { warning("more symbols needed to show classification ") symbols <- rep(16,L) } if(length(colors) < L) { warning("more colors needed to show classification ") colors <- rep("black",L) } } if(length(what) > 1) what <- what[1] choices <- c("classification", "error", "uncertainty") m <- charmatch(what, choices, nomatch = 0) if(m) { what <- choices[m] bad <- what == "classification" && is.null(classification) bad <- bad || (what == "uncertainty" && is.null(uncertainty)) bad <- bad || (what == "error" && (is.null(classification) || is.null( truth))) if(bad) 
warning("insufficient input for specified plot") badClass <- (what == "error" && (length(unique(classification)) != length( unique(truth)))) if(badClass && !bad) warning("classification and truth differ in number of groups") bad <- bad && badClass } else { bad <- !m warning("what improperly specified") } if(bad) what <- "bad" switch(EXPR = what, "classification" = { plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) { TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Classification") title(main = TITLE) } for(k in 1:L) { I <- classification == U[k] points(data[I, 1], data[I, 2], pch = symbols[k], col = colors[k], cex = if(U[k] == "0") cex/3 else cex) } }, "error" = { ERRORS <- classError(classification, truth)$misclassified plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) { TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Errors") title(main = TITLE) } CLASSES <- levels(truth) symOpen <- symb2open(mclust.options("classPlotSymbols")) symFill <- symb2fill(mclust.options("classPlotSymbols")) good <- rep(TRUE, length(classification)) good[ERRORS] <- FALSE if(L > length(symOpen)) { points(data[good, 1], data[good, 2], pch = 1, col = colors, cex = cex) points(data[!good, 1], data[!good, 2], pch = 16, cex = cex) } else { for(k in 1:L) { K <- truth == CLASSES[k] if(any(I <- (K & good))) { points(data[I, 1], data[I, 2], pch = symOpen[k], col = colors[k], cex = cex) } if(any(I <- (K & !good))) { points(data[I, 1], data[I, 2], cex = cex, pch = symFill[k], col = "black", bg = "black") } } } }, "uncertainty" = { u <- (uncertainty - min(uncertainty)) / (max(uncertainty) - min(uncertainty) + sqrt(.Machine$double.eps)) b <- bubble(u, cex = cex * c(0.3, 2), alpha = c(0.3, 0.9)) cl <- sapply(classification, function(cl) which(cl == U)) plot(data[, 1], data[, 2], pch = 19, main = "", xlab = xlab, 
ylab = ylab, xlim = xlim, ylim = ylim, cex = b$cex, col = mapply(adjustcolor, col = colors[cl], alpha.f = b$alpha), ...) if(main) { TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Uncertainty") title(main = TITLE) } fillEllipses <- FALSE }, { plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) { TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection") title(main = TITLE) } points(data[, 1], data[, 2], pch = PCH, cex = cex) } ) if(haveParams && addEllipses) { ## plot ellipsoids for(g in 1:G) mvn2plot(mu = mu[,g], sigma = sigma[,,g], k = 15, fillEllipse = fillEllipses, col = if(fillEllipses) colors[g] else rep("grey30",3)) } invisible() } symb2open <- function(x) { symb <- 0:18 open <- c(0:14,0,1,2,5) open[sapply(x, function(x) which(symb == x))] } symb2fill <- function(x) { symb <- 0:18 fill <- c(15:17, 3:4, 23, 25, 7:9, 20, 11:18) fill[sapply(x, function(x) which(symb == x))] } randProj <- function(data, seeds = NULL, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "error", "uncertainty"), quantiles = c(0.75, 0.95), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, scale = FALSE, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, cex = 1, PCH = ".", main = FALSE, ...) 
{ if(is.null(classification) && !is.null(z)) classification <- map(z) if(is.null(uncertainty) && !is.null(z)) uncertainty <- 1 - apply(z, 1, max) if(!is.null(parameters)) { mu <- parameters$mean L <- ncol(mu) sigma <- parameters$variance$sigma haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && !any(is.na(sigma)) } else haveParams <- FALSE d <- ncol(data) if(haveParams) { G <- ncol(mu) dimpar <- dim(sigma) if(length(dimpar) != 3) { haveParams <- FALSE warning("covariance must be a 3D matrix") } if(G != dimpar[3]) { haveParams <- FALSE warning("means and variance parameters are incompatible") } cho <- array(apply(sigma, 3, chol), c(d, d, G)) } if(!is.null(truth)) { truth <- as.factor(truth) if(is.null(classification)) { classification <- truth truth <- NULL } else { if(length(unique(truth)) != length(unique(classification))) truth <- NULL else truth <- as.character(truth) } } if(!is.null(classification)) { classification <- as.factor(classification) U <- levels(classification) L <- nlevels(classification) noise <- (U[1] == "0") if(is.null(symbols)) { if(L <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols")[1:L] if(noise) { symbols <- c(16,symbols)[1:L] } } else if(L <= 9) { symbols <- as.character(1:9) } else if(L <= 26) { symbols <- LETTERS } } else if(length(symbols) == 1) symbols <- rep(symbols, L) if(is.null(colors)) { if(L <= length(mclust.options("classPlotColors"))) { colors <- mclust.options("classPlotColors")[1:L] if(noise) colors <- unique(c("black", colors))[1:L] } } else if(length(colors) == 1) colors <- rep(colors, L) if(length(symbols) < L) { warning("more symbols needed to show classification ") symbols <- rep(16,L) } if (length(colors) < L) { warning("more colors needed to show classification ") colors <- rep("black",L) } } if(is.null(xlab)) xlab <- "randProj1" if(is.null(ylab)) ylab <- "randProj2" what <- match.arg(what, choices = eval(formals(randProj)$what)) bad <- what == "classification" 
&& is.null(classification) bad <- bad || (what == "uncertainty" && is.null(uncertainty)) bad <- bad || (what == "error" && (is.null(classification) || is.null(truth))) if(bad) stop("insufficient input for specified plot") main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) nullXlim <- is.null(xlim) nullYlim <- is.null(ylim) if(scale || length(seeds) > 1) { oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) if(scale) par(pty = "s") if(length(seeds) > 1) par(ask = TRUE) } # if not provided get a seed at random if(length(seeds) == 0) { seeds <- as.numeric(Sys.time()) seeds <- (seeds - floor(seeds))*1e8 } for(seed in seeds) { set.seed(seed) # B <- orth2(d) B <- randomOrthogonalMatrix(d, 2) dataProj <- as.matrix(data) %*% B if(dim(dataProj)[2] != 2) stop("need two dimensions") if(nullXlim) xlim <- range(dataProj[,1]) if(nullYlim) ylim <- range(dataProj[,2]) if(scale) { d <- diff(xlim) - diff(ylim) if(d > 0) { ylim <- c(ylim[1] - d/2, ylim[2] + d/2.) } else { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) } } switch(what, "classification" = { plot(dataProj[,1:2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, ...) for(k in 1:L) { I <- classification == U[k] points(dataProj[I,1:2], pch = symbols[k], col = colors[k], cex = cex) } if(main) { TITLE <- paste("Random Projection showing Classification: seed = ", seed) title(TITLE) } }, "error" = { ERRORS <- classError(classification, truth)$misclassified plot(dataProj[, 1:2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, ...) 
if(main) { TITLE <- paste("Random Projection showing Errors: seed = ", seed) title(TITLE) } CLASSES <- unique(as.character(truth)) symOpen <- c(2, 0, 1, 5) symFill <- c(17, 15, 16, 18) good <- !ERRORS if(L > 4) { points(dataProj[good, 1:2], pch = 1, col = colors, cex = cex) points(dataProj[!good, 1:2], pch = 16, cex = cex) } else { for(k in 1:L) { K <- which(truth == CLASSES[k]) points(dataProj[K, 1:2], pch = symOpen[k], col = colors[k], cex = cex) if(any(I <- intersect(K, ERRORS))) points(dataProj[I,1:2], pch = symFill[k], cex = cex) } } }, "uncertainty" = { plot(dataProj[, 1:2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) { TITLE <- paste("Random Projection showing Uncertainty: seed = ", seed) title(TITLE) } breaks <- quantile(uncertainty, probs = sort(quantiles)) I <- uncertainty <= breaks[1] points(dataProj[I, 1:2], pch = 16, col = "gray75", cex = 0.5 * cex) I <- uncertainty <= breaks[2] & !I points(dataProj[I, 1:2], pch = 16, col = "gray50", cex = 1 * cex) I <- uncertainty > breaks[2] & !I points(dataProj[I, 1:2], pch = 16, col = "black", cex = 1.5 * cex) fillEllipses <- FALSE }, { plot(dataProj[, 1:2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, ...) 
if(main) { TITLE <- paste("Random Projection: seed = ", seed) title(TITLE) } points(dataProj[, 1:2], pch = PCH, cex = cex) } ) muProj <- crossprod(B, mu) sigmaProj <- array(apply(cho, 3, function(R) crossprod(R %*% B)), c(2, 2, G)) if(haveParams && addEllipses) { ## plot ellipsoids for(g in 1:G) mvn2plot(mu = muProj[,g], sigma = sigmaProj[,,g], k = 15, fillEllipse = fillEllipses, col = if(fillEllipses) colors[g] else rep("grey30",3)) } } invisible(list(basis = B, data = dataProj, mu = muProj, sigma = sigmaProj)) } surfacePlot <- function(data, parameters, what = c("density", "uncertainty"), type = c("contour", "hdr", "image", "persp"), transformation = c("none", "log", "sqrt"), grid = 200, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), col = gray(0.5), col.palette = function(...) hcl.colors(..., "blues", rev = TRUE), hdr.palette = blue2grey.colors, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, main = FALSE, scale = FALSE, swapAxes = FALSE, verbose = FALSE, ...) { data <- as.matrix(data) if(dim(data)[2] != 2) stop("data must be two dimensional") if(any(type == "level")) type[type == "level"] <- "hdr" # TODO: to be removed at certain point type <- match.arg(type, choices = eval(formals(surfacePlot)$type)) what <- match.arg(what, choices = eval(formals(surfacePlot)$what)) transformation <- match.arg(transformation, choices = eval(formals(surfacePlot)$transformation)) # densNuncer <- function(modelName, data, parameters) { if(is.null(parameters$variance$cholsigma)) { parameters$variance$cholsigma <- parameters$variance$sigma G <- dim(parameters$variance$sigma)[3] for(k in 1:G) parameters$variance$cholsigma[,,k] <- chol(parameters$variance$sigma[,,k]) } cden <- cdensVVV(data = data, parameters = parameters, logarithm = TRUE) pro <- parameters$pro if(!is.null(parameters$Vinv)) pro <- pro[-length(pro)] z <- sweep(cden, MARGIN = 2, FUN = "+", STATS = log(pro)) logden <- apply(z, 1, logsumexp) z <- sweep(z, MARGIN = 1, FUN = "-", STATS = logden) z <- exp(z) 
data.frame(density = exp(logden), uncertainty = 1 - apply(z, 1, max)) } pro <- parameters$pro mu <- parameters$mean sigma <- parameters$variance$sigma haveParams <- (!is.null(mu) && !is.null(sigma) && !is.null(pro) && !any(is.na(mu)) && !any(is.na(sigma)) && !(any(is.na(pro)))) if(haveParams) { G <- ncol(mu) dimpar <- dim(sigma) if(length(dimpar) != 3) { haveParams <- FALSE warning("covariance must be a 3D matrix") } if(G != dimpar[3]) { haveParams <- FALSE warning("means and variance parameters are incompatible") } mu <- array(mu, c(2, G)) sigma <- array(sigma, c(2, 2, G)) } else stop("need parameters to compute density") if(swapAxes) { if(haveParams) { parameters$pro <- pro[2:1] parameters$mean <- mu[2:1,] parameters$variance$sigma <- sigma[2:1, 2:1,] } data <- data[, 2:1] } main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) if(is.null(xlim)) xlim <- range(data[, 1]) if(is.null(ylim)) ylim <- range(data[, 2]) if(scale) { par(pty = "s") d <- diff(xlim) - diff(ylim) if(d > 0) { ylim <- c(ylim[1] - d/2, ylim[2] + d/2) } else { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) } } dnames <- dimnames(data)[[2]] if(is.null(xlab)) { xlab <- if(is.null(dnames)) "" else dnames[1] } if(is.null(ylab)) { ylab <- if(is.null(dnames)) "" else dnames[2] } if(length(grid) == 1) grid <- c(grid, grid) x <- grid1(n = grid[1], range = xlim, edge = TRUE) y <- grid1(n = grid[2], range = ylim, edge = TRUE) xy <- grid2(x, y) if(verbose) message("computing density and uncertainty over grid ...") Z <- densNuncer(modelName = "VVV", data = xy, parameters = parameters) lx <- length(x) ly <- length(y) # switch(what, "density" = { zz <- matrix(Z$density, lx, ly) title2 <- "Density" }, "uncertainty" = { zz <- matrix(Z$uncertainty, lx, ly) title2 <- "Uncertainty" }, stop("what improperly specified")) # switch(transformation, "none" = { title1 <- "" }, "log" = { zz <- log(zz) title1 <- "log" }, "sqrt" = { zz <- sqrt(zz) title1 <- "sqrt" }, stop("transformation improperly 
specified")) # switch(type, "contour" = { title3 <- "Contour" if(is.null(levels)) levels <- pretty(zz, nlevels) contour(x = x, y = y, z = zz, levels = levels, xlab = xlab, ylab = ylab, col = col, main = "", ...) }, "hdr" = { title3 <- "HDR level" z <- densNuncer(modelName = "VVV", data = data, parameters = parameters)$density levels <- c(sort(hdrlevels(z, prob)), 1.1*max(z)) plot(x, y, type = "n", xlab = xlab, ylab = ylab, ...) fargs <- formals(".filled.contour") dargs <- c(list(x = x, y = y, z = zz, levels = levels, col = hdr.palette(length(levels))), args) dargs <- dargs[names(dargs) %in% names(fargs)] fargs[names(dargs)] <- dargs do.call(".filled.contour", fargs) }, "image" = { title3 <- "Image" col <- col.palette(nlevels) if(length(col) == 1) { if(!is.null(levels)) nlevels <- length(levels) col <- mapply(adjustcolor, col = col, alpha.f = seq(0.1, 1, length = nlevels)) } image(x = x, y = y, z = zz, xlab = xlab, ylab = ylab, col = col, main = "", ...) }, "persp" = { title3 <- "Perspective" dots <- list(...) if(is.null(dots$theta)) dots$theta <- -30 if(is.null(dots$phi)) dots$phi <- 20 if(is.null(dots$expand)) dots$expand <- 0.6 p3d <- do.call("persp", c(list(x = x, y = y, z = zz, border = NA, xlab = xlab, ylab = ylab, col = adjustcolor(col, alpha.f = 0.5), zlab = "Density", main = ""), dots)) ii <- floor(seq(1, length(y), length.out = 2*nlevels)) for(i in ii[-c(1,length(ii))]) lines(trans3d(x, y[i], zz[,i], pmat = p3d)) ii <- floor(seq(1, length(x), length.out = 2*nlevels)) for(i in ii[-c(1,length(ii))]) lines(trans3d(x[i], y, zz[i,], pmat = p3d)) } ) if(main) { TITLE <- paste(c(title1, title2, title3, "Plot"), collapse = " ") title(TITLE) } invisible(list(x = x, y = y, z = zz)) } uncerPlot <- function (z, truth=NULL, ...) 
{ oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) par(pty = "m") uncer <- 1 - apply(z, 1, max) ord <- order(uncer) M <- max(uncer) plot(uncer[ord], type = "n", xaxt = "n", ylim = c(-(M/32), M), ylab = "uncertainty", xlab = "observations in order of increasing uncertainty") points(uncer[ord], pch = 15, cex = 0.5) lines(uncer[ord]) abline(h = c(0, 0), lty = 3) if (!is.null(truth)) { truth <- as.numeric(as.factor(truth)) n <- length(truth) result <- map(z) bad <- classError(result, truth)$misclassified if(length(bad)) { for(i in bad) { x <- (1:n)[ord == i] lines(c(x, x), c(-(0.5/32), uncer[i]), lty = 1) } } } invisible() } blue2grey.colors <- function(n) { # manually selected basecol <- c("#E6E6E6", "#bcc9d1", "#6c7f97", "#3e5264") # selected using colorspace::sequential_hcl(5, palette = "blues2") # basecol <- c("#023FA5", "#6A76B2", "#A1A6C8", "#CBCDD9", "#E2E2E2") palette <- grDevices::colorRampPalette(basecol, space = "Lab") palette(n) } bubble <- function(x, cex = c(0.2, 3), alpha = c(0.1, 1)) { x <- as.vector(x) cex <- cex[!is.na(cex)] alpha <- alpha[!is.na(alpha)] x <- (x - min(x))/(max(x) - min(x) + sqrt(.Machine$double.eps)) n <- length(x) r <- sqrt(x/pi) r <- (r - min(r, na.rm = TRUE))/ (max(r, na.rm = TRUE) - min(r, na.rm = TRUE) + sqrt(.Machine$double.eps)) cex <- r * diff(range(cex)) + min(cex) alpha <- x * diff(range(alpha)) + min(alpha) return(list(cex = cex, alpha = alpha)) } grid1 <- function(n, range = c(0, 1), edge = TRUE) { if(any(n < 0 | round(n) != n)) stop("n must be nonpositive and integer") G <- rep(0, n) if(edge) { G <- seq(from = min(range), to = max(range), by = abs(diff(range))/(n-1)) } else { lj <- abs(diff(range)) incr <- lj/(2 * n) G <- seq(from = min(range) + incr, to = max(range) - incr, by = 2 * incr) } return(G) } grid2 <- function(x, y) { lx <- length(x) ly <- length(y) xy <- matrix(0, nrow = lx * ly, ncol = 2) l <- 0 for(j in 1:ly) { for(i in 1:lx) { l <- l + 1 xy[l,] <- c(x[i], y[j]) } } return(xy) } vpoints <- function(x, 
y, col, cex = 1, ...) { xy <- xy.coords(x, y) symbols(xy$x, xy$y, add = TRUE, inches = 0.2*cex, fg = if(missing(col)) par("col") else col, rectangles = matrix(c(0,1), nrow = length(xy$x), ncol = 2, byrow = TRUE), ...) } # Discriminant coordinates / crimcoords ------------------------------- crimcoords <- function(data, classification, numdir = NULL, unbiased = FALSE, plot = TRUE, ...) { X <- as.matrix(data) n <- nrow(X) p <- ncol(X) classification <- as.vector(classification) stopifnot(length(classification) == n) Z <- unmap(classification) G <- ncol(Z) nk <- colSums(Z) # overall mean M <- matrix(apply(X,2,mean), n, p, byrow=TRUE) # within-group means Mk <- sweep(crossprod(Z, X), 1, FUN = "/", STATS = nk) ZMk <- Z %*% Mk # pooled within-groups covar W <- crossprod(X - ZMk) W <- if(unbiased) W/(n-G) else W/n # between-groups covar B <- crossprod(ZMk - M) B <- if(unbiased) B/(G-1) else B/G # manova identity: (n-1)*var(X) = # if(unbiased) (n-G)*W+(G-1)*B else n*W+G*B # generalized eigendecomposition of B with respect to W SVD <- eigen.decomp(B, W) l <- SVD$l; l <- (l+abs(l))/2 if(is.null(numdir)) numdir <- sum(l > sqrt(.Machine$double.eps)) numdir <- min(p, numdir) basis <- as.matrix(SVD$v)[,seq(numdir),drop=FALSE] dimnames(basis) <- list(colnames(X), paste("crimcoords", 1:numdir, sep="")) proj <- X %*% basis s <- sign(apply(proj,2,median)) proj <- sweep(proj, 2, FUN = "*", STATS = s) basis <- sweep(basis, 2, FUN = "*", STATS = s) obj <- list(means = Mk, B = B, W = W, evalues = l, basis = basis, projection = proj, classification = classification) class(obj) <- "crimcoords" if(plot) plot(obj) invisible(obj) } print.crimcoords <- function(x, digits = getOption("digits"), ...) { cat("\'", class(x)[1], "\' object:\n", sep = "") str(x, max.level = 1, give.attr = FALSE, strict.width = "wrap") invisible() } plot.crimcoords <- function(x, ...) { object <- x # Argh. 
Really want to use object anyway numdir <- ncol(object$projection) G <- length(unique(object$classification)) if(numdir >= 2) { clPairs(object$projection, object$classification, ...) } else { args <- list(...) cols <- if(!is.null(args$colors)) args$colors else if(!is.null(args$col)) args$col else mclust.options("classPlotColors") cols <- adjustcolor(cols, alpha.f = 0.5)[1:G] br <- pretty(object$projection, n = nclass.Sturges(object$projection)) x <- split(object$projection[,1], object$classification) hist(x[[1]], breaks = br, probability = TRUE, xlim = extendrange(br), main = NULL, xlab = colnames(object$projection), border = FALSE, col = cols[1]) rug(x[[1]], col = cols[1]) for(k in seq_len(G-1)+1) { lines(hist(x[[k]], breaks = br, plot = FALSE), freq = FALSE, border = FALSE, col = cols[k], ann = FALSE) rug(x[[k]], col = cols[k]) } box() } invisible() } mclust/R/mclustdr.R0000644000175000017500000011576014156711341014111 0ustar nileshnilesh###################################################### ## ## ## Dimension reduction for model-based ## ## clustering and classification ## ## ## ## Author: Luca Scrucca ## ###################################################### # GMMDR dimension reduction ----------------------------------------------- MclustDR <- function(object, lambda = 1, normalized = TRUE, Sigma, tol = sqrt(.Machine$double.eps)) { # Dimension reduction for model-based clustering and classification call <- match.call() if(!any(class(object) %in% c("Mclust", "MclustDA"))) stop("object must be of class 'Mclust' or 'MclustDA'") x <- data.matrix(object$data) n <- nrow(x) p <- ncol(x) lambda <- pmax(0, min(lambda, 1)) #----------------------------------------------------------------- # overall parameters mu <- colMeans(x) if(missing(Sigma)) Sigma <- var(x)*(n-1)/n # within-cluster parameters based on fitted mixture model if(inherits(object, "Mclust")) { type <- "Mclust" G <- object$G modelName <- object$modelName y <- object$classification cl2mc <- seq(G) class <- 
as.factor(y) par <- object$parameters f <- par$pro if(is.null(f)) f <- 1 if(!is.na(object$hypvol)) f <- f[-length(f)] # within-group means mu.G <- matrix(par$mean,p,G) # within-group covars if(p == 1) { Sigma.G <- array(par$variance$sigmasq, c(p,p,G)) } else { Sigma.G <- par$variance$sigma } } else if(inherits(object, "MclustDA")) { type <- object$type modelName <- sapply(object$models, function(m) m$modelName) class <- object$class class <- factor(class, levels = names(object$models)) y <- rep(NA, length(class)) for(i in 1:nlevels(class)) { y[class == levels(class)[i]] <- paste(levels(class)[i], object$models[[i]]$classification, sep =":") } y <- as.numeric(factor(y)) cl2mc <- rep(seq(length(object$models)), sapply(object$models, function(m) m$G)) m <- sapply(object$models, function(mod) mod$n) ncomp <- sapply(object$models, function(mod) mod$G) G <- sum(ncomp) f <- vector(length = G) mu.G <- matrix(as.double(NA), nrow = p, ncol = G) Sigma.G <- array(NA, dim = c(p,p,G)) for(i in 1:length(object$models)) { ii <- seq(c(0,cumsum(ncomp))[i]+1,c(0,cumsum(ncomp))[i+1]) par <- object$models[[i]]$parameters if(is.null(par$pro)) par$pro <- 1 f[ii] <- par$pro * m[i]/sum(m) # within-group means mu.G[,ii] <- par$mean # within-group covars if(p == 1) { Sigma.G[,,ii] <- array(par$variance$sigmasq, c(p,p,1)) } else { Sigma.G[,,ii] <- par$variance$sigma } } } #----------------------------------------------------------------- SVD <- svd(Sigma, nu = 0, nv = min(n,p)) pos <- which(SVD$d > max(tol*SVD$d[1], 0)) SVD$d <- SVD$d[pos] SVD$v <- SVD$v[,pos,drop=FALSE] inv.Sigma <- SVD$v %*% (1/SVD$d * t(SVD$v)) inv.sqrt.Sigma <- SVD$v %*% (1/sqrt(SVD$d) * t(SVD$v)) #----------------------------------------------------------------- # pooled within-group covariance S <- matrix(0, p, p) for(j in seq_len(G)) S <- S + f[j]*Sigma.G[,,j] #----------------------------------------------------------------- # kernel matrix M.I <- crossprod(t(sweep(mu.G, 1, FUN="-", STATS=mu))*sqrt(f)) M.II <- 
matrix(0, p, p) if(lambda < 1) { for(j in seq_len(G)) M.II <- M.II + f[j]*crossprod(inv.sqrt.Sigma%*%(Sigma.G[,,j]-S)) } # convex combination of M_I and M_II M <- 2*lambda*crossprod(inv.sqrt.Sigma %*% M.I) + 2*(1-lambda)*M.II # regularize the M_II # M <- M.I + lambda*M.II # M <- crossprod(inv.sqrt.Sigma %*% M.I) + # (1-lambda)*M.II + lambda/p * diag(p) # SVD <- eigen.decomp(M, inv.sqrt.Sigma, invsqrt = TRUE) l <- SVD$l; l <- (l+abs(l))/2 numdir <- min(p, sum(l > sqrt(.Machine$double.eps))) basis <- as.matrix(SVD$v)[,seq(numdir),drop=FALSE] sdx <- diag(Sigma) std.basis <- as.matrix(apply(basis, 2, function(x) x*sdx)) if(normalized) { basis <- as.matrix(apply(basis, 2, normalize)) std.basis <- as.matrix(apply(std.basis, 2, normalize)) } dimnames(basis) <- list(colnames(x), paste("Dir", 1:ncol(basis), sep="")) dimnames(std.basis) <- dimnames(basis) Z <- scale(x, scale = FALSE) %*% basis # out = list(call = call, type = type, x = x, Sigma = Sigma, classification = class, mixcomp = y, class2mixcomp = cl2mc, G = G, modelName = modelName, mu = mu.G, sigma = Sigma.G, pro = f, M = M, M.I = M.I, M.II = M.II, lambda = lambda, evalues = l, raw.evectors = as.matrix(SVD$v), basis = basis, std.basis = std.basis, numdir = numdir, dir = Z) class(out) = "MclustDR" return(out) } print.MclustDR <- function(x, digits = getOption("digits"), ...) { txt <- paste0("\'", class(x)[1], "\' model object: ") catwrap(txt) cat("\n") catwrap("\nAvailable components:\n") print(names(x)) # str(x, max.level = 1, give.attr = FALSE, strict.width = "wrap") invisible(x) } summary.MclustDR <- function(object, numdir, std = FALSE, ...) 
{ if(missing(numdir)) numdir <- object$numdir dim <- seq(numdir) if(object$type == "Mclust") { n <- as.vector(table(object$classification)) G <- object$G } else { n <- as.vector(table(object$classification)) G <- as.vector(table(object$class2mixcomp)) } obj <- list(type = object$type, modelName = object$modelName, classes = levels(object$classification), n = n, G = G, basis = object$basis[,seq(dim),drop=FALSE], std = std, std.basis = object$std.basis[,seq(dim),drop=FALSE], evalues = object$evalues[seq(dim)], evalues.cumperc = with(object, { evalues <- evalues[seq(numdir)] cumsum(evalues)/sum(evalues)*100 }) ) class(obj) <- "summary.MclustDR" return(obj) } print.summary.MclustDR <- function(x, digits = max(5, getOption("digits") - 3), ...) { title <- paste("Dimension reduction for model-based clustering and classification") txt <- paste(rep("-", min(nchar(title), getOption("width"))), collapse = "") catwrap(txt) catwrap(title) catwrap(txt) if(x$type == "Mclust") { tab <- data.frame(n = x$n) rownames(tab) <- x$classes tab <- as.matrix(tab) names(dimnames(tab)) <- c("Clusters", "") cat("\n") catwrap(paste0("Mixture model type: ", x$type, " (", x$modelName, ", ", x$G, ")")) print(tab, quote = FALSE, right = TRUE) } else if(x$type == "MclustDA" | x$type == "EDDA") { tab <- data.frame(n = x$n, Model = x$modelName, G = x$G) rownames(tab) <- x$classes tab <- as.matrix(tab) names(dimnames(tab)) <- c("Classes", "") cat("\n") catwrap(paste("Mixture model type:", x$type)) print(tab, quote = FALSE, right = TRUE) } else stop("invalid model type") cat("\n") if(x$std) { catwrap("Standardized basis vectors using predictors scaled to have std.dev. equal to one:") print(x$std.basis, digits = digits) } else { catwrap("Estimated basis vectors:") print(x$basis, digits = digits) } cat("\n") evalues <- rbind("Eigenvalues" = x$evalues, "Cum. 
%" = x$evalues.cumperc) colnames(evalues) <- colnames(x$basis) print(evalues, digits=digits) invisible() } projpar.MclustDR <- function(object, dim, center = TRUE, raw = FALSE) { # Transform estimated parameters to projection subspace given by # 'dim' directions x <- object$x p <- ncol(x) n <- nrow(x) G <- object$G numdir <- object$numdir if(missing(dim)) dim <- seq(numdir) numdir <- length(dim) if(raw) V <- object$raw.evectors[,dim,drop=FALSE] else V <- object$basis[,dim,drop=FALSE] # mu <- t(object$mu) if(center) mu <- scale(mu, center = apply(x,2,mean), scale = FALSE) Mu <- mu %*% V # sigma <- object$sigma cho <- array(apply(sigma, 3, chol), c(p, p, G)) Sigma <- array(apply(cho, 3, function(R) crossprod(R %*% V)), c(numdir, numdir, G)) # return(list(mean = Mu, variance = Sigma)) } predict.MclustDR <- function(object, dim = 1:object$numdir, newdata, eval.points, ...) { dim <- dim[dim <= object$numdir] if(missing(newdata) & missing(eval.points)) { dir <- object$dir[,dim,drop=FALSE] } else if(!missing(newdata)) { newdata <- as.matrix(newdata) newdata <- scale(newdata, center = colMeans(object$x), scale = FALSE) dir <- newdata %*% object$basis[,dim,drop=FALSE] } else if(!missing(eval.points)) { dir <- as.matrix(eval.points) } n <- nrow(dir) G <- object$G # num. components nclass <- nlevels(object$classification) # num. 
classes par <- projpar.MclustDR(object, dim) Mu <- par$mean Sigma <- par$variance # old version # cden <- array(NA, c(n, G)) # for(j in 1:G) # { cden[,j] <- dmvnorm(dir, Mu[j,], Sigma[,,j], log = FALSE) } # z <- sweep(cden, 2, FUN = "*", STATS = object$pro) # den <- apply(z, 1, sum) # z <- sweep(z, 1, FUN = "/", STATS = den) # new version: more efficient and accurate z <- array(NA, c(n, G)) for(j in 1:G) { z[,j] <- dmvnorm(dir, Mu[j,], Sigma[,,j], log = TRUE) } z <- sweep(z, 2, FUN = "+", STATS = log(object$pro)) logden <- apply(z, 1, logsumexp) z <- sweep(z, 1, FUN = "-", STATS = logden) z <- exp(z) # zz <- matrix(0, n, nclass) for(j in seq(nclass)) { zz[,j] <- rowSums(z[,object$class2mixcomp == j,drop=FALSE]) } z <- zz; rm(zz) class <- factor(apply(z,1,which.max), levels = 1:nclass, labels = levels(object$classification)) out <- list(dir = dir, density = exp(logden), z = z, uncertainty = 1 - apply(z,1,max), classification = class) return(out) } predict2D.MclustDR <- function(object, dim = 1:2, ngrid = 100, xlim, ylim) { dim <- dim[1:2] dir <- object$dir[,dim,drop=FALSE] G <- object$G par <- projpar.MclustDR(object, dim) Mu <- par$mean Sigma <- par$variance if(missing(xlim)) xlim <- range(dir[,1]) # +c(-1,1)*0.05*diff(range(x))) if(missing(ylim)) ylim <- range(dir[,2]) # +c(-1,1)*0.05*diff(range(x))) xygrid <- cbind(seq(xlim[1], xlim[2], length = ngrid), seq(ylim[1], ylim[2], length = ngrid)) grid <- expand.grid(xygrid[,1], xygrid[,2]) pred <- predict.MclustDR(object, dim = dim, eval.points = grid) out <- list(x = xygrid[,1], y = xygrid[,2], density = matrix(pred$density, ngrid, ngrid), z = array(pred$z, c(ngrid, ngrid, ncol(pred$z))), uncertainty = matrix(pred$uncertainty, ngrid, ngrid), classification = matrix(pred$classification, ngrid, ngrid)) return(out) } plot.MclustDR <- function(x, dimens, what = c("scatterplot", "pairs", "contour", "classification", "boundaries", "density", "evalues"), symbols, colors, col.contour = gray(0.7), col.sep = grey(0.4), ngrid = 
200, nlevels = 5, asp = NULL, ...) { object <- x x <- object$x p <- ncol(x) n <- nrow(x) G <- object$G y <- object$mixcomp class <- as.numeric(object$classification) nclass <- length(table(class)) dir <- object$dir numdir <- object$numdir dimens <- if(missing(dimens)) seq(numdir) else intersect(as.numeric(dimens), seq(numdir)) if(length(dimens) == 0) stop("invalid 'dimens' value(s) provided") if(missing(symbols)) { if(G <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols") } else if(G <= 26) { symbols <- LETTERS } } if(length(symbols) == 1) symbols <- rep(symbols,nclass) if(length(symbols) < nclass) { warning("more symbols needed to show classification") symbols <- rep(16, nclass) } if(missing(colors)) { colors <- mclust.options("classPlotColors") } if(length(colors) == 1) colors <- rep(colors,nclass) if(length(colors) < nclass) { warning("more colors needed to show mixture components") colors <- rep("black", nclass) } #################################################################### what <- match.arg(what, several.ok = TRUE) oldpar <- par(no.readonly = TRUE) # on.exit(par(oldpar)) if(any(i <- (what == "pairs")) & (length(dimens) == 2)) { what[i] <- "scatterplot" } if(length(dimens) == 1) { what[!(what == "classification" | what == "density" | what == "evalues")] <- "classification" } what <- unique(what) plot.MclustDR.scatterplot <- function(...) { dir <- dir[,dimens,drop=FALSE] plot(dir, col = colors[class], pch = symbols[class], xlab = colnames(dir)[1], ylab = colnames(dir)[2], asp = asp, ...) } plot.MclustDR.pairs <- function(...) { dir <- dir[,dimens,drop=FALSE] pairs(dir, col = colors[class], pch = symbols[class], gap = 0.2, asp = asp, ...) } plot.MclustDR.density <- function(...) 
{ # Marginal density of the data projected onto the first requested
  # direction: one curve per mixture component (Mclust) or, for MclustDA,
  # one curve per class obtained by summing its components' densities.
  # NOTE: closure over plot.MclustDR locals (object, ngrid, G, nclass,
  # y, class, colors).
  dimens <- dimens[1]
  dir <- object$dir[,dimens,drop=FALSE]
  # 'par' here shadows graphics::par for non-call uses only; calls like
  # par("usr") elsewhere still resolve to the function (R call semantics)
  par <- projpar.MclustDR(object, dimens)
  Mu <- par$mean
  Sigma <- par$variance
  # evaluation grid slightly wider than the observed range
  xgrid <- extendrange(dir, f = 0.1)
  xgrid <- seq(min(xgrid), max(xgrid), length=2*ngrid)
  # per-component univariate normal densities on the projected direction
  dens <- matrix(as.double(NA), length(xgrid), G)
  for(j in 1:G)
    dens[,j] <- dnorm(xgrid, Mu[j,], sqrt(Sigma[,,j]))
  #
  if(object$type == "MclustDA")
  { # weight components by mixing proportions, then aggregate the
    # components belonging to each class (via the y-by-class table)
    d <- t(apply(dens, 1, function(x, p = object$pro) p*x))
    dens <- matrix(as.double(NA), length(xgrid), nclass)
    tab <- table(y, class)
    for(i in 1:ncol(tab))
    { j <- which(tab[,i] > 0)
      dens[,i] <- apply(d[,j,drop=FALSE],1,sum) }
  }
  plot(0, 0, type = "n", xlab = colnames(dir), ylab = "Density",
       xlim = range(xgrid), ylim = range(0, dens*1.1))
  for(j in 1:ncol(dens))
    lines(xgrid, dens[,j], col = colors[j])
  # data rug along the projected direction
  rug(dir, lwd = 1)
}

plot.MclustDR.contour <- function(...)
{ # Contours of the class/cluster densities on the first two requested
  # directions, with the projected data points overlaid.
  # NOTE: closure over plot.MclustDR locals (object, nclass, ngrid,
  # col.contour, nlevels, asp, colors, symbols, class).
  dimens <- dimens[1:2]
  dir <- object$dir[,dimens,drop=FALSE]
  # local 'par' (projected parameters) shadows graphics::par for non-call
  # uses; par("usr") below still calls the graphics function
  par <- projpar.MclustDR(object, dimens)
  Mu <- par$mean
  Sigma <- par$variance
  # draw contours for each class or cluster
  plot(dir, type = "n", asp = asp)
  for(k in seq(nclass))
  { # components belonging to class/cluster k, with renormalized proportions
    i <- which(object$class2mixcomp == k)
    parameters <- list(pro = object$pro[i]/sum(object$pro[i]),
                       mean = t(par$mean[i,,drop=FALSE]),
                       variance = list(G = length(i), d = 2,
                                       sigma = par$variance[,,i,drop=FALSE]))
    surfacePlot(dir, parameters, col = col.contour,
                nlevels = nlevels, grid = ngrid,
                xlim = par("usr")[1:2], ylim = par("usr")[3:4],
                asp = asp, add = TRUE)
  }
  points(dir, col = colors[class], pch = symbols[class], ...)
}

plot.MclustDR.classification.Mclust <- function(...)
{ if(object$numdir == 1) { dir <- object$dir[,1] boxplot(dir ~ class, horizontal = TRUE, col = adjustcolor(mclust.options("classPlotColors"), alpha.f = 0.3)[1:nclass], border = mclust.options("classPlotColors")[1:nclass], ylab = "Classification", xlab = "Dir1", xlim = c(0,nclass+0.5)) points(dir, rep(0,n), pch = "|") return() } # numdir >= 2 dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] pred <- predict2D.MclustDR(object, dimens, ngrid, xlim = extendrange(dir[,1], f = 0.05), ylim = extendrange(dir[,2], f = 0.05)) pred$classification <- apply(pred$z, 1:2, which.max) image(pred$x, pred$y, pred$classification, col = adjustcolor(colors[1:G], alpha.f = 0.1), xlab = colnames(dir)[1], ylab = colnames(dir)[2], xaxs = "i", yaxs = "i", asp = asp) points(dir, col = colors[class], pch = symbols[class], ...) } plot.MclustDR.classification.MclustDA <- function(...) { dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] pred <- predict2D.MclustDR(object, dimens, ngrid, xlim = extendrange(dir[,1], f = 0.05), ylim = extendrange(dir[,2], f = 0.05)) pred$classification <- apply(pred$z, 1:2, which.max) image(pred$x, pred$y, pred$classification, col = adjustcolor(colors[1:nclass], alpha.f = 0.1), xlab = colnames(dir)[1], ylab = colnames(dir)[2], xaxs = "i", yaxs = "i", asp = asp) points(dir, col = colors[class], pch = symbols[class], ...) } plot.MclustDR.boundaries.Mclust <- function(...) { dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] pred <- predict2D.MclustDR(object, dimens, ngrid, xlim = extendrange(dir[,1], f = 0.05), ylim = extendrange(dir[,2], f = 0.05)) pred$uncertainty[c(1,ngrid),] <- NA pred$uncertainty[,c(1,ngrid)] <- NA image(pred$x, pred$y, pred$uncertainty, col = rev(gray.colors(10, start = 0, end = 1)), breaks = seq(0, 1-1/nclass, length = 11), xlab = colnames(dir)[1], ylab = colnames(dir)[2], xaxs = "i", yaxs = "i", asp = asp) points(dir, col = colors[class], pch = symbols[class], ...) 
} plot.MclustDR.boundaries.MclustDA <- function(...) { dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] pred <- predict2D.MclustDR(object, dimens, ngrid, xlim = extendrange(dir[,1], f = 0.05), ylim = extendrange(dir[,2], f = 0.05)) pred$uncertainty[c(1,ngrid),] <- NA pred$uncertainty[,c(1,ngrid)] <- NA image(pred$x, pred$y, pred$uncertainty, col = rev(gray.colors(10, start = 0, end = 1)), breaks = seq(0, 1-1/nclass, length = 11), xlab = colnames(dir)[1], ylab = colnames(dir)[2], xaxs = "i", yaxs = "i", asp = asp) points(dir, col = colors[class], pch = symbols[class], ...) } plot.MclustDR.evalues <- function(...) { plotEvalues.MclustDR(object, numdir = max(dimens), plot = TRUE) } if(interactive() & length(what) > 1) { title <- "Dimension reduction for model-based clustering and classification plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "scatterplot") plot.MclustDR.scatterplot(...) if(what[choice] == "pairs") plot.MclustDR.pairs(...) if(what[choice] == "contour") plot.MclustDR.contour(...) if(what[choice] == "classification" & object$type == "Mclust") plot.MclustDR.classification.Mclust(...) if(what[choice] == "classification" & (object$type == "EDDA" | object$type == "MclustDA")) plot.MclustDR.classification.MclustDA(...) if(what[choice] == "boundaries" & object$type == "Mclust") plot.MclustDR.boundaries.Mclust(...) if(what[choice] == "boundaries" & (object$type == "EDDA" | object$type == "MclustDA")) plot.MclustDR.boundaries.MclustDA(...) if(what[choice] == "density") plot.MclustDR.density(...) if(what[choice] == "evalues") plot.MclustDR.evalues(...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "scatterplot")) plot.MclustDR.scatterplot(...) if(any(what == "pairs")) plot.MclustDR.pairs(...) if(any(what == "contour")) plot.MclustDR.contour(...) 
if(any(what == "classification" & object$type == "Mclust")) plot.MclustDR.classification.Mclust(...) if(any(what == "classification" & (object$type == "EDDA" | object$type == "MclustDA"))) plot.MclustDR.classification.MclustDA(...) if(any(what == "boundaries" & object$type == "Mclust")) plot.MclustDR.boundaries.Mclust(...) if(any(what == "boundaries" & (object$type == "EDDA" | object$type == "MclustDA"))) plot.MclustDR.boundaries.MclustDA(...) if(any(what == "density")) plot.MclustDR.density(...) if(any(what == "evalues")) plot.MclustDR.evalues(...) } invisible() } plotEvalues.MclustDR <- function(x, numdir, plot = FALSE, legend = TRUE, ylim, ...) { object <- x G <- object$G f <- object$pro lambda <- object$lambda # dim <- if(missing(numdir)) seq(object$numdir) else seq(numdir) if(missing(numdir)) numdir <- object$numdir dim <- seq(numdir) d <- length(dim) par <- projpar.MclustDR(object, dim = dim, center = TRUE, raw = TRUE) mu <- par$mean Sigma.G <- par$variance # M1 <- t(mu) %*% diag(f) %*% mu l1 <- 2*lambda*diag(crossprod(M1)) # S <- matrix(0, d, d) for(j in seq(G)) S <- S + f[j]*Sigma.G[,,j] M2 <- matrix(0, d, d) for(j in 1:G) { C <- (Sigma.G[,,j]-S) M2 <- M2 + f[j] * tcrossprod(C) } l2 <- 2*(1-lambda)*diag(M2) # l <- object$evalues[dim] # if(plot) { if(missing(ylim)) ylim <- range(0, max(l)+diff(range(l))*0.05) plot(dim, l, type="b", lty = 1, pch = 16, cex = 1.5, xaxt = "n", ylim = ylim, xlab = "MclustDR directions", ylab = "Eigenvalues", panel.first = { abline(v = dim, col = "lightgray", lty = "dotted") abline(h = axTicks(2,par("yaxp")), col = "lightgray", lty = "dotted") } ) axis(1, at = dim, labels = dim) lines(dim, l1, type="b", lty = 2, pch = 22, cex = 1.5) lines(dim, l2, type="b", lty = 2, pch = 2, cex = 1.5) if(legend) { legend("topright", lty = c(1,2,2), pch = c(16,22,2), legend = c("Eigenvalues", "from means", "from vars"), bg = ifelse(par("bg")=="transparent", "white", par("bg")), inset = 0.01, pt.cex = 1.5) } } out <- list(dim = dim, evalues = l, 
mean.contrib = l1, var.contrib = l2) if(plot) invisible(out) else return(out) } # Auxiliary functions ----------------------------------------------------- # TODO: remove # mvdnorm <- function(x, mu, sigma, log = FALSE, tol = sqrt(.Machine$double.eps)) # { # if(is.vector(x)) # { x <- matrix(x, ncol = length(x)) } # else # { x <- as.matrix(x) } # SVD <- svd(sigma) # pos <- (SVD$d > max(tol*SVD$d[1], 0)) # in case of not full rank covar matrix # inv.sigma <- SVD$v[,pos,drop=FALSE] %*% (1/SVD$d[pos] * # t(SVD$u[,pos,drop=FALSE])) # z <- mahalanobis(x, center = mu, cov = inv.sigma, inverted = TRUE) # # logdet <- sum(log(eigen(sigma, symmetric = TRUE, only.values = TRUE)$values)) # logdet <- sum(log(SVD$d[pos])) # logdens <- -(ncol(x) * log(2 * pi) + logdet + z)/2 # if(log) return(logdens) # else return(exp(logdens)) # } ellipse <- function(c, M, r, npoints = 100) { # Returns the cartesian coordinates of points x on the ellipse # (x-c)'M(x-c) = r^2, # where x = x(theta) and theta varies from 0 to 2*pi radians in npoints steps. # local functions circle <- function(theta, r) r*c(cos(theta),sin(theta)) ellip <- function(theta, r, lambda) lambda*circle(theta, r) point <- function(theta) c+c(gamma %*% ellip(theta, r, lam)) # SVD <- svd(M) lam <- 1/sqrt(SVD$d) gamma <- SVD$v coord <- t(sapply(seq(0, 2*pi, length=npoints), function(th) point(th))) return(coord) } eigen.decomp <- function(A, B, invsqrt = FALSE) { # # Generalized eigenvalue decomposition of A with respect to B. # # A generalized eigenvalue problem AV = BLV is said to be symmetric positive # definite if A is symmetric and B is positive definite. V is the matrix of # generalized eigenvectors, and L is the diagonal matrix of generalized # eigenvalues (Stewart, 2001, pag. 229-230). # # Properties: # V'AV = L # V'BV = I # # The algorithm implemented is described in Stewart (2001, pag. 234) and used # by Li (2000). # # References: # Li, K.C., 2000. 
High dimensional data analysis via the SIR-PHD approach, # Stewart, G.W., 2001. Matrix Algorithms: vol II Eigensystems, SIAM. if(!invsqrt) { SVD <- svd(B, nu=0) # in case of not full rank covar matrix tol <- .Machine$double.eps pos <- which(SVD$d > max(tol*SVD$d[1], 0)) SVD$d <- SVD$d[pos] SVD$v <- SVD$v[,pos,drop=FALSE] # Computes inverse square root matrix such that: # t(inv.sqrt.B) %*% inv.sqrt.B = inv.sqrt.B %*% t(inv.sqrt.B) = solve(B) inv.sqrt.B <- SVD$v %*% (1/sqrt(SVD$d) * t(SVD$v)) } else { inv.sqrt.B <- B } # Compute B^(-1/2)' A B^(-1/2) = UDU' # evectors = B^(-1/2) U # evalues = D A <- t(inv.sqrt.B) %*% A %*% inv.sqrt.B SVD <- svd(A, nu=0) list(l = SVD$d, v = inv.sqrt.B %*% SVD$v) } # Subset selection of GMMDR/GMMDRC directions ----------------------------- MclustDRsubsel <- function(object, G = 1:9, modelNames = mclust.options("emModelNames"), ..., bic.stop = 0, bic.cutoff = 0, mindir = 1, verbose = interactive()) { # Subset selection for GMMDR directions based on bayes factors. # # object = a MclustDR object # G = a vector of cluster sizes for searching # modelNames = a vector of models for searching # ... = further arguments passed through Mclust/MclustDA # bic.stop = criterion to stop the search. If maximal BIC difference is # less than bic.stop the algorithm stops. # Two tipical values are: # 0 = stops when BIC difference becomes negative (default) # -Inf = stops when all directions have been selected # bic.cutoff = select simplest ``best'' model within bic.cutoff from the # maximum value achieved. Setting this to 0 (default) simply # select the model with the largest BIC difference. # mindir = the minimum number of diretions to be estimated # verbose = if 0 no trace info is shown; if 1 a trace of each step # of the search is printed; if 2 a detailed trace info is # is shown. 
if(!inherits(object, "MclustDR"))   # robust class test (class() may have length > 1)
  stop("Not a 'MclustDR' object")
# Model-based hierarchical initialization must use the untransformed
# variables; restore the user's setting on exit.
hcUse <- mclust.options("hcUse")
mclust.options("hcUse" = "VARS")
on.exit(mclust.options("hcUse" = hcUse))
# Re-dispatch the call to the worker matching the fitted model type,
# keeping '...' arguments intact, and evaluate in the caller's frame.
mc <- match.call(expand.dots = TRUE)
mc[[1]] <- switch(object$type,
                  "Mclust"   = as.name("MclustDRsubsel_cluster"),
                  "EDDA"     = as.name("MclustDRsubsel_classif"),
                  "MclustDA" = as.name("MclustDRsubsel_classif"),
                  stop("Not allowed 'MclustDR' type!"))
eval(mc, parent.frame())
}

MclustDRsubsel_cluster <- function(object, G = 1:9,
                                   modelNames = mclust.options("emModelNames"),
                                   ...,
                                   bic.stop = 0, bic.cutoff = 0,
                                   mindir = 1,
                                   verbose = interactive())
{
# Greedy subset selection of GMMDR directions for model-based clustering.
#
# Repeatedly: (i) run a single selection cycle (MclustDRsubsel1cycle) on the
# current directions, (ii) refit Mclust on the selected subset, and
# (iii) recompute the MclustDR projection, until the number of directions
# stops decreasing. The returned object carries the basis expressed in the
# original variables (basisx, std.basisx).
  drmodel <- object
  mclustType <- drmodel$type
  lambda <- drmodel$lambda
  numdir <- drmodel$numdir
  numdir0 <- numdir+1
  dir <- drmodel$dir[,seq(numdir),drop=FALSE]
  mindir <- max(1, as.numeric(mindir), na.rm = TRUE)
  verbose <- as.numeric(verbose)
  ncycle <- 0
  # iterate while the number of directions keeps shrinking
  while(numdir < numdir0)
  { ncycle <- ncycle+1
    if(verbose > 0) cat("\nCycle", ncycle, "...\n")
    out <- MclustDRsubsel1cycle(drmodel, G, modelNames,
                                bic.stop = bic.stop,
                                bic.cutoff = bic.cutoff,
                                verbose = if(verbose > 1) TRUE else FALSE)
    if(verbose > 0)
      { cat("\n"); print(out$tab) }
    # refit the clustering model on the selected directions; with a single
    # direction only the univariate models "E"/"V" are meaningful
    mod <- do.call("Mclust",
                   list(data = dir[,out$subset,drop=FALSE],
                        G = G,
                        modelNames = if(length(out$subset) > 1) modelNames
                                     else c("E", "V"),
                        verbose = FALSE, ...))
    numdir0 <- numdir
    drmodel0 <- MclustDR(mod, lambda = lambda)
    # do not accept a projection with fewer than 'mindir' directions
    if(drmodel0$numdir < mindir) break
    drmodel <- drmodel0
    numdir <- drmodel$numdir
    dir <- drmodel$dir[,seq(numdir),drop=FALSE]
  }
  # format object using original data
  obj <- drmodel
  obj$basisx <- MclustDRrecoverdir(obj, data = object$x, std = FALSE)
  obj$std.basisx <- MclustDRrecoverdir(obj, data = object$x, std = TRUE)
  class(obj) <- c("MclustDRsubsel", class(obj))
  return(obj)
}

MclustDRsubsel1cycle <- function(object, G = 1:9,
                                 modelNames = mclust.options("emModelNames"),
                                 bic.stop = 0, bic.cutoff = 0,
                                 verbose = interactive())
{
  # Single cycle of subset selection for GMMDR directions based on bayes factors.
if(!inherits(object, "MclustDR"))   # robust class test (class() may have length > 1)
  stop("Not a 'MclustDR' object")
d <- object$numdir
dir <- object$dir[,seq(d),drop=FALSE]
n <- nrow(dir)
if(is.null(colnames(dir)))
  colnames(dir) = paste("[,", 1:d, "]", sep="")
dir.names <- colnames(dir)
BIC <- Model1 <- Model2 <- tab <- NULL; Model1$bic <- 0
# add a small tolerance so ties are resolved in favour of selection
bic.stop <- bic.stop + sqrt(.Machine$double.eps)
bic.cutoff <- bic.cutoff + sqrt(.Machine$double.eps)
inc <- NULL; excl <- seq(1,d)
# with a single direction only univariate models apply
model1D <- if(any(grep("V", modelNames))) c("E", "V") else "E"
#
hskip <- paste(rep(" ",4),collapse="")
if(verbose)
  cat("\n", hskip, "Start greedy search...\n", sep="")
# greedy forward search over the remaining directions in 'excl'
while(length(excl)>0)
{ if(verbose)
    { cat(hskip, rep("-",15), "\n", sep="")
      cat(paste(hskip, "Step", length(inc)+1, "\n")) }
  for(j in excl)
  { # Select simplest model with smallest num. of components
    # within bic.cutoff
    bic <- mclustBIC(dir[,sort(c(inc, j)),drop=FALSE], G = G,
                     modelNames = if(length(inc)>0) modelNames else model1D,
                     verbose = FALSE)
    bic.tab <- (as.matrix(max(bic, na.rm=TRUE) - bic) < bic.cutoff)*1
    bestG <- which(rowSums(bic.tab, na.rm=TRUE) > 0)[1]
    bestmod <- which(bic.tab[bestG,,drop=FALSE] > 0)[1]
    # BIC difference = clustering BIC - (previous BIC + regression BIC of the
    # candidate direction on those already included)
    out <- data.frame(Variable = dir.names[j],
                      model = colnames(bic.tab)[bestmod],
                      G = G[bestG],
                      bic = c(bic[bestG,bestmod]),
                      bic.diff = c(bic[bestG,bestmod] - Model1$bic -
                                   MclustDRBICreg(dir[,j], dir[,inc])))
    #
    Model2 <- rbind(Model2, out)
  }
  if(verbose)
    print(cbind(" " = hskip, Model2), row.names = FALSE)
  # stop if max BIC difference is < than cut-off bic.stop
  if(max(Model2$bic.diff) < bic.stop & length(inc) > 0)
    { break }
  # otherwise keep selecting
  i <- which.max(Model2$bic.diff)
  inc <- append(inc, excl[i])
  excl <- setdiff(excl, excl[i])
  tab <- rbind(tab, Model2[i,])
  Model1 <- Model2[i,]
  Model2 <- NULL
}
rownames(tab) <- 1:nrow(tab)
colnames(tab) <- c("Variable", "Model", "G", "BIC", "BIC.dif")
subsets <- sapply(1:nrow(tab), function(x) list(inc[1:x]))
#
return(list(subset = subsets[[length(subsets)]], tab = tab))
}

MclustDRsubsel_classif <- function(object, G = 1:9,
modelNames = mclust.options("emModelNames"), ..., bic.stop = 0, bic.cutoff = 0, mindir = 1, verbose = interactive()) { drmodel <- object mclustType <- drmodel$type lambda <- drmodel$lambda numdir <- drmodel$numdir numdir0 <- numdir+1 dir <- drmodel$dir[,seq(numdir),drop=FALSE] mindir <- max(1, as.numeric(mindir), na.rm = TRUE) verbose <- as.numeric(verbose) ncycle <- 0 while(numdir < numdir0) { ncycle <- ncycle+1 if(verbose > 0) cat("\nCycle", ncycle, "...\n") out <- MclustDRCsubsel1cycle(drmodel, G, modelNames, bic.stop = bic.stop, bic.cutoff = bic.cutoff, verbose = if(verbose > 1) TRUE else FALSE) if(verbose > 0) { cat("\n"); print(out$tab) } mod <- do.call("MclustDA", list(data = dir[,out$subset,drop=FALSE], class = object$classification, G = G, modelNames = if(length(out$subset) > 1) modelNames else if(any(grep("V", modelNames))) c("E", "V") else "E", modelType = mclustType, verbose = FALSE, ...)) numdir0 <- numdir drmodel0 <- MclustDR(mod, lambda = lambda) if(drmodel0$numdir < mindir) break drmodel <- drmodel0 numdir <- drmodel$numdir dir <- drmodel$dir[,seq(numdir),drop=FALSE] } # format object using original data obj <- drmodel obj$basisx <- MclustDRrecoverdir(obj, data = object$x, std = FALSE) obj$std.basisx <- MclustDRrecoverdir(obj, data = object$x, std = TRUE) class(obj) <- c("MclustDRsubsel", class(obj)) return(obj) } MclustDRCsubsel1cycle <- function(object, G = 1:9, modelNames = mclust.options("emModelNames"), bic.stop = 0, bic.cutoff = 0, verbose = TRUE) { # Single cycle of subset selection for GMMDRC directions based on bayes factors. 
if(!inherits(object, "MclustDR"))   # robust class test (class() may have length > 1)
  stop("Not a 'MclustDR' object")
d <- object$numdir
dir <- object$dir[,seq(d),drop=FALSE]
n <- nrow(dir)
if(is.null(colnames(dir)))
  colnames(dir) = paste("[,", seq(d), "]", sep="")
dir.names <- colnames(dir)
BIC <- Model1 <- Model2 <- tab <- NULL; Model1$bic <- 0
# add a small tolerance so ties are resolved in favour of selection
bic.stop <- bic.stop + sqrt(.Machine$double.eps)
bic.cutoff <- bic.cutoff + sqrt(.Machine$double.eps)
inc <- NULL; excl <- seq(d)
# with a single direction only univariate models apply
model1D <- if(any(grep("V", modelNames))) c("E", "V") else "E"
#
hskip <- paste(rep(" ",4),collapse="")
if(verbose)
  cat("\n", hskip, "Start greedy search...\n", sep="")
# greedy forward search over the remaining directions in 'excl'
while(length(excl)>0)
{ if(verbose)
    { cat(hskip, rep("-",15), "\n", sep="")
      cat(paste(hskip, "Step", length(inc)+1, "\n")) }
  for(j in excl)
  { # Select simplest model with smallest num. of components
    # within bic.cutoff
    mod <- MclustDA(dir[,sort(c(inc, j)),drop=FALSE],
                    class = object$classification, G = G,
                    modelNames = if(length(inc)>0) modelNames else model1D,
                    modelType = object$type, verbose = FALSE)
    out <- data.frame(Variable = dir.names[j],
                      model = paste(sapply(mod$models, function(m) m$modelName),
                                    collapse="|"),
                      G = paste(sapply(mod$models, function(m) m$G),
                                collapse="|"),
                      bic = mod$bic,
                      bic.diff = c(mod$bic -
                                   # (Model1$bic + bic.reg(z2, z1))
                                   Model1$bic -
                                   MclustDRBICreg(dir[,j], dir[,inc])))
    #
    Model2 <- rbind(Model2, out)
  }
  if(verbose)
    print(cbind(" " = hskip, Model2), row.names = FALSE)
  # stop if max BIC difference is < than cut-off bic.stop
  # (column is 'bic.diff'; do not rely on partial $-matching via 'bic.dif')
  if(max(Model2$bic.diff) < bic.stop & length(inc) > 0)
    { break }
  # otherwise keep selecting
  i <- which.max(Model2$bic.diff)
  inc <- append(inc, excl[i])
  excl <- setdiff(excl, excl[i])
  tab <- rbind(tab, Model2[i,])
  Model1 <- Model2[i,]
  Model2 <- NULL
}
rownames(tab) <- 1:nrow(tab)
colnames(tab) <- c("Variable", "Model", "G", "BIC", "BIC.dif")
subsets <- sapply(1:nrow(tab), function(x) list(inc[1:x]))
#
return(list(subset = subsets[[length(subsets)]], tab = tab))
}

# BICreg <- function(y, x)
# { n <- length(y)
#   mod <- lm.fit(cbind(rep(1,n), x), y)
#   rss <- sum(mod$residuals^2)
#   -n*log(2*pi) - n*log(rss/n) - n - (n - mod$df.residual + 1) * log(n)
# }

MclustDRBICreg <- function(y, x, stepwise = TRUE)
{
# BIC of the linear regression of y on the columns of x, optionally with
# forward stepwise selection (penalty k = log(n), i.e. BIC). Used as the
# reference regression term when computing BIC differences for candidate
# directions. Returns the Gaussian log-likelihood BIC (larger is better).
  x <- as.matrix(x)
  y <- as.vector(y)
  n <- length(y)
  mod0 <- lm(y ~ 1)
  if(ncol(x) >= 1)
    { mod1 <- lm(y ~ 1+x)
      if(stepwise)
        { # forward selection from the null model towards the full model
          mod <- step(mod0, k = log(n), trace = 0,
                      scope = list(lower = formula(mod0),
                                   upper = formula(mod1)),
                      direction = "forward") }
      else mod <- mod1 }
  else mod <- mod0
  rss <- sum(mod$residuals^2)
  p <- (n - mod$df.residual + 1)  # number of estimated parameters (incl. sigma)
  -n*log(2*pi) - n*log(rss/n) - n - p*log(n)
}

normalize <- function(x)
{
# Normalize the vector x to have unit length
  x <- as.vector(x)
  x <- x/sqrt(as.vector(crossprod(x)))
  return(x)
}

MclustDRrecoverdir <- function(object, data, normalized = TRUE, std = FALSE)
{
# Recover coefficients of the linear combination defining the MclustDR
# directions. This is useful if the directions are obtained from other
# directions
  if(!inherits(object, "MclustDR"))
    stop("object must be of class 'MclustDR'")
  if(missing(data)) x <- object$x
  else x <- as.matrix(data)
  x <- scale(x, center=TRUE, scale=FALSE)
  numdir <- object$numdir
  dir <- object$dir[,seq(numdir),drop=FALSE]
  # B <- as.matrix(coef(lm(dir ~ x)))[-1,,drop=FALSE]  # ok but old
  B <- qr.solve(x, dir)
  if(std)
    { # rescale by the std.dev. of each predictor; sd() must be applied
      # column-wise (sd(<matrix>) collapses the matrix to a single value)
      sdx <- apply(x, 2, sd)
      B <- apply(B, 2, function(x) x*sdx) }
  if(normalized)
    { B <- as.matrix(apply(B, 2, normalize)) }
  rownames(B) <- colnames(x)
  return(B)
}

## Define print and summary methods for showing basis coefs
## in the original scale of variables

print.MclustDRsubsel <- function(x, ...)
{
# Print method: report the basis expressed in the original variables,
# then delegate to print.MclustDR.
  x$basis <- x$basisx
  class(x) <- class(x)[2]
  NextMethod()
}

summary.MclustDRsubsel <- function(object, ...)
{ object$basis <- object$basisx object$std.basis <- object$std.basisx class(object) <- class(object)[2] NextMethod() } mclust/R/impute.R0000644000175000017500000002030713477457724013570 0ustar nileshnileshimputeData <- function(data, categorical = NULL, seed = NULL, verbose = interactive()) { if(!requireNamespace("mix", quietly = TRUE)) stop("imputeData function require 'mix' package to be installed!") fac <- apply(data, 2, is.factor) if(is.null(categorical)) { categorical <- fac } else { if(any(!categorical & fac)) { stop("data has a factor that is not designated as categorical") } if(any(categorical | !fac)) { warning("a categorical is not designated as a factor") for(i in which(categorical | !fac)) data[[i]] <- as.factor(data[[i]]) } } # remove categorical variables and add a dummy variable if(nocat <- !any(categorical)) { data <- cbind(as.factor(1), data) categorical <- c(TRUE, categorical) } ord <- c(which(categorical), which(!categorical)) # do the imputations s <- mix::prelim.mix(data[,ord], p = sum(categorical)) if(is.null(seed)) seed <- runif(1, min = .Machine$integer.max/1024, max = .Machine$integer.max) # find ML estimate thetahat <- mix::em.mix(s, showits = verbose) # set random number generator seed mix::rngseed(seed) # data augmentation from posterior newtheta <- mix::da.mix(s, thetahat, steps = 100, showits = verbose) # impute under newtheta dataImp <- mix::imp.mix(s, newtheta) # there is a bug, so it needs to refix the seed and impute again mix::rngseed(seed) dataImp <- mix::imp.mix(s, newtheta) if(nocat) dataImp[,-1] else dataImp[,order(ord)] } imputePairs <- function(data, dataImp, symbols = c(1, 16), colors = c("black", "red"), labels, panel = points, ..., lower.panel = panel, upper.panel = panel, diag.panel = NULL, text.panel = textPanel, label.pos = 0.5 + has.diag/3, cex.labels = NULL, font.labels = 1, row1attop = TRUE, gap = 0.2) { textPanel <- function(x = 0.5, y = 0.5, txt, cex, font) text(x, y, txt, cex = cex, font = font) localAxis <- 
function(side, x, y, xpd, bg, col = NULL, main, oma, ...) { if (side%%2 == 1) Axis(x, side = side, xpd = NA, ...) else Axis(y, side = side, xpd = NA, ...) } localPlot <- function(..., main, oma, font.main, cex.main) plot(...) localLowerPanel <- function(..., main, oma, font.main, cex.main) lower.panel(...) localUpperPanel <- function(..., main, oma, font.main, cex.main) upper.panel(...) localDiagPanel <- function(..., main, oma, font.main, cex.main) diag.panel(...) dots <- list(...) nmdots <- names(dots) if (!is.matrix(data)) { data <- as.data.frame(data) for (i in seq_along(names(data))) { if (is.factor(data[[i]]) || is.logical(data[[i]])) data[[i]] <- as.numeric(data[[i]]) if (!is.numeric(unclass(data[[i]]))) stop("non-numeric argument to 'pairs'") } } else if (!is.numeric(data)) stop("non-numeric argument to 'pairs'") panel <- match.fun(panel) if ((has.lower <- !is.null(lower.panel)) && !missing(lower.panel)) lower.panel <- match.fun(lower.panel) if ((has.upper <- !is.null(upper.panel)) && !missing(upper.panel)) upper.panel <- match.fun(upper.panel) if ((has.diag <- !is.null(diag.panel)) && !missing(diag.panel)) diag.panel <- match.fun(diag.panel) if (row1attop) { tmp <- lower.panel lower.panel <- upper.panel upper.panel <- tmp tmp <- has.lower has.lower <- has.upper has.upper <- tmp } nc <- ncol(data) if (nc < 2) stop("only one column in the argument to 'pairs'") has.labs <- TRUE if (missing(labels)) { labels <- colnames(data) if (is.null(labels)) labels <- paste("var", 1:nc) } else if (is.null(labels)) has.labs <- FALSE oma <- if ("oma" %in% nmdots) dots$oma else NULL main <- if ("main" %in% nmdots) dots$main else NULL if (is.null(oma)) { oma <- c(4, 4, 4, 4) if (!is.null(main)) oma[3] <- 6 } opar <- par(mfrow = c(nc, nc), mar = rep(gap/2, 4), oma = oma) on.exit(par(opar)) for (i in if (row1attop) 1:nc else nc:1) for (j in 1:nc) { localPlot(dataImp[, j], dataImp[, i], xlab = "", ylab = "", axes = FALSE, type = "n", ...) 
if (i == j || (i < j && has.lower) || (i > j && has.upper)) { box() if (i == 1 && (!(j%%2) || !has.upper || !has.lower)) localAxis(1 + 2 * row1attop, dataImp[, j], dataImp[, i], ...) if (i == nc && (j%%2 || !has.upper || !has.lower)) localAxis(3 - 2 * row1attop, dataImp[, j], dataImp[, i], ...) if (j == 1 && (!(i%%2) || !has.upper || !has.lower)) localAxis(2, dataImp[, j], dataImp[, i], ...) if (j == nc && (i%%2 || !has.upper || !has.lower)) localAxis(4, dataImp[, j], dataImp[, i], ...) mfg <- par("mfg") if (i == j) { if (has.diag) localDiagPanel(as.vector(dataImp[, i]), ...) if (has.labs) { par(usr = c(0, 1, 0, 1)) if (is.null(cex.labels)) { l.wid <- strwidth(labels, "user") cex.labels <- max(0.8, min(2, 0.9/max(l.wid))) } text.panel(0.5, label.pos, labels[i], cex = cex.labels, font = font.labels) } } else if (i < j) { classification <- as.numeric(apply(data[,c(i,j)], 1, function(x) any(is.na(x)))) + 1 localLowerPanel(as.vector(dataImp[, j]), as.vector(dataImp[,i]), pch = symbols[classification], col = colors[classification], ...) } else { classification <- as.numeric(apply(data[,c(i,j)], 1, function(x) any(is.na(x)))) + 1 localUpperPanel(as.vector(dataImp[, j]), as.vector(dataImp[, i]), pch = symbols[classification], col = colors[classification], ...) 
}
            if (any(par("mfg") != mfg)) 
                stop("the 'panel' function made a new plot")
        } else par(new = FALSE)
    }
    if (!is.null(main)) {
        font.main <- if ("font.main" %in% nmdots) dots$font.main else par("font.main")
        cex.main <- if ("cex.main" %in% nmdots) dots$cex.main else par("cex.main")
        mtext(main, 3, 3, TRUE, 0.5, cex = cex.main, font = font.main)
    }
    invisible(NULL)
}

# Relabel cluster assignments to best match a set of reference group labels.
#
# The optimal correspondence between cluster labels and group labels is
# obtained from mapClass(); clusters are then re-expressed in the group
# labelling, and observations whose mapped label disagrees with their
# group label are flagged as misclassified.
#
# group   - vector of reference class labels
# cluster - vector of cluster labels, same length as 'group'
#
# Returns a list with components:
#   cluster       - cluster labels mapped onto the group labelling
#   misclassified - logical, TRUE where the mapped label != group label
#   ord           - the label correspondence returned by mapClass()
matchCluster <- function(group, cluster)
{
  if(length(group) != length(cluster))
    stop("arguments must be vector of the same length")
  group <- as.factor(group)
  cluster <- as.factor(cluster)
  map <- mapClass(as.numeric(group), as.numeric(cluster))
  map1 <- unlist(map[[1]]); names(map1) <- NULL
  map2 <- unlist(map[[2]]); names(map2) <- NULL
  cl <- cluster
  levels(cl) <- map2
  cl <- as.character(levels(cl)[as.numeric(cl)])
  cl <- as.character(cl)
  group <- as.character(group)
  # Bug fix: compare the *mapped* labels (cl) with the group labels.
  # The previous code compared the raw, unmapped cluster codes with the
  # group labels, which flags nearly every observation as misclassified
  # whenever the two label sets differ.
  misclassified <- !(cl == group)
  out <- list(cluster = cl, misclassified = misclassified, ord = map1)
  return(out)
}

# Majority vote for a vector of labels.
#
# Returns a list with the full frequency table of x, the index of the
# winning label within that table, and the winning label itself.
# Ties are broken at random.
majorityVote <- function(x)
{
  # local function to find the maximum position in a vector,
  # breaking ties at random
  whichMax <- function (x)
  {
    m <- seq_along(x)[x == max(x, na.rm = TRUE)]
    if(length(m) > 1) sample(m, size = 1) else m
  }
  x <- as.vector(x)
  tab <- table(x)
  m <- whichMax(tab)
  out <- list(table = tab, ind = m, majority = names(tab)[m])
  return(out)
}
mclust/R/weights.R0000644000175000017500000000410414125667133013720 0ustar 
nileshnilesh###############################################################################
## Weights for MCLUST                                                        ##
##                                                                           ##
## Written by Thomas Brendan Murphy                                          ##
## Bugs fix by Luca Scrucca                                                  ##
#############################################################################

# Weighted EM iteration for Gaussian mixture models.
#
# Alternates weighted M-steps and E-steps until the gain in the weighted
# log-likelihood drops to control$tol[1] or below, or control$itmax[1]
# iterations have been performed.
#
# data      - numeric matrix (rows = observations)
# modelName - mclust covariance model name (e.g. "VVV")
# z         - n x K matrix of initial conditional membership probabilities
# weights   - optional case weights; must be non-negative and finite,
#             rescaled so that max(weights) == 1
# prior     - optional conjugate prior, passed through to mstep()
# control   - emControl() list supplying itmax, tol
# Vinv      - inverse hypervolume of a noise component, if any
# warn      - emit warnings? defaults to mclust.options("warn")
#
# Returns the final mstep() fit augmented with components z (from the last
# E-step), weights, and loglik (the weighted log-likelihood).
me.weighted <- function(data, modelName, z, weights = NULL, prior = NULL, 
                        control = emControl(), Vinv = NULL, warn = NULL, ...)
{
  data <- as.matrix(data)
  N <- nrow(data)
  if(is.null(warn)) warn <- mclust.options("warn")
  if(is.null(weights))
    { weights <- rep(1,N) }
  # NOTE(review): this test admits zero weights even though the message says
  # "positive"; a zero weight simply nulls out the corresponding case
  if(any(weights < 0)| any(!is.finite(weights)))
    { stop("Weights must be positive and finite") }
  if(!is.vector(weights))
    { stop("Weights must be a vector") }
  if(max(weights) > 1)
    { # rescale so weights never up-weight a case beyond 1
      if(warn) warning("Weights rescaled to have maximum equal to 1")
      weights <- weights/max(weights) }
  zw <- z*weights                 # weighted responsibilities drive the M-step
  llold <- -Inf
  eps <- .Machine$double.eps
  criterion <- TRUE
  iter <- 0
  while(criterion)
  {
    iter <- iter+1
    fit.m <- do.call("mstep", list(data = data, z = zw, 
                                   modelName = modelName, prior = prior, 
                                   control = control, Vinv = Vinv, 
                                   warn = warn))
    # undo the shrinkage of the mixing proportions induced by the weighting
    fit.m$parameters$pro <- fit.m$parameters$pro/mean(weights)
    fit.e <- do.call("estep", c(list(data = data, control = control, 
                                     Vinv = Vinv, warn = warn), fit.m))
    # floor at eps so later M-steps never see exactly-zero responsibilities
    zw <- pmax(fit.e$z*weights, eps)
    criterion <- criterion & (iter < control$itmax[1])
    ldens <- do.call("dens", c(list(data = data, logarithm=TRUE, 
                                    warn = warn), fit.m))
    ll <- sum(weights*ldens)      # weighted log-likelihood
    criterion <- criterion & (ll-llold > control$tol[1])
    llold <- ll
  }
  fit <- fit.m
  fit$z <- fit.e$z
  fit$weights <- weights
  fit$loglik <- ll
  return(fit)
}
mclust/R/mclustaddson.R0000644000175000017500000023653513547605004014751 0ustar nileshnilesh##############################################################################
### EVV model                                                             ####
##############################################################################

# EM algorithm for the EVV model (equal volume, varying shape and
# orientation): a single E-step from the supplied parameters, followed by
# iterative EM via meEVV().
emEVV <- function(data, parameters, prior = NULL, control = emControl(), 
                  warn = NULL, ...)
{ z <- estepEVV(data, parameters = parameters, warn = warn)$z meEVV(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } #### meEVV <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K # if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EVV", d = p, G = G, scale = NA, shape = rep(NA,p), orientation = array(NA,c(p,p,G))) parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="EVV", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p)) storage.mode(z) <- "double" # # # MICHAEL from here------------------------------------------ # # without prior specification if(is.null(prior)) { temp <- .Fortran( "meevv", x = as.double(data), z = as.double(z), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(K), mu = double(p*G), O = double(p*p*G), shape.o = double(p*p*G), scale = double(G), shape = double(p*G), pro = double(K), Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ), loglik = double(1), eqpro = as.logical(control$equalPro), itmax = as.integer(control$itmax[1]), tol = as.double(control$tol[1]), eps = as.double(control$eps), niterout = integer(1), errout = double(1), lwork = as.integer(lwork), info = 
as.integer(0), PACKAGE = "mclust") } else { # with prior priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EVV"), prior[names(prior) != "functionName"])) # temp <- .Fortran("meevvp", ...) temp <- list(x = data, z = z, n = n, p = p, G = G, mu = double(p*G), O = double(p*p*G), shape.o = double(p*p*G), scale = double(G), shape = double(p*G), pro = double(K), loglik = NA, eqpro = as.logical(control$equalPro), itmax = as.integer(control$itmax[1]), tol = as.double(control$tol[1]), eps = as.double(control$eps), niterout = integer(1), errout = double(1), lwork = as.integer(lwork), info = FALSE) WARNING <- "EVV model is not available with prior" if(warn) warning(WARNING) temp <- structure(temp, info = NA, WARNING = WARNING, returnCode = -1) return(temp) } # z <- matrix(temp$z, n,K) loglik <- temp$loglik mu <- matrix(temp$mu, p,G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp$scale[1] shape <- matrix(temp$shape, p,G) O <- aperm( array(temp$O, c(p,p,G)), c(2,1,3) ) shape.o <- array( temp$shape.o, c(p,p,G) ) pro <- temp$pro niterout <- temp$niterout errout <- temp$errout lapackSVDinfo <- temp$info if( !is.finite(loglik) | any(scale > signif(.Machine$double.xmax, 6)) | any(shape > signif(.Machine$double.xmax, 6)) | any(O > signif(.Machine$double.xmax, 6)) | any(is.nan(scale)) | any(is.nan(shape)) | any(is.nan(O)) ) { loglik <- .Machine$double.xmax } # WARNING <- NULL if(lapackSVDinfo) { if(lapackSVDinfo > 0) { WARNING <- "LAPACK DGESVD fails to converge" } else { WARNING <- "input error for LAPACK DGESVD" } if(warn) warning(WARNING) z[] <- O[] <- shape[] <- NA scale <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -9 # } else if( loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) shape[] <- NA mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "a z column sum 
fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- if(control$equalPro) -2 else -3 } else { # scale <- sum(scale)/n sigma <- scale * shape.o if(niterout >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) niterout <- - niterout ret <- 1 } else ret <- 0 } info <- list(iterations = niterout, error = errout) # info <- c(iterations = its, error = err) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) ## Sigma = scale * O %*% diag(shape) %*% t(O) variance <- list(modelName = "EVV", d = p, G = G, sigma = sigma, scale = scale, shape = shape, orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "EVV", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } #### mstepEVV <- function(data, z, prior = NULL, warn = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) ## z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EVV", d = p, G = G, scale = NA, shape = rep(NA,p), orientation=array(NA,c(p,p,G))) parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G), variance=variance) return(structure(list(modelName="EVV", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), G) # # MICHAEL from here------------------------------------------------------- # # without prior specification if(is.null(prior)) { temp <- .Fortran( "msevv", x = as.double(data), z = as.double(z), n = as.integer(n), p = as.integer(p), G = as.integer(G), mu = double(p*G), O = double(p*p*G), shape.o = double(p*p*G), scale = double(G), shape = double(p*G), pro = double(G), lwork = as.integer(lwork), info = as.integer(0), eps = as.double(.Machine$double.eps), PACKAGE = "mclust") } else { # with prior priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EVV"), prior[names(prior) != "functionName"])) # temp <- .Fortran("meevvp", ...) 
temp <- list(x = data, z = z, n = n, p = p, G = G, mu = double(p*G), O = double(p*p*G), shape.o = double(p*p*G), scale = double(G), shape = double(p*G), pro = double(G), lwork = as.integer(lwork), info = FALSE, eps = as.double(.Machine$double.eps)) WARNING <- "EVV model is not available with prior" if(warn) warning(WARNING) } # lapackSVDinfo <- temp$info mu <- matrix(temp$mu, p,G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp$scale[1] # lambda O <- aperm( array(temp$O, c(p,p,G)), c(2,1,3) ) shape.o <- array( temp$shape.o, c(p,p,G) ) shape <- matrix(temp$shape, p,G) pro <- temp$pro WARNING <- NULL # if(lapackSVDinfo) { if(lapackSVDinfo > 0) { WARNING <- "LAPACK DGESVD fails to converge" ret <- -4 } else { WARNING <- "input error for LAPACK DGESVD" ret <- -5 } if(warn) warning(WARNING) O[] <- shape[] <- scale[] <- NA sigma <- array(NA, c(p, p, G)) # } else if( any(abs(c(scale, shape)) > signif(.Machine$double.xmax, 6)) ) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) mu[] <- pro[] <- scale <- shape[] <- O[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else { # scale <- sum(scale)/n # scale <- sum(scale)/sum(z) # lambda --> if noise, see help(mstep) sigma <- scale * shape.o ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- dimnames(shape) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EVV", d = p, G = G, sigma = sigma, scale = scale, shape = shape, orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "EVV", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters), WARNING = WARNING, returnCode = ret) } #### estepEVV <- function(data, parameters, warn = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) scale <- parameters$variance$scale shape <- parameters$variance$shape O <- parameters$variance$orientation G <- ncol(mu) noise <- l == G + 1 if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(NA,n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EVV", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") # # MICHAEL from here---------------------------------------------- # temp <- .Fortran( "esevv", x = as.double(data), z = double(n*K), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(K), mu = as.double(mu), O = as.double( aperm(O, c(2,1,3)) ), scale = as.double(scale), shape = as.double(shape), pro = as.double(pro), Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ), loglik = double(1), eps = as.double(.Machine$double.eps), PACKAGE = "mclust") # loglik <- temp$loglik z <- matrix(temp$z, n,K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "EVV", n = n, d = p, G = G, z = z, parameters = 
parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } #### cdensEVV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) scale <- parameters$variance$scale shape <- parameters$variance$shape O <- parameters$variance$orientation p <- ncol(data) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(NA,n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "EVV", WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") # temp <- .Fortran( "esevv", x = as.double(data), z = double(n*G), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(G), mu = as.double(mu), O = as.double( aperm(O, c(2,1,3)) ), scale = as.double(scale), shape = as.double(shape), pro = as.double(-1), Vinv = as.double(-1), loglik = double(1), eps = as.double(.Machine$double.eps), PACKAGE = "mclust") # loglik <- temp$loglik z <- matrix(temp$z, n,G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "EVV", WARNING = WARNING, returnCode = ret) } ### simEVV <- function(parameters, n, seed = NULL, ...) 
{
  # Simulate n observations from an EVV mixture; the first column of the
  # returned matrix is the component (group) label.
  if (!is.null(seed)) 
    set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if (any(is.na(parameters[c("mean", "variance")])) || 
      any(is.null(parameters[c("mean", "variance")])))
  {
    warn <- "parameters are missing"   # NOTE(review): assigned but never used
    warning("parameters are missing")
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "EVV"))
  }
  pro <- parameters$pro
  if (is.null(pro)) 
    pro <- rep(1/G, G)                 # default to equal mixing proportions
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins = G)
  x <- matrix(0, n, d)
  # EVV: per-component shape (d x G matrix), common scale (volume)
  rtshape <- sqrt(parameters$variance$shape)
  if (dim(rtshape)[1] != d | dim(rtshape)[2] != G) 
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  for (k in 1:G)
  {
    m <- ctabel[k]
    # scale the rows of t(O_k) by sqrt(scale * shape_k): for the resulting
    # matrix C, t(C) %*% C = scale * O_k diag(shape_k) t(O_k) = Sigma_k
    sss <- rtscale * rtshape[,k]
    cholSigma <- t(parameters$variance$orientation[,,k]) * sss
    x[clabels == k, ] <- sweep( matrix(rnorm(m*d), nrow = m, ncol = d) %*% cholSigma,
                                MARGIN = 2, STATS = mu[,k], FUN = "+" )
  }
  dimnames(x) <- list(NULL, 1:d)
  structure(cbind(group = clabels, x), modelName = "EVV")
}

##############################################################################
### VEE model                                                             ####
##############################################################################

# EM algorithm for the VEE model (varying volume, equal shape and
# orientation): a single E-step from the supplied parameters, followed by
# iterative EM via meVEE().
emVEE <- function(data, parameters, prior = NULL, control = emControl(), 
                  warn = NULL, ...)
{
  z <- estepVEE(data, parameters = parameters, warn = warn)$z
  meVEE(data, z = z, prior = prior, control = control, 
        Vinv = parameters$Vinv, warn = warn)
}

####

meVEE <- function(data, z, prior = NULL, control = emControl(), 
                  Vinv = NULL, warn = NULL, ...)
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "VEE", d = p, G = G, scale=rep(NA,G), shape=rep(NA,p), orientation=array(NA,c(p,p,G))) parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G), variance=variance) return(structure(list(modelName="VEE", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G) storage.mode(z) <- "double" # # MICHAEL from here------------------------------------------------------- # # without prior specification if(is.null(prior)) { temp <- .Fortran("mevee", x = as.double(data), z = as.double(z), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(K), mu = double(p*G), C = double(p*p), U = double(p*p*G), scale = double(G), shape = double(p), pro = double(K), Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ), loglik = double(1), eqpro = as.logical(control$equalPro), itmaxin = as.integer(control$itmax[2]), tolin = as.double(control$tol[2]), itmaxout = as.integer(control$itmax[1]), tolout = as.double(control$tol[1]), eps = as.double(control$eps), niterin = integer(1), errin = double(1), niterout = integer(1), errout = double(1), lwork = as.integer(lwork), info = as.integer(0), PACKAGE = "mclust") # } else { # with prior priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = 
"VEE"), prior[names(prior) != "functionName"])) # temp <- .Fortran("meveep", ...) temp <- list(x = data, z = z, n = n, p = p, G = G, mu = double(p*G), C = double(p*p), U = double(p*p*G), scale = double(G), shape = double(p), pro = double(K), loglik = NA, eqpro = as.logical(control$equalPro), itmaxin = as.integer(control$itmax[2]), tolin = as.double(control$tol[2]), itmaxout = as.integer(control$itmax[1]), tolout = as.double(control$tol[1]), eps = as.double(control$eps), niterin = integer(1), errin = double(1), niterout = integer(1), errout = double(1), lwork = as.integer(lwork), info = FALSE) WARNING <- "VEE model is not available with prior" if(warn) warning(WARNING) temp <- structure(temp, info = NA, WARNING = WARNING, returnCode = -1) return(temp) } z <- matrix(temp$z, n,K) niterin <- temp$niterin errin <- temp$errin niterout <- temp$niterout errout <- temp$errout loglik <- temp$loglik lapackSVDinfo <- temp$info mu <- matrix(temp$mu, p,G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp$scale shape <- temp$shape shape.o <- matrix(temp$C, p,p) O <- if(any(is.nan(shape.o))) shape.o else svd(shape.o, nu = 0)$v pro <- temp$pro if( !is.finite(loglik) | any(scale > signif(.Machine$double.xmax, 6)) | any(shape > signif(.Machine$double.xmax, 6)) | any(O > signif(.Machine$double.xmax, 6)) | any(is.nan(scale)) | any(is.nan(shape)) | any(is.nan(O)) ) { loglik <- .Machine$double.xmax } # WARNING <- NULL if(lapackSVDinfo) { if(lapackSVDinfo > 0) { WARNING <- "LAPACK DSYEV or DPOTRI fails to converge" } else { WARNING <- "input error for LAPACK DPOTRF, DSYEV or DPOTRI" } if(warn) warning(WARNING) O[] <- shape[] <- scale[] <- NA mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -9 # } else if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) O[] <- shape[] <- scale[] <- NA mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else if(loglik < - 
signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "z column sum fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- if(control$equalPro) -2 else -3 } else { sigma <- sweep( array(shape.o, c(p,p,G)), 3, FUN = "*", STATS = scale ) if(niterin >= control$itmax[2]) { WARNING <- "inner iteration limit reached" if(warn) warning(WARNING) niterin <- - niterin ret <- 2 } else if(niterout >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) niterout <- - niterout ret <- 1 } else ret <- 0 } info <- structure(c(niterout = niterout, errout = errout), inner = c(niterin = niterin, errin = errin)) # info <- structure(c(iterations = its, error = err), # inner = c(iterations = inner, error = inerr)) dimnames(z) <- list(dimnames(data)[[1]],NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]]) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) ## Sigma = scale * O %*% diag(shape) %*% t(O) variance <- list(modelName = "VEE", d = p, G = G, sigma = sigma, scale = scale, shape = shape, orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "VEE", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } #### mstepVEE <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) ## z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "VEE", d = p, G = G, scale = rep(NA,G), shape = rep(NA,p), orientation = array(NA,c(p,p,G))) parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G), variance=variance) return(structure(list(modelName="VEE", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) WARNING <- "z is missing" if(warn) warning(WARNING) return(structure(list( n = n, d = p, G = G, mu = matrix(NA,p, G), sigma = array(NA, c(p, p, G)), decomp = list(d = p, G = G, scale = rep(NA, G), shape = rep(NA, p), orientation = array(NA, c(p, p, G))), pro = rep(NA,G), modelName = "VEE", prior = prior), WARNING = WARNING)) } # shape <- sqrt(rev(sort(shape/exp(sum(log(shape))/p)))) if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") if (is.null(control)) control <- emControl() itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2] tol <- if(length(control$tol) == 1) control$tol else control$tol[2] lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G) # # MICHAEL from here------------------------------------------------------- # # without prior specification if(is.null(prior)) { temp <- .Fortran( "msvee", x = as.double(data), z = as.double(z), n = as.integer(n), p = as.integer(p), G = as.integer(G), mu = double(p*G), U = double(p*p*G), C = double(p*p), scale = as.double( rep(1,G) ), pro = double(G), lwork = as.integer(lwork), info = as.integer(0), itmax = as.integer(itmax), tol = as.double(tol), niterin = integer(1), errin = 
double(1), # eps = as.double(.Machine$double.eps), PACKAGE = "mclust") } else { # with prior priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "VEE"), prior[names(prior) != "functionName"])) # # temp <- .Fortran("msveep", ...) temp <- list(x = data, z = z, n = n, p = p, G = G, mu = double(p*G), U = double(p*p*G), C = double(p*p), scale = double(G), pro = double(G), lwork = as.integer(lwork), info = FALSE, itmax = as.integer(itmax), tol = as.double(tol), niterin = integer(1), errin = double(1), eps = as.double(control$eps)) WARNING <- "VEE model is not available with prior" if(warn) warning(WARNING) } lapackSVDinfo <- temp$info errin <- temp$errin niterin <- temp$niterin mu <- matrix(temp$mu, p,G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp$scale shape.o <- matrix(temp$C, p,p) SVD <- svd(shape.o, nu = 0) shape <- SVD$d O <- SVD$v pro <- temp$pro WARNING <- NULL # if(lapackSVDinfo) { if(lapackSVDinfo > 0) { WARNING <- "LAPACK DSYEV or DPOTRI fails to converge" } else { WARNING <- "input error for LAPACK DPOTRF, DSYEV or DPOTRI" } if(warn) warning(WARNING) O[] <- shape[] <- scale[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -9 # } else if(any(c(scale, shape) > signif(.Machine$double.xmax, 6))) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) mu[] <- pro[] <- O[] <- shape[] <- scale[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else { sigma <- sweep( array(shape.o, c(p,p,G)), 3, FUN = "*", STATS = scale ) if(niterin >= itmax) { WARNING <- "inner iteration limit reached" if(warn) warning(WARNING) niterin <- - niterin } ret <- 2 } info <- c(iteration = niterin, error = errin) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]]) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "VEE", d = p, G = G, sigma = sigma, scale = scale, shape = shape, 
orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "VEE", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control), info = info, WARNING = WARNING, returnCode = ret) } ### estepVEE <- function(data, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) scale <- parameters$variance$scale shape <- parameters$variance$shape O <- parameters$variance$orientation G <- ncol(mu) noise <- l == G + 1 if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(NA,n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "VEE", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") # # MICHAEL from here------------------------------------------------------- # temp <- .Fortran( "esvee", x = as.double(data), z = double(n*K), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(K), mu = as.double(mu), O = as.double( t(O) ), scale = as.double(scale), shape = as.double(shape), pro = as.double(pro), Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ), loglik = double(1), eps = as.double(.Machine$double.eps), PACKAGE = "mclust") # loglik <- temp$loglik z <- 
matrix(temp$z, n,K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute E-step" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "VEE", n = n, d = p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } #### cdensVEE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) scale <- parameters$variance$scale shape <- parameters$variance$shape O <- parameters$variance$orientation p <- ncol(data) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(NA,n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "VEE", WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") # # MICHAEL from here------------------------------------------------------- # temp <- .Fortran( "esvee", x = as.double(data), z = double(n*G), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(G), mu = as.double(mu), O = as.double( t(O) ), scale = as.double(scale), shape = as.double(shape), pro = as.double(-1), Vinv = as.double(-1), loglik = double(1), eps = as.double(.Machine$double.eps), PACKAGE = "mclust") # loglik <- temp$loglik z <- matrix(temp$z, n,G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute E-step" 
if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  } else {
    # component densities were computed on the log scale by the Fortran code
    if (!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "VEE", 
            WARNING = WARNING, returnCode = ret)
}

###

# Simulate n observations from a VEE mixture; the first column of the
# returned matrix is the component (group) label.
#
# parameters - list with components pro, mean (d x G), and variance
#              (VEE decomposition: per-component scale, common shape and
#              orientation)
# n          - number of observations to draw
# seed       - optional RNG seed
simVEE <- function(parameters, n, seed = NULL, ...)
{
  if (!is.null(seed)) 
    set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if (any(is.na(parameters[c("mean", "variance")])) || 
      any(is.null(parameters[c("mean", "variance")])))
  {
    warn <- "parameters are missing"   # NOTE(review): assigned but never used
    warning("parameters are missing")
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "VEE"))
  }
  pro <- parameters$pro
  if (is.null(pro)) 
    pro <- rep(1/G, G)                 # default to equal mixing proportions
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins = G)
  x <- matrix(0, n, d)
  # VEE: common shape vector (length d) and orientation, per-component scale
  rtshape <- sqrt(parameters$variance$shape)
  if (length(rtshape) != d) 
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  if (length(rtscale) != G) 
    stop("scale incompatible with mean")
  for (k in 1:G)
  {
    m <- ctabel[k]
    # scale the rows of t(O) by sqrt(scale_k * shape): for the resulting
    # matrix C, t(C) %*% C = scale_k * O diag(shape) t(O) = Sigma_k
    sss <- rtscale[k] * rtshape
    cholSigma <- t(parameters$variance$orientation) * sss
    x[clabels == k, ] <- sweep( matrix(rnorm(m*d), nrow = m, ncol = d) %*% cholSigma,
                                MARGIN = 2, STATS = mu[,k], FUN = "+" )
  }
  dimnames(x) <- list(NULL, 1:d)
  structure(cbind(group = clabels, x), modelName = "VEE")
}

##############################################################################
### EVE model                                                             ####
##############################################################################

# EM algorithm for the EVE model (equal volume and orientation, varying
# shape): a single E-step from the supplied parameters, followed by
# iterative EM via meEVE().
emEVE <- function(data, parameters, prior = NULL, control = emControl(), 
                  warn = NULL, ...)
{
  z <- estepEVE(data, parameters = parameters, warn = warn)$z
  meEVE(data, z = z, prior = prior, control = control, 
        Vinv = parameters$Vinv, warn = warn)
}

####

meEVE <- function(data, z, prior = NULL, control = emControl(), 
                  Vinv = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # non-NULL Vinv signals a noise component: z carries one extra column
  if (!is.null(Vinv))
    { G <- K - 1
      if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) }
  else G <- K
  if(all(is.na(z)))
    { WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "EVE", d = p, G = G,
                       scale=rep(NA,G), shape=rep(NA,p),
                       orientation=array(NA,c(p,p,G)))
      parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G),
                         variance=variance)
      return(structure(list(modelName="EVE", prior=prior, n=n, d=p, G=G,
                            z=z, parameters=parameters, control=control,
                            loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  # LAPACK workspace size for the SVD/eigen calls inside the Fortran code
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  storage.mode(z) <- "double"
  #
  # MICHAEL from here-------------------------------------------------------
  #
  # without prior specification
  if(is.null(prior))
  {
    temp <- .Fortran("meeve",
                     x = as.double(data),
                     z = as.double(z),
                     n = as.integer(n),
                     p = as.integer(p),
                     G = as.integer(G),
                     Gnoise = as.integer(K),
                     mu = double(p*G),
                     O = as.double( diag(p) ),
                     U = double(p*p*G),
                     scale = double(1),
                     shape = as.double( matrix(1, p,G) ),
                     pro = double(K),
                     Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                     loglik = double(1),
                     eqpro = as.logical(control$equalPro),
                     itmaxin = as.integer(control$itmax[2]),
                     tolin = as.double(control$tol[2]),
                     itmaxout = as.integer(control$itmax[1]),
                     tolout = as.double(control$tol[1]),
                     eps = as.double(control$eps),
                     niterin = integer(1),
                     errin = double(1),
                     niterout = integer(1),
                     errout = double(1),
                     lwork = as.integer(lwork),
                     info = as.integer(0),
                     PACKAGE = "mclust")
    #
  } else
  { # with prior -- not implemented for EVE; return a placeholder result
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EVE"),
                             prior[names(prior) != "functionName"]))
    # temp <- .Fortran("meevep", ...)
    temp <- list(x = data, z = z, n = n, p = p, G = G,
                 mu = double(p*G), O = double(p*p), U = double(p*p*G),
                 scale = double(1), shape = double(p*G), pro = double(G),
                 loglik = NA,
                 eqpro = as.logical(control$equalPro),
                 itmaxin = as.integer(control$itmax[2]),
                 tolin = as.double(control$tol[2]),
                 itmaxout = as.integer(control$itmax[1]),
                 tolout = as.double(control$tol[1]),
                 eps = as.double(control$eps),
                 niterin = integer(1), errin = double(1),
                 niterout = integer(1), errout = double(1),
                 lwork = as.integer(lwork), info = FALSE)
    WARNING <- "EVE model is not available with prior"
    if(warn) warning(WARNING)
    temp <- structure(temp, info = NA, WARNING = WARNING, returnCode = -1)
    return(temp)
  }
  z <- matrix(temp$z, n,K)
  niterin <- temp$niterin
  errin <- temp$errin
  niterout <- temp$niterout
  errout <- temp$errout
  loglik <- temp$loglik
  lapackSVDinfo <- temp$info
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale
  shape <- matrix(temp$shape, p,G)
  O <- t( matrix(temp$O, p,p) )
  pro <- temp$pro
  # any non-finite/overflowed estimate is mapped onto the sentinel loglik
  # handled by the "singular covariance" branch below
  if( !is.finite(loglik) |
      any(scale > signif(.Machine$double.xmax, 6)) |
      any(shape > signif(.Machine$double.xmax, 6)) |
      any(O > signif(.Machine$double.xmax, 6)) |
      any(is.nan(scale)) | any(is.nan(shape)) | any(is.nan(O)) )
    { loglik <- .Machine$double.xmax }
  #
  WARNING <- NULL
  if(lapackSVDinfo)
    { if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DGESVD fails to converge" }
      else
        { WARNING <- "input error for LAPACK DSYEV or DGESVD" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
      #
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1 }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    { if(control$equalPro)
        { WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING) }
      else
        { WARNING <- "mixing proportion fell below threshold"
          if(warn) warning(WARNING) }
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- if(control$equalPro) -2 else -3 }
  else
    { # EVE: common scale and orientation, per-component shape column
      sigma <- array( apply(shape, 2, function(sh) scale * O%*%diag(sh)%*%t(O)),
                      c(p,p,G) )
      if(niterin >= control$itmax[2])
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin
          ret <- 2 }
      else if(niterout >= control$itmax[1])
        { WARNING <- "iteration limit reached"
          if(warn) warning(WARNING)
          niterout <- - niterout
          ret <- 1 }
      else ret <- 0
    }
  info <- structure(c(niterout = niterout, errout = errout),
                    inner = c(niterin = niterin, errin = errin))
  # info <- structure(c(iterations = its, error = err),
  #                   inner = c(iterations = inner, error = inerr))
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  ## Sigma = scale * O %*% diag(shape) %*% t(O)
  variance <- list(modelName = "EVE", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape,
                   orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "EVE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

####

# mstepEVE: single M-step for the EVE model given z; body follows.
mstepEVE <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  ##
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    { WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "EVE", d = p, G = G,
                       scale = rep(NA,G), shape = rep(NA,p),
                       orientation = array(NA,c(p,p,G)))
      parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G),
                         variance=variance)
      return(structure(list(modelName="EVE", prior=prior, n=n, d=p, G=G,
                            z=z, parameters=parameters, control=control,
                            loglik=NA),
                       WARNING = WARNING, returnCode = 9))
      # NOTE(review): everything below in this branch is unreachable
      # (dead code left after the return above) -- candidate for removal
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      return(structure(list( n = n, d = p, G = G,
                             mu = matrix(NA,p, G),
                             sigma = array(NA, c(p, p, G)),
                             decomp = list(d = p, G = G,
                                           scale = rep(NA, G),
                                           shape = rep(NA, p),
                                           orientation = array(NA, c(p, p, G))),
                             pro = rep(NA,G),
                             modelName = "EVE",
                             prior = prior),
                       WARNING = WARNING))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if (is.null(control)) control <- emControl()
  # inner-loop controls; itmax/tol may be scalars or (outer, inner) pairs
  itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2]
  tol <- if(length(control$tol) == 1) control$tol else control$tol[2]
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  #
  # MICHAEL from here-------------------------------------------------------
  #
  # without prior specification
  if(is.null(prior))
  {
    temp <- .Fortran("mseve",
                     x = as.double(data),
                     z = as.double(z),
                     n = as.integer(n),
                     p = as.integer(p),
                     G = as.integer(G),
                     mu = double(p*G),
                     U = double(p*p*G),
                     O = as.double( diag(p) ),
                     scale = as.double(1),
                     shape = as.double( matrix(1, p,G) ),
                     pro = double(G),
                     lwork = as.integer(lwork),
                     info = as.integer(0),
                     itmax = as.integer(itmax),
                     tol = as.double(tol),
                     niterin = integer(1),
                     errin = double(1),
                     eps = as.double(.Machine$double.eps),
                     # d = 100000,
                     # trgtvec = as.double(100000),
                     PACKAGE = "mclust")
  } else
  { # with prior -- not implemented for EVE; fall through with placeholder
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EVE"),
                             prior[names(prior) != "functionName"]))
    #
    # temp <- .Fortran("msevep", ...)
    temp <- list(x = data, z = z, n = n, p = p, G = G,
                 mu = double(p*G), U = double(p*p*G), O = double(p*p),
                 scale = double(1), pro = double(G), shape = double(p*G),
                 lwork = as.integer(lwork), info = FALSE,
                 itmax = as.integer(itmax), tol = as.double(tol),
                 niterin = integer(1), errin = double(1),
                 eps = as.double(.Machine$double.eps))
    WARNING <- "EVE model is not available with prior"
    if(warn) warning(WARNING)
  }
  lapackSVDinfo <- temp$info
  errin <- temp$errin
  niterin <- temp$niterin
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale
  O <- t( matrix(temp$O, p,p) )
  shape <- matrix(temp$shape, p,G)
  pro <- temp$pro
  WARNING <- NULL
  #
  if(lapackSVDinfo)
    { if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DGESVD fails to converge" }
      else
        { WARNING <- "input error for LAPACK DSYEV or DGESVD" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
      #
    }
  else if( any(c(scale, shape) > signif(.Machine$double.xmax, 6)) )
    { WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1 }
  else
    { sigma <- array( apply(shape, 2, function(sh) scale * O%*%diag(sh)%*%t(O)),
                      c(p,p,G) )
      if(niterin >= itmax)
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin }
      # NOTE(review): ret <- 2 sits outside the if above, so the success
      # path always reports returnCode 2 even when the inner iteration
      # limit was not reached -- compare meEVE, where 2 is set only on
      # limit hit; verify against upstream mclust sources
      ret <- 2
    }
  info <- c(iteration = niterin, error = errin)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EVE", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape,
                   orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "EVE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control),
            info = info, WARNING = WARNING, returnCode = ret)
}

###

# estepEVE: E-step for the EVE model; returns z and loglik computed by the
# Fortran routine "eseve", with optional noise component via parameters$Vinv.
estepEVE <- function(data, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  G <- ncol(mu)
  # one extra mixing proportion means a noise component is present
  noise <- l == G + 1
  if(!noise)
    { if(l != G)
        stop("pro improperly specified")
      K <- G
      Vinv <- NULL }
  else
    { K <- G + 1
      Vinv <- parameters$Vinv
      if(is.null(Vinv) || Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE) }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,K)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(list(modelName = "EVE", n=n, d=p, G=G, z=z,
                            parameters=parameters, loglik=NA),
                       WARNING = WARNING, returnCode = 9)) }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  #
  # MICHAEL from here-------------------------------------------------------
  #
  temp <- .Fortran( "eseve",
                    x = as.double(data),
                    z = double(n*K),
                    n = as.integer(n),
                    p = as.integer(p),
                    G = as.integer(G),
                    Gnoise = as.integer(K),
                    mu = as.double(mu),
                    O = as.double( t(O) ),
                    scale = as.double(scale),
                    shape = as.double(shape),
                    pro = as.double(pro),
                    Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                    loglik = double(1),
                    eps = as.double(.Machine$double.eps),
                    PACKAGE = "mclust")
  #
  loglik <- temp$loglik
  z <- matrix(temp$z, n,K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- loglik <- NA
      ret <- -1 }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "EVE", n = n, d = p, G = G,
                 z = z, parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

####

# cdensEVE: per-component (log-)densities for the EVE model via "eseve";
# pro = -1 and Vinv = -1 request densities only (no mixture weighting).
cdensEVE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  p <- ncol(data)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "EVE",
                       WARNING = WARNING, returnCode = 9)) }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  #
  # MICHAEL from here-------------------------------------------------------
  #
  temp <- .Fortran( "eseve",
                    x = as.double(data),
                    z = double(n*G),
                    n = as.integer(n),
                    p = as.integer(p),
                    G = as.integer(G),
                    Gnoise = as.integer(G),
                    mu = as.double(mu),
                    O = as.double( t(O) ),
                    scale = as.double(scale),
                    shape = as.double(shape),
                    pro = as.double(-1),
                    Vinv = as.double(-1),
                    loglik = double(1),
                    eps = as.double(.Machine$double.eps),
                    PACKAGE = "mclust")
  #
  loglik <- temp$loglik
  z <- matrix(temp$z, n,G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1 }
  else
    { if (!logarithm) z <- exp(z)
      ret <- 0 }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "EVE",
            WARNING = WARNING, returnCode = ret)
}

###

# simEVE: simulate n observations from an EVE mixture.
# EVE: scalar scale, p x G shape matrix (one shape column per component).
simEVE <- function(parameters, n, seed = NULL, ...)
{
  if (!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if (any(is.na(parameters[c("mean", "variance")])) ||
      any(is.null(parameters[c("mean", "variance")])))
    { warn <- "parameters are missing"
      warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1), modelName = "EVE")) }
  pro <- parameters$pro
  if (is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins = G)
  x <- matrix(0, n, d)
  rtshape <- sqrt(parameters$variance$shape)
  if (dim(rtshape)[1] != d | dim(rtshape)[2] != G)
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  for (k in 1:G)
  { m <- ctabel[k]
    sss <- rtscale * rtshape[,k]
    cholSigma <- t(parameters$variance$orientation) * sss
    x[clabels == k, ] <- sweep( matrix(rnorm(m*d), nrow = m, ncol = d) %*% cholSigma,
                                MARGIN = 2, STATS = mu[,k], FUN = "+" )
  }
  dimnames(x) <- list(NULL, 1:d)
  structure(cbind(group = clabels, x), modelName = "EVE")
}

##############################################################################
### VVE model                                                             ####
##############################################################################

# emVVE: one EM run for the VVE model -- E-step then meVVE().
emVVE <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepVVE(data, parameters = parameters, warn = warn)$z
  meVVE(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

####

# meVVE: EM iterations for the VVE model starting from z; body follows.
meVVE <- function(data, z, prior = NULL, control = emControl(),
                  Vinv = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # non-NULL Vinv signals a noise component: z carries one extra column
  if (!is.null(Vinv))
    { G <- K - 1
      if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) }
  else G <- K
  if(all(is.na(z)))
    { WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VVE", d = p, G = G,
                       scale=rep(NA,G), shape=rep(NA,p),
                       orientation=array(NA,c(p,p,G)))
      parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G),
                         variance=variance)
      return(structure(list(modelName="VVE", prior=prior, n=n, d=p, G=G,
                            z=z, parameters=parameters, control=control,
                            loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  storage.mode(z) <- "double"
  # MICHAEL from here-------------------------------------------------------
  #
  # without prior specification
  if(is.null(prior))
  {
    temp <- .Fortran("mevve",
                     x = as.double(data),
                     z = as.double(z),
                     n = as.integer(n),
                     p = as.integer(p),
                     G = as.integer(G),
                     Gnoise = as.integer(K),
                     mu = double(p*G),
                     O = as.double( diag(p) ),
                     U = double(p*p*G),
                     scale = as.double( rep(1, G) ),
                     shape = as.double( matrix(1, p,G) ),
                     pro = double(K),
                     Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                     loglik = double(1),
                     eqpro = as.logical(control$equalPro),
                     itmaxin = as.integer(control$itmax[2]),
                     tolin = as.double(control$tol[2]),
                     itmaxout = as.integer(control$itmax[1]),
                     tolout = as.double(control$tol[1]),
                     eps = as.double(control$eps),
                     niterin = integer(1),
                     errin = double(1),
                     niterout = integer(1),
                     errout = double(1),
                     lwork = as.integer(lwork),
                     info = as.integer(0),
                     PACKAGE = "mclust")
    #
  } else
  { # with prior -- not implemented for VVE; return a placeholder result
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VVE"),
                             prior[names(prior) != "functionName"]))
    # temp <- .Fortran("mevvep", ...)
    temp <- list(x = data, z = z, n = n, p = p, G = G,
                 mu = double(p*G), O = double(p*p), U = double(p*p*G),
                 scale = as.double(rep(1, G)), shape = double(p*G),
                 pro = double(G), loglik = NA,
                 eqpro = as.logical(control$equalPro),
                 itmaxin = as.integer(control$itmax[2]),
                 tolin = as.double(control$tol[2]),
                 itmaxout = as.integer(control$itmax[1]),
                 tolout = as.double(control$tol[1]),
                 eps = as.double(control$eps),
                 niterin = integer(1), errin = double(1),
                 niterout = integer(1), errout = double(1),
                 lwork = as.integer(lwork), info = FALSE)
    WARNING <- "VVE model is not available with prior"
    if(warn) warning(WARNING)
    temp <- structure(temp, info = NA, WARNING = WARNING, returnCode = -1)
    return(temp)
  }
  z <- matrix(temp$z, n,K)
  niterin <- temp$niterin
  errin <- temp$errin
  niterout <- temp$niterout
  errout <- temp$errout
  loglik <- temp$loglik
  lapackSVDinfo <- temp$info
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale
  shape <- matrix(temp$shape, p,G)
  O <- t( matrix(temp$O, p,p) )
  pro <- temp$pro
  # map any non-finite/overflowed estimate onto the sentinel loglik
  if( !is.finite(loglik) |
      any(scale > signif(.Machine$double.xmax, 6)) |
      any(shape > signif(.Machine$double.xmax, 6)) |
      any(O > signif(.Machine$double.xmax, 6)) |
      any(is.nan(scale)) | any(is.nan(shape)) | any(is.nan(O)) )
    { loglik <- .Machine$double.xmax }
  #
  WARNING <- NULL
  if(lapackSVDinfo)
    { if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DGESVD fails to converge" }
      else
        { WARNING <- "input error for LAPACK DSYEV or DGESVD" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
      #
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1 }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    { if(control$equalPro)
        { WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING) }
      else
        { WARNING <- "mixing proportion fell below threshold"
          if(warn) warning(WARNING) }
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- if(control$equalPro) -2 else -3 }
  else
    { # VVE: per-component scale and shape, common orientation
      sigma <- array(NA, c(p,p,G))
      for ( g in 1:G )
        sigma[,,g] <- scale[g] * O %*% diag(shape[,g]) %*% t(O)
      if(niterin >= control$itmax[2])
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin
          ret <- 2 }
      else if(niterout >= control$itmax[1])
        { WARNING <- "iteration limit reached"
          if(warn) warning(WARNING)
          niterout <- - niterout
          ret <- 1 }
      else ret <- 0
    }
  info <- structure(c(niterout = niterout, errout = errout),
                    inner = c(niterin = niterin, errin = errin))
  # info <- structure(c(iterations = its, error = err),
  #                   inner = c(iterations = inner, error = inerr))
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  ## Sigma = scale * O %*% diag(shape) %*% t(O)
  variance <- list(modelName = "VVE", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape,
                   orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VVE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

####

# mstepVVE: single M-step for the VVE model given z; body follows.
mstepVVE <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...)
{
  if (is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  ##
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    { WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VVE", d = p, G = G,
                       scale = rep(NA,G), shape = rep(NA,p),
                       orientation = array(NA,c(p,p,G)))
      parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G),
                         variance=variance)
      return(structure(list(modelName="VVE", prior=prior, n=n, d=p, G=G,
                            z=z, parameters=parameters, control=control,
                            loglik=NA),
                       WARNING = WARNING, returnCode = 9))
      # NOTE(review): unreachable dead code after the return above --
      # candidate for removal
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      return(structure(list( n = n, d = p, G = G,
                             mu = matrix(NA,p, G),
                             sigma = array(NA, c(p, p, G)),
                             decomp = list(d = p, G = G,
                                           scale = rep(NA, G),
                                           shape = rep(NA, p),
                                           orientation = array(NA, c(p, p, G))),
                             pro = rep(NA,G),
                             modelName = "VVE",
                             prior = prior),
                       WARNING = WARNING))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if (is.null(control)) control <- emControl()
  # inner-loop controls; itmax/tol may be scalars or (outer, inner) pairs
  itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2]
  tol <- if(length(control$tol) == 1) control$tol else control$tol[2]
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  #
  # MICHAEL from here-------------------------------------------------------
  #
  # without prior specification
  if(is.null(prior))
  {
    temp <- .Fortran("msvve",
                     x = as.double(data),
                     z = as.double(z),
                     n = as.integer(n),
                     p = as.integer(p),
                     G = as.integer(G),
                     mu = double(p*G),
                     U = double(p*p*G),
                     O = as.double( diag(p) ),
                     scale = as.double( rep(1, G) ),
                     shape = as.double( matrix(1, p,G) ),
                     pro = double(G),
                     lwork = as.integer(lwork),
                     info = as.integer(0),
                     itmax = as.integer(itmax),
                     tol = as.double(tol),
                     niterin = integer(1),
                     errin = double(1),
                     eps = as.double(.Machine$double.eps),
                     PACKAGE = "mclust")
  } else
  { # with prior -- not implemented for VVE; fall through with placeholder
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VVE"),
                             prior[names(prior) != "functionName"]))
    #
    # temp <- .Fortran("msvvep", ...)
    temp <- list(x = data, z = z, n = n, p = p, G = G,
                 mu = double(p*G), U = double(p*p*G), O = double(p*p),
                 scale = double(1), pro = double(G), shape = double(p*G),
                 lwork = as.integer(lwork), info = FALSE,
                 itmax = as.integer(itmax), tol = as.double(tol),
                 niterin = integer(1), errin = double(1),
                 eps = as.double(.Machine$double.eps))
    WARNING <- "VVE model is not available with prior"
    if(warn) warning(WARNING)
  }
  lapackSVDinfo <- temp$info
  errin <- temp$errin
  niterin <- temp$niterin
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  O <- t( matrix(temp$O, p,p) )
  shape <- matrix(temp$shape, p,G)
  scale <- temp$scale
  pro <- temp$pro
  WARNING <- NULL
  #
  if(lapackSVDinfo)
    { if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DGESVD fails to converge" }
      else
        { WARNING <- "input error for LAPACK DSYEV or DGESVD" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
      #
    }
  else if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)))
    { WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1 }
  else
    { # sigma <- array( apply(shape, 2, function(sh) O%*%diag(sh)%*%t(O)), c(p,p,G) )
      sigma <- array(NA, c(p,p,G))
      for ( g in 1:G )
        sigma[,,g] <- scale[g] * O %*% diag(shape[,g]) %*% t(O)
      if(niterin >= itmax)
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin }
      # NOTE(review): ret <- 2 sits outside the if above, so the success
      # path always reports returnCode 2 even when the inner iteration
      # limit was not reached -- verify against upstream mclust sources
      ret <- 2
    }
  info <- c(iteration = niterin, error = errin)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVE", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape,
                   orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "VVE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control),
            info = info, WARNING = WARNING, returnCode = ret)
}

###

# estepVVE: E-step for the VVE model via the Fortran routine "esvve",
# with optional noise component via parameters$Vinv.
estepVVE <- function(data, parameters, warn = NULL, ...)
{
  if (is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  G <- ncol(mu)
  # one extra mixing proportion means a noise component is present
  noise <- l == G + 1
  if(!noise)
    { if(l != G)
        stop("pro improperly specified")
      K <- G
      Vinv <- NULL }
  else
    { K <- G + 1
      Vinv <- parameters$Vinv
      if(is.null(Vinv) || Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE) }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,K)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(list(modelName = "VVE", n=n, d=p, G=G, z=z,
                            parameters=parameters, loglik=NA),
                       WARNING = WARNING, returnCode = 9)) }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  #
  # MICHAEL from here-------------------------------------------------------
  #
  temp <- .Fortran( "esvve",
                    x = as.double(data),
                    z = double(n*K),
                    n = as.integer(n),
                    p = as.integer(p),
                    G = as.integer(G),
                    Gnoise = as.integer(K),
                    mu = as.double(mu),
                    O = as.double( t(O) ),
                    scale = as.double(scale),
                    shape = as.double(shape),
                    pro = as.double(pro),
                    Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                    loglik = double(1),
                    eps = as.double(.Machine$double.eps),
                    PACKAGE = "mclust")
  #
  loglik <- temp$loglik
  z <- matrix(temp$z, n,K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- loglik <- NA
      ret <- -1 }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "VVE", n = n, d = p, G = G,
                 z = z, parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

####

# cdensVVE: per-component (log-)densities for the VVE model via "esvve";
# pro = -1 and Vinv = -1 request densities only (no mixture weighting).
cdensVVE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  p <- ncol(data)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "VVE",
                       WARNING = WARNING, returnCode = 9)) }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  #
  # MICHAEL from here-------------------------------------------------------
  #
  temp <- .Fortran( "esvve",
                    x = as.double(data),
                    z = double(n*G),
                    n = as.integer(n),
                    p = as.integer(p),
                    G = as.integer(G),
                    Gnoise = as.integer(G),
                    mu = as.double(mu),
                    O = as.double( t(O) ),
                    scale = as.double(scale),
                    shape = as.double(shape),
                    pro = as.double(-1),
                    Vinv = as.double(-1),
                    loglik = double(1),
                    eps = as.double(.Machine$double.eps),
                    PACKAGE = "mclust")
  #
  loglik <- temp$loglik
  z <- matrix(temp$z, n,G)
  WARNING <- NULL
if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute E-step" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "VVE", WARNING = WARNING, returnCode = ret) } ### simVVE <- function(parameters, n, seed = NULL, ...) { if (!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if (any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warn <- "parameters are missing" warning("parameters are missing") return(structure(matrix(as.double(NA), n, d + 1), modelName = "VVE")) } pro <- parameters$pro if (is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins = G) x <- matrix(0, n, d) rtshape <- sqrt(parameters$variance$shape) if (dim(rtshape)[1] != d | dim(rtshape)[2] != G) stop("shape incompatible with mean") rtscale <- sqrt(parameters$variance$scale) if (length(rtscale) != G) stop("scale incompatible with mean") for (k in 1:G) { m <- ctabel[k] sss <- rtscale[k] * rtshape[,k] cholSigma <- t(parameters$variance$orientation) * sss x[clabels == k, ] <- sweep( matrix(rnorm(m*d), nrow = m, ncol = d) %*% cholSigma, MARGIN = 2, STATS = mu[,k], FUN = "+" ) } dimnames(x) <- list(NULL, 1:d) structure(cbind(group = clabels, x), modelName = "VVE") } ############################################################################# # Examples of some simple R wrapper functions fcrossprod <- function(X, Y, ...) 
{
  out <- .Fortran("crossprodf",
                  X = as.matrix(X),
                  Y = as.matrix(Y),
                  n = as.integer(nrow(X)),
                  p = as.integer(ncol(X)),
                  q = as.integer(ncol(Y)),
                  XTY = matrix(0, ncol(X), ncol(Y)),
                  PACKAGE = "mclust")
  return(out$XTY)
}mclust/R/mclust.R0000644000175000017500000100442614156711260013560 0ustar nileshnileshMclust <- function(data, G = NULL, modelNames = NULL,
                   prior = NULL,
                   control = emControl(),
                   initialization = NULL,
                   warn = mclust.options("warn"),
                   x = NULL,
                   verbose = interactive(), ...)
{
  # Model-based clustering: fits models over G and modelNames via
  # mclustBIC(), then returns the best model as an object of class "Mclust".
  call <- match.call()
  data <- data.matrix(data)
  d <- ncol(data)
  if(!is.null(x))
    { if(!inherits(x, "mclustBIC"))
        stop("If provided, argument x must be an object of class 'mclustBIC'")
    }
  # re-dispatch the current call to mclustBIC() with the same arguments
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name("mclustBIC")
  mc[[2]] <- data
  BIC <- eval(mc, parent.frame())
  # get the best model from BIC table
  G <- attr(BIC, "G")
  modelNames <- attr(BIC, "modelNames")
  Sumry <- summary(BIC, data, G = G, modelNames = modelNames)
  if(length(Sumry)==0)
    { if(warn) warning("no model(s) could be fitted. 
Try adjusting G and modelNames arguments")
      return() }
  if(!(length(G) == 1))
    { bestG <- length(tabulate(Sumry$cl))
      if(warn)
        { if(bestG == max(G) & warn)
            warning("optimal number of clusters occurs at max choice")
          else if(bestG == min(G) & warn)
            warning("optimal number of clusters occurs at min choice")
        }
    }
  oldClass(Sumry) <- NULL
  Sumry$bic <- Sumry$bic[1]
  Sumry$icl <- icl.Mclust(Sumry)
  Sumry$hypvol <- if(is.null(attr(BIC, "Vinv")))
                    as.double(NA) else 1/attr(BIC, "Vinv")
  # df <- (2*Sumry$loglik - Sumry$bic)/log(Sumry$n)
  df <- if(is.null(Sumry$modelName)) NULL else
          with(Sumry, nMclustParams(modelName, d, G,
                                    noise = (!is.na(hypvol)),
                                    equalPro = attr(Sumry, "control")$equalPro))
  ans <- c(list(call = call, data = data, BIC = BIC, df = df), Sumry)
  orderedNames <- c("call", "data", "modelName",
                    "n", "d", "G",
                    "BIC", "loglik", "df", "bic", "icl", "hypvol",
                    "parameters", "z",
                    "classification", "uncertainty")
  structure(ans[orderedNames], class = "Mclust")
}

# print.Mclust: one-line description of the fitted model and the list of
# available components.
print.Mclust <- function(x, digits = getOption("digits"), ...)
{
  txt <- paste0("\'", class(x)[1], "\' model object: ")
  noise <- !is.null(attr(x$BIC, "Vinv"))
  if(x$G == 0 & noise)
    { txt <- paste0(txt, "single noise component") }
  else
    { txt <- paste0(txt, "(", x$model, ",", x$G, ")",
                    if(noise) " + noise component") }
  catwrap(txt)
  cat("\n")
  catwrap("\nAvailable components:\n")
  print(names(x))
  invisible(x)
}

# summary.Mclust: collect model information into a "summary.Mclust"
# object; body follows.
summary.Mclust <- function(object, classification = TRUE, parameters = FALSE, ...)
{
  classification <- as.logical(classification)
  parameters <- as.logical(parameters)
  # collect info
  G <- object$G
  # object$hypvol is NA when no noise component was fitted; otherwise the
  # hypervolume doubles as both flag and value for the noise term
  noise <- if(is.na(object$hypvol)) FALSE else object$hypvol
  pro <- object$parameters$pro
  if(is.null(pro)) pro <- 1
  # label mixing proportions 1..G, with "0" appended for the noise component
  names(pro) <- if(noise) c(seq_len(G),0) else seq(G)
  mean <- object$parameters$mean
  if(object$d > 1)
    { sigma <- object$parameters$variance$sigma }
  else
    { # univariate case: recycle sigmasq so there is one variance per component
      sigma <- rep(object$parameters$variance$sigmasq, object$G)[1:object$G]
      names(sigma) <- names(mean) }
  if(is.null(object$density))
    { # clustering model
      title <- paste("Gaussian finite mixture model fitted by EM algorithm")
      printClassification <- classification
      classification <- if(printClassification)
        { factor(object$classification,
                 levels = { l <- seq_len(object$G)
                            if(is.numeric(noise)) l <- c(l,0)
                            l }) }
        else NULL
  }
  else
    { # density-estimation model: no classification to report
      title <- paste("Density estimation via Gaussian finite mixture modeling")
      printClassification <- FALSE
      classification <- NULL
  }
  #
  obj <- list(title = title, n = object$n, d = object$d,
              G = G, modelName = object$modelName,
              loglik = object$loglik, df = object$df,
              bic = object$bic, icl = object$icl,
              pro = pro, mean = mean, variance = sigma,
              noise = noise,
              prior = attr(object$BIC, "prior"),
              printParameters = parameters,
              printClassification = printClassification,
              classification = classification)
  class(obj) <- "summary.Mclust"
  return(obj)
}

# Print method for 'summary.Mclust' objects.
print.summary.Mclust <- function(x, digits = getOption("digits"), ...)
{ txt <- paste(rep("-", min(nchar(x$title), getOption("width"))), collapse = "") catwrap(txt) catwrap(x$title) catwrap(txt) # cat("\n") if(x$G == 0) { catwrap("Mclust model with only a noise component:") } else { catwrap(paste0("Mclust ", x$modelName, " (", mclustModelNames(x$modelName)$type, ") model with ", x$G, ifelse(x$G > 1, " components", " component"), if(x$noise) " and a noise term", ":")) } cat("\n") # if(!is.null(x$prior)) { catwrap(paste0("Prior: ", x$prior$functionName, "(", paste(names(x$prior[-1]), x$prior[-1], sep = " = ", collapse = ", "), ")", sep = "")) cat("\n") } # tab <- data.frame("log-likelihood" = x$loglik, "n" = x$n, "df" = x$df, "BIC" = x$bic, "ICL" = x$icl, row.names = "", check.names = FALSE) print(tab, digits = digits) # if(x$printClassification) { cat("\nClustering table:") print(table(x$classification), digits = digits) } # if(x$printParameters) { cat("\nMixing probabilities:\n") print(x$pro, digits = digits) cat("\nMeans:\n") print(x$mean, digits = digits) cat("\nVariances:\n") if(x$d > 1) { for(g in 1:x$G) { cat("[,,", g, "]\n", sep = "") print(x$variance[,,g], digits = digits) } } else print(x$variance, digits = digits) if(x$noise) { cat("\nHypervolume of noise component:\n") cat(signif(x$noise, digits = digits), "\n") } } # invisible(x) } plot.Mclust <- function(x, what = c("BIC", "classification", "uncertainty", "density"), dimens = NULL, xlab = NULL, ylab = NULL, addEllipses = TRUE, main = FALSE, ...) { object <- x # Argh. Really want to use object anyway if(!inherits(object, "Mclust")) stop("object not of class 'Mclust'") data <- object$data p <- ncol(data) if(p == 1) colnames(data) <- deparse(object$call$data) dimens <- if(is.null(dimens)) seq(p) else dimens[dimens <= p] d <- length(dimens) main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) what <- match.arg(what, several.ok = TRUE) oldpar <- par(no.readonly = TRUE) plot.Mclust.bic <- function(...) plot.mclustBIC(object$BIC, xlab = xlab, ...) 
plot.Mclust.classification <- function(...) { if(d == 1) { mclust1Dplot(data = data[,dimens,drop=FALSE], what = "classification", classification = object$classification, z = object$z, xlab = if(is.null(xlab)) colnames(data)[dimens] else xlab, main = main, ...) } if(d == 2) { pars <- object$parameters pars$mean <- pars$mean[dimens,,drop=FALSE] pars$variance$d <- length(dimens) pars$variance$sigma <- pars$variance$sigma[dimens,dimens,,drop=FALSE] mclust2Dplot(data = data[,dimens,drop=FALSE], what = "classification", classification = object$classification, parameters = if(addEllipses) pars else NULL, xlab = if(is.null(xlab)) colnames(data)[dimens][1] else xlab, ylab = if(is.null(ylab)) colnames(data)[dimens][2] else ylab, main = main, ...) } if(d > 2) { pars <- object$parameters pars$mean <- pars$mean[dimens,,drop=FALSE] pars$variance$d <- length(dimens) pars$variance$sigma <- pars$variance$sigma[dimens,dimens,,drop=FALSE] on.exit(par(oldpar)) par(mfrow = c(d, d), mar = rep(0.2/2,4), oma = rep(3,4)) for(i in seq(d)) { for(j in seq(d)) { if(i == j) { plot(data[, dimens[c(j, i)]], type = "n", xlab = "", ylab = "", axes = FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), labels = colnames(data[, dimens])[i], cex = 1.5, adj = 0.5) box() } else { coordProj(data = data, dimens = dimens[c(j,i)], what = "classification", classification = object$classification, parameters = object$parameters, addEllipses = addEllipses, main = FALSE, xaxt = "n", yaxt = "n", ...) } if(i == 1 && (!(j%%2))) axis(3) if(i == d && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == d && (i%%2)) axis(4) } } } } plot.Mclust.uncertainty <- function(...) 
{ pars <- object$parameters if(d > 1) { pars$mean <- pars$mean[dimens,,drop=FALSE] pars$variance$d <- length(dimens) pars$variance$sigma <- pars$variance$sigma[dimens,dimens,,drop=FALSE] } # if(p == 1 || d == 1) { mclust1Dplot(data = data[,dimens,drop=FALSE], what = "uncertainty", parameters = pars, z = object$z, xlab = if(is.null(xlab)) colnames(data)[dimens] else xlab, main = main, ...) } if(p == 2 || d == 2) { mclust2Dplot(data = data[,dimens,drop=FALSE], what = "uncertainty", parameters = pars, # uncertainty = object$uncertainty, z = object$z, classification = object$classification, xlab = if(is.null(xlab)) colnames(data)[dimens][1] else xlab, ylab = if(is.null(ylab)) colnames(data)[dimens][2] else ylab, addEllipses = addEllipses, main = main, ...) } if(p > 2 && d > 2) { on.exit(par(oldpar)) par(mfrow = c(d, d), mar = rep(0,4), mar = rep(0.2/2,4), oma = rep(3,4)) for(i in seq(d)) { for(j in seq(d)) { if(i == j) { plot(data[, dimens[c(j, i)]], type="n", xlab = "", ylab = "", axes = FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), labels = colnames(data[,dimens])[i], cex = 1.5, adj = 0.5) box() } else { coordProj(data = data, what = "uncertainty", parameters = object$parameters, # uncertainty = object$uncertainty, z = object$z, classification = object$classification, dimens = dimens[c(j,i)], main = FALSE, addEllipses = addEllipses, xaxt = "n", yaxt = "n", ...) } if(i == 1 && (!(j%%2))) axis(3) if(i == d && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == d && (i%%2)) axis(4) } } } } plot.Mclust.density <- function(...) { if(p == 1) { objdens <- as.densityMclust(object) plotDensityMclust1(objdens, xlab = if(is.null(xlab)) colnames(data)[dimens] else xlab, main = if(main) main else NULL, ...) # mclust1Dplot(data = data, # parameters = object$parameters, # # z = object$z, # what = "density", # xlab = if(is.null(xlab)) colnames(data)[dimens] else xlab, # main = main, ...) 
} if(p == 2) { surfacePlot(data = data, parameters = object$parameters, what = "density", xlab = if(is.null(xlab)) colnames(data)[1] else xlab, ylab = if(is.null(ylab)) colnames(data)[2] else ylab, main = main, ...) } if(p > 2) { objdens <- as.densityMclust(object) objdens$data <- objdens$data[,dimens,drop=FALSE] objdens$varname <- colnames(data)[dimens] objdens$range <- apply(data, 2, range) objdens$d <- d objdens$parameters$mean <- objdens$parameters$mean[dimens,,drop=FALSE] objdens$parameters$variance$d <- d objdens$parameters$variance$sigma <- objdens$parameters$variance$sigma[dimens,dimens,,drop=FALSE] # if (d == 1) plotDensityMclust1(objdens, ...) else if (d == 2) plotDensityMclust2(objdens, ...) else plotDensityMclustd(objdens, ...) } } if(interactive() & length(what) > 1) { title <- "Model-based clustering plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "BIC") plot.Mclust.bic(...) if(what[choice] == "classification") plot.Mclust.classification(...) if(what[choice] == "uncertainty") plot.Mclust.uncertainty(...) if(what[choice] == "density") plot.Mclust.density(...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "BIC")) plot.Mclust.bic(...) if(any(what == "classification")) plot.Mclust.classification(...) if(any(what == "uncertainty")) plot.Mclust.uncertainty(...) if(any(what == "density")) plot.Mclust.density(...) } invisible() } logLik.Mclust <- function(object, ...) 
{
  # Log-likelihood of the fitted model; recompute from the mixture density
  # when it is not stored on the object.
  if(is.null(object$loglik))
    l <- sum(do.call("dens", c(object, logarithm = TRUE)))
  else
    l <- object$loglik
  if(is.null(object$df))
    { # degrees of freedom = number of estimated parameters
      noise <- if(is.null(object$hypvol)) FALSE else (!is.na(object$hypvol))
      equalPro <- if(is.null(object$BIC)) FALSE else
                  attr(object$BIC, "control")$equalPro
      df <- with(object, nMclustParams(modelName, d, G,
                                       noise = noise,
                                       equalPro = equalPro))
  }
  else df <- object$df
  attr(l, "nobs") <- object$n
  attr(l, "df") <- df
  class(l) <- "logLik"
  return(l)
}

# Predict classifications and posterior membership probabilities for new
# data from a fitted 'Mclust' model.
# Returns a list with components:
#   classification - MAP class labels (0 = noise component, if present)
#   z              - matrix of posterior membership probabilities
predict.Mclust <- function(object, newdata, ...)
{
  if(!inherits(object, "Mclust"))
    stop("object not of class 'Mclust'")
  if(missing(newdata))
    { newdata <- object$data }
  newdata <- as.matrix(newdata)
  if(ncol(object$data) != ncol(newdata))
    { stop("newdata must match ncol of object data") }
  #
  object$data <- newdata
  # component log-densities evaluated at the new observations
  z <- do.call("cdens", c(object, list(logarithm = TRUE)))
  pro <- object$parameters$pro
  pro <- pro/sum(pro)
  noise <- (!is.na(object$hypvol))
  z <- if(noise) cbind(z, log(object$parameters$Vinv))
       else cbind(z) # drop redundant attributes
  # posterior probabilities computed via log-sum-exp for numerical stability
  z <- sweep(z, MARGIN = 2, FUN = "+", STATS = log(pro))
  z <- sweep(z, MARGIN = 1, FUN = "-", STATS = apply(z, 1, logsumexp))
  z <- exp(z)
  cl <- c(seq(object$G), if(noise) 0)
  colnames(z) <- cl
  cl <- cl[apply(z, 1, which.max)]
  out <- list(classification = cl, z = z)
  return(out)
}

# Compute BIC over all requested combinations of number of components G and
# covariance model names, returning a matrix of class 'mclustBIC'.
mclustBIC <- function(data, G = NULL, modelNames = NULL,
                      prior = NULL, control = emControl(),
                      initialization = list(hcPairs = NULL,
                                            subset = NULL,
                                            noise = NULL),
                      Vinv = NULL, warn = mclust.options("warn"),
                      x = NULL, verbose = interactive(), ...)
{ dimData <- dim(data) oneD <- (is.null(dimData) || length(dimData[dimData > 1]) == 1) if(!oneD && length(dimData) != 2) stop("data must be a vector or a matrix") if(oneD) { data <- drop(as.matrix(data)) n <- length(data) d <- 1 } else { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) } if(is.null(x)) { if(is.null(modelNames)) { if(d == 1) { modelNames <- c("E", "V") } else { modelNames <- mclust.options("emModelNames") if(n <= d) { # select only spherical and diagonal models m <- match(modelNames, c("EII", "VII", "EEI", "VEI", "EVI", "VVI"), nomatch = 0) modelNames <- modelNames[m] } } } if(!is.null(prior)) { # remove models not available with prior modelNames <- setdiff(modelNames, c("EVE","VEE","VVE","EVV")) } if(is.null(G)) { G <- if (is.null(initialization$noise)) 1:9 else 0:9 } else { G <- sort(as.integer(unique(G))) } if(is.null(initialization$noise)) { if (any(G > n)) G <- G[G <= n] } else { noise <- initialization$noise if(is.logical(noise)) noise <- which(noise) if(any(match(noise, 1:n, nomatch = 0) == 0)) stop("numeric or logical vector for noise must correspond to row indexes of data") initialization$noise <- noise nnoise <- length(noise) if(any(G > (n-nnoise))) G <- G[G <= n-nnoise] } if(!is.null(initialization$subset)) { subset <- initialization$subset if(is.logical(subset)) subset <- which(subset) initialization$subset <- subset if(any(G > n)) G <- G[G <= n] } Gall <- G Mall <- modelNames } else { if(!missing(prior) || !missing(control) || !missing(initialization) || !missing(Vinv)) stop("only G and modelNames may be specified as arguments when x is supplied") prior <- attr(x,"prior") control <- attr(x,"control") initialization <- attr(x,"initialization") Vinv <- attr(x,"Vinv") warn <- attr(x,"warn") Glabels <- dimnames(x)[[1]] Mlabels <- dimnames(x)[[2]] if(is.null(G)) G <- Glabels if(is.null(modelNames)) modelNames <- Mlabels Gmatch <- match(as.character(G), Glabels, nomatch = 0) Mmatch <- match(modelNames, Mlabels, nomatch = 0) 
if(all(Gmatch) && all(Mmatch)) { out <- x[as.character(G),modelNames,drop=FALSE] mostattributes(out) <- attributes(x) attr(out, "dim") <- c(length(G), length(modelNames)) attr(out, "dimnames") <- list(G, modelNames) attr(out, "G") <- as.numeric(G) attr(out, "modelNames") <- modelNames attr(out, "returnCodes") <- attr(x, "returnCodes")[as.character(G),modelNames,drop=FALSE] return(out) } Gall <- sort(as.numeric(unique(c(as.character(G), Glabels)))) Mall <- unique(c(modelNames, Mlabels)) } if(any(as.logical(as.numeric(G))) < 0) { if(is.null(initialization$noise)) { stop("G must be positive") } else { stop("G must be nonnegative") } } if(d == 1 && any(nchar(modelNames) > 1)) { Emodel <- any(sapply(modelNames, function(x) charmatch("E", x, nomatch = 0)[1]) == 1) Vmodel <- any(sapply(modelNames, function(x) charmatch("V", x, nomatch = 0)[1]) == 1) modelNames <- c("E", "V")[c(Emodel, Vmodel)] } # set subset for initialization when subset is not, no hcPairs is provided, and # data size is larger than the value specified in mclust.options() if(is.null(initialization$subset) & is.null(initialization$hcPairs) & n > mclust.options("subset")) { initialization$subset <- sample(seq.int(n), size = mclust.options("subset"), replace = FALSE) } l <- length(Gall) m <- length(Mall) if(verbose) { cat("fitting ...\n") flush.console() pbar <- txtProgressBar(min = 0, max = l*m+1, style = 3) on.exit(close(pbar)) ipbar <- 0 } EMPTY <- -.Machine$double.xmax BIC <- RET <- matrix(EMPTY, nrow = l, ncol = m, dimnames = list(as.character(Gall), as.character(Mall))) if(!is.null(x)) { BIC[dimnames(x)[[1]],dimnames(x)[[2]]] <- x RET[dimnames(x)[[1]],dimnames(x)[[2]]] <- attr(x, "returnCodes") BIC <- BIC[as.character(G),modelNames,drop=FALSE] RET <- RET[as.character(G),modelNames,drop=FALSE] } G <- as.numeric(G) Glabels <- as.character(G) Gout <- G if(is.null(initialization$noise)) { ## standard case ---- if (G[1] == 1) { for(mdl in modelNames[BIC["1",] == EMPTY]) { out <- mvn(modelName = mdl, data = 
data, prior = prior) BIC["1", mdl] <- bic(modelName = mdl, loglik = out$loglik, n = n, d = d, G = 1, equalPro = FALSE) RET["1", mdl] <- attr(out, "returnCode") if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } if (l == 1) { BIC[BIC == EMPTY] <- NA if(verbose) { ipbar <- l*m+1; setTxtProgressBar(pbar, ipbar) } return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = initialization, warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$subset)) { ## all data in initial hierarchical clustering phase (no subset) ---- if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data, modelName = mclust.options("hcModelName"), use = mclust.options("hcUse")) } else { hcPairs <- hc(data = data, modelName = "EII", use = mclust.options("hcUse")) } } else { hcPairs <- NULL # hcPairs <- hc(data = data, modelName = "E") } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) for (g in Glabels) { if (d > 1 || !is.null(hcPairs)) { cl <- clss[,g] } else { cl <- qclass(data, as.numeric(g)) } if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } z <- unmap(cl, groups = 1:max(cl)) if(any(apply( z, 2, max) == 0) & warn) { # missing groups if(warn) warning("there are missing groups") small <- sqrt(.Machine$double.neg.eps) z[z < small] <- small z <- t(apply( z, 1, function(x) x/sum(x))) } for(modelName in na.omit(modelNames[BIC[g,] == EMPTY])) { out <- me(data = data, modelName = modelName, z = z, prior = prior, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = as.numeric(g), equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } } } else { ## initial hierarchical clustering phase on a subset ---- 
subset <- initialization$subset if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[subset,], modelName = mclust.options("hcModelName"), use = mclust.options("hcUse")) } else { hcPairs <- hc(data = data[subset,], modelName = "EII", use = mclust.options("hcUse")) } } else { hcPairs <- NULL # hcPairs <- hc(modelName = "E", data = data[subset]) } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) for (g in Glabels) { if (d > 1 || !is.null(hcPairs)) { cl <- clss[, g] } else { cl <- qclass(data[subset], as.numeric(g)) } if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } z <- unmap(cl, groups = 1:max(cl)) if(any(apply( z, 2, max) == 0) & warn) { # missing groups if(warn) warning("there are missing groups") small <- sqrt(.Machine$double.neg.eps) z[z < small] <- small z <- t(apply( z, 1, function(x) x/sum(x))) } for (modelName in modelNames[!is.na(BIC[g,])]) { ms <- mstep(data = as.matrix(data)[initialization$subset,], modelName = modelName, z = z, prior = prior, control = control, warn = warn) # # ctrl <- control # ctrl$itmax[1] <- 1 # ms <- me( data = as.matrix(data)[initialization$subset, ], # modelName = modelName, z = z, prior = prior, control = ctrl) # es <- do.call("estep", c(list(data = data, warn = warn), ms)) out <- me(data = data, modelName = modelName, z = es$z, prior = prior, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = as.numeric(g), equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } } } } else { ## noise case ---- noise <- initialization$noise if (is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) if (is.null(initialization$subset)) { ## all data in initial hierarchical clustering phase (no subset) ---- if(nnoise == n) stop("All observations cannot be initialised as noise!") if 
(!G[1]) { hood <- n * log(Vinv) BIC["0",] <- 2 * hood - log(n) if (l == 1) { return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = list(hcPairs = hcPairs, noise = initialization$noise), warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[-noise,], modelName = mclust.options("hcModelName"), use = mclust.options("hcUse")) } else { hcPairs <- hc(data = data[-noise,], modelName = "EII", use = mclust.options("hcUse")) } } else { hcPairs <- NULL # hcPairs <- hc(modelName = "E", data = data[-noise]) } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } z <- matrix(0, n, max(G) + 1) for (g in Glabels) { z[] <- 0 k <- as.numeric(g) if(d > 1 || !is.null(hcPairs)) { cl <- clss[,g] } else { cl <- qclass(data[-noise], k = k) } z[-noise,1:k] <- unmap(cl, groups = 1:max(cl)) if(any(apply(z[-noise,1:k,drop=FALSE], 2, max) == 0) & warn) { # missing groups if(warn) warning("there are missing groups") # todo: should be pmax(...) qui sotto?? z[-noise,1:k] <- max(z[-noise,1:k], sqrt(.Machine$double.neg.eps)) # todo: should be t(...) qui sotto?? 
z[-noise,1:k] <- apply(z[-noise,1:k,drop=FALSE], 1, function(z) z/sum(z)) } z[noise, k+1] <- 1 K <- 1:(k+1) for (modelName in na.omit(modelNames[BIC[g,] == EMPTY])) { out <- me(data = data, modelName = modelName, z = z[, K], prior = prior, Vinv = Vinv, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = k, noise = TRUE, equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } } } else { ## initial hierarchical clustering phase on a subset ---- subset <- initialization$subset subset <- setdiff(subset, noise) # remove from subset noise obs initialization$subset <- subset if(length(subset) == 0) stop("No observations in the initial subset after removing the noise!") if (!G[1]) { hood <- n * log(Vinv) BIC["0",] <- 2 * hood - log(n) if (l == 1) { return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = list(hcPairs = hcPairs, subset = initialization$subset), warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[subset,], modelName = mclust.options("hcModelName"), use = mclust.options("hcUse")) } else { hcPairs <- hc(data = data[subset,], modelName = "EII", use = mclust.options("hcUse")) } } else { hcPairs <- NULL # hcPairs <- hc(modelName = "E", data = data[subset]) } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } for (g in Glabels) { k <- as.numeric(g) if (d > 1 || !is.null(hcPairs)) { cl <- clss[, g] } else { cl <- qclass(data[subset], k = k) } z <- unmap(cl, groups = 1:max(cl)) if(any(apply(z, 2, max) == 0) & warn) { # missing groups if(warn) warning("there are missing groups") small <- 
sqrt(.Machine$double.neg.eps) z[z < small] <- small z <- t(apply( z, 1, function(x) x/sum(x))) } for (modelName in na.omit(modelNames[BIC[g,] == EMPTY])) { ms <- mstep(data = as.matrix(data)[subset,], modelName = modelName, z = z, prior = prior, control = control, warn = warn) es <- do.call("estep", c(list(data = data, warn = warn), ms)) if(is.na(es$loglik)) { BIC[g, modelName] <- NA RET[g, modelName] <- attr(es, "returnCode") } else { es$z <- cbind(es$z, 0) es$z[noise,] <- matrix(c(rep(0,k),1), byrow = TRUE, nrow = length(noise), ncol = k+1) out <- me(data = data, modelName = modelName, z = es$z, prior = prior, Vinv = Vinv, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = k, noise = TRUE, equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") } if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } } } } if(verbose) { ipbar <- l*m+1; setTxtProgressBar(pbar, ipbar) } if(!is.null(prior) & any(is.na(BIC))) warning("The presence of BIC values equal to NA is likely due to one or more of the mixture proportions being estimated as zero, so that the model estimated reduces to one with a smaller number of components.") structure(BIC, G = Gout, modelNames = modelNames, prior = prior, Vinv = Vinv, control = control, initialization = list(hcPairs = hcPairs, subset = initialization$subset, noise = initialization$noise), warn = warn, n = n, d = d, oneD = oneD, criterion = "BIC", returnCodes = RET, class = "mclustBIC") } print.mclustBIC <- function(x, pick = 3, ...) 
{
  # Strip bookkeeping attributes so the BIC matrix prints cleanly.
  subset <- !is.null(attr(x, "subset"))
  oldClass(x) <- attr(x, "args") <- NULL
  attr(x, "criterion") <- NULL
  attr(x, "control") <- attr(x, "initialization") <- NULL
  attr(x, "oneD") <- attr(x, "warn") <- attr(x, "Vinv") <- NULL
  attr(x, "prior") <- attr(x, "G") <- attr(x, "modelNames") <- NULL
  ret <- attr(x, "returnCodes") == -3
  n <- attr(x, "n")
  d <- attr(x, "d")
  attr(x, "returnCodes") <- attr(x, "n") <- attr(x, "d") <- NULL
  catwrap("Bayesian Information Criterion (BIC):")
  NextMethod("print")
  cat("\n")
  catwrap(paste("Top", pick, "models based on the BIC criterion:"))
  print(pickBIC(x, pick), ...)
  invisible()
}

# Summary method for 'mclustBIC' objects.
# Without 'data', simply report the best BIC values; with 'data', refit and
# return the best model, dispatching to summaryMclustBIC or — when a noise
# component was used in the initialization — summaryMclustBICn.
summary.mclustBIC <- function(object, data, G, modelNames, ...)
{
  mc <- match.call(expand.dots = FALSE)
  if(missing(data))
    { if(!missing(G))
        object <- object[rownames(object) %in% G,,drop=FALSE]
      if(!missing(modelNames))
        object <- object[,colnames(object) %in% modelNames,drop=FALSE]
      ans <- pickBIC(object, ...)
      class(ans) <- "summary.mclustBIC"
  }
  else
    { # refit the best model on the supplied data
      if(is.null(attr(object,"initialization")$noise))
        { mc[[1]] <- as.name("summaryMclustBIC") }
      else
        { mc[[1]] <- as.name("summaryMclustBICn") }
      warn <- attr(object, "warn")
      ans <- eval(mc, parent.frame())
      if(length(ans) == 0) return(ans)
      Glabels <- dimnames(object)[[1]]
      # warn if the chosen G sits at an endpoint of the considered range
      if(length(Glabels) != 1 && (!missing(G) && length(G) > 1))
        { Grange <- range(as.numeric(Glabels))
          if(match(ans$G, Grange, nomatch = 0) & warn)
            warning("best model occurs at the min or max of number of components considered!") }
  }
  ans
}

# Workhorse for summary.mclustBIC without a noise component: refits the
# best model found in the BIC table on the data.
summaryMclustBIC <- function (object, data, G = NULL, modelNames = NULL, ...)
{ dimData <- dim(data) oneD <- (is.null(dimData) || length(dimData[dimData > 1]) == 1) if (!oneD && length(dimData) != 2) stop("data must be a vector or a matrix") if (oneD) { data <- drop(as.matrix(data)) n <- length(data) d <- 1 } else { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) } initialization <- attr(object, "initialization") hcPairs <- initialization$hcPairs subset <- initialization$subset prior <- attr(object, "prior") control <- attr(object, "control") warn <- attr(object, "warn") oldClass(object) <- NULL attr(object, "prior") <- attr(object, "warn") <- NULL attr(object, "modelNames") <- attr(object, "oneD") <- NULL attr(object, "initialization") <- attr(object, "control") <- NULL d <- if (is.null(dim(data))) 1 else ncol(data) if(is.null(G)) G <- dimnames(object)[[1]] if(is.null(modelNames)) modelNames <- dimnames(object)[[2]] bestBICs <- pickBIC(object[as.character(G), modelNames, drop = FALSE], k = 3) if(all(is.na(bestBICs))) { return(structure(list(), bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC")) } temp <- unlist(strsplit(names(bestBICs)[1], ",")) bestModel <- temp[1] G <- as.numeric(temp[2]) if(G == 1) { out <- mvn(modelName = bestModel, data = data, prior = prior) ans <- c(list(bic = bestBICs, z = unmap(rep(1,n)), classification = rep(1, n), uncertainty = rep(0, n)), out) orderedNames <- c("modelName", "n", "d", "G", "bic", "loglik", "parameters", "z", "classification", "uncertainty") return(structure(ans[orderedNames], bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC")) } if(is.null(subset)) { if(d > 1 || !is.null(hcPairs)) { z <- unmap(hclass(hcPairs, G)) } else { z <- unmap(qclass(data, G), groups = 1:G) } out <- me(data = data, modelName = bestModel, z = z, prior = prior, control = control, warn = warn) if(sum((out$parameters$pro - colMeans(out$z))^2) > sqrt(.Machine$double.eps)) { # 
perform extra M-step and update parameters ms <- mstep(data = data, modelName = bestModel, z = out$z, prior = prior, warn = warn) if(attr(ms, "returnCode") == 0) out$parameters <- ms$parameters } } else { if(d > 1 || !is.null(hcPairs)) { z <- unmap(hclass(hcPairs, G)) } else { z <- unmap(qclass(data[subset], G)) } ms <- mstep(data = as.matrix(data)[subset,], modelName = bestModel, prior = prior, z = z, control = control, warn = warn) es <- do.call("estep", c(list(data = data), ms)) out <- me(data = data, modelName = bestModel, z = es$z, prior = prior, control = control, warn = warn) # perform extra M-step and update parameters ms <- mstep(data = data, modelName = bestModel, z = out$z, prior = prior, warn = warn) if(attr(ms, "returnCode") == 0) out$parameters <- ms$parameters } obsNames <- if (is.null(dim(data))) names(data) else dimnames(data)[[1]] classification <- map(out$z, warn = warn) uncertainty <- 1 - apply(out$z, 1, max) names(classification) <- names(uncertainty) <- obsNames ans <- c(list(bic = bic(bestModel, out$loglik, out$n, out$d, out$G, noise = FALSE, equalPro = control$equalPro), # bic = as.vector(bestBICs[1]), classification = classification, uncertainty = uncertainty), out) orderedNames <- c("modelName", "n", "d", "G", "bic", "loglik", "parameters", "z", "classification", "uncertainty") structure(ans[orderedNames], bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC") } summaryMclustBICn <- function(object, data, G = NULL, modelNames = NULL, ...) 
{ dimData <- dim(data) oneD <- is.null(dimData) || length(dimData[dimData > 1]) == 1 if(!oneD && length(dimData) != 2) stop("data must be a vector or a matrix") if(oneD) { data <- drop(as.matrix(data)) n <- length(data) d <- 1 } else { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) } initialization <- attr(object, "initialization") hcPairs <- initialization$hcPairs subset <- initialization$subset noise <- initialization$noise if(is.logical(noise)) noise <- which(noise) prior <- attr(object, "prior") control <- attr(object, "control") warn <- attr(object, "warn") Vinv <- attr(object, "Vinv") oldClass(object) <- NULL attr(object, "control") <- attr(object, "initialization") <- NULL attr(object, "prior") <- attr(object, "Vinv") <- NULL attr(object, "warn") <- NULL ## if (is.null(G)) G <- dimnames(object)[[1]] if (is.null(modelNames)) modelNames <- dimnames(object)[[2]] bestBICs <- pickBIC(object[as.character(G), modelNames, drop = FALSE], k = 3) if(all(is.na(bestBICs))) { return(structure(list(), bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC")) } temp <- unlist(strsplit(names(bestBICs)[1], ",")) bestModel <- temp[1] G <- as.numeric(temp[2]) if(G == 0) { ans <- list(bic = bestBICs[1], z = unmap(rep(0,n)), classification = rep(0, n), uncertainty = rep(0, n), n = n, d = ncol(data), modelName = bestModel, G = 0, loglik = n * log(Vinv), Vinv = Vinv, parameters = NULL) orderedNames <- c("modelName", "n", "d", "G", "bic", "loglik", "Vinv", "parameters", "z", "classification", "uncertainty") return(structure(ans[orderedNames], bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC")) } G1 <- G + 1 if(is.null(subset)) { z <- matrix(0, n, G1) if(d > 1 || !is.null(hcPairs)) { z[-noise, 1:G] <- unmap(hclass(hcPairs, G)) } else { z[-noise, 1:G] <- unmap(qclass(data[-noise], G)) } z[noise, G1] <- 1 out <- me(data = data, modelName = 
bestModel, z = z, prior = prior, Vinv = Vinv, control = control, warn = warn) } else { subset <- setdiff(subset, noise) # set subset among those obs not noise if(d > 1 || !is.null(hcPairs)) { z <- unmap(hclass(hcPairs, G)) } else { z <- unmap(qclass(data[subset], G)) } ms <- mstep(data = as.matrix(data)[subset,], modelName = bestModel, z = z, prior = prior, control = control, warn = warn) es <- do.call("estep", c(list(data = data, warn = warn), ms)) es$z <- cbind(es$z, 0) es$z[noise,] <- matrix(c(rep(0,G),1), byrow = TRUE, nrow = length(noise), ncol = G+1) out <- me(data = data, modelName = bestModel, z = es$z, prior = prior, Vinv = Vinv, control = control, warn = warn) } obsNames <- if(is.null(dim(data))) names(data) else dimnames(data)[[1]] classification <- map(out$z, warn = warn) classification[classification == G1] <- 0 uncertainty <- 1 - apply(out$z, 1, max) names(classification) <- names(uncertainty) <- obsNames ans <- c(list(bic = as.vector(bestBICs[1]), classification = classification, uncertainty = uncertainty, Vinv = Vinv), out) orderedNames <- c("modelName", "n", "d", "G", "bic", "loglik", "parameters", "Vinv", "z", "classification", "uncertainty") structure(ans[orderedNames], bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC") } print.summary.mclustBIC <- function(x, digits = getOption("digits"), ...) { if("classification" %in% names(x)) { bic <- attr(x,"bestBICvalues") l <- length(bic) if(l == 1) { cat("BIC value:\n") print(bic, digits = digits) } else { cat("Best BIC values:\n") bic <- drop(as.matrix(bic)) bic <- rbind(BIC = bic, "BIC diff" = bic - max(bic)) print(bic, digits = digits) } cat("\n") catwrap(paste0("Classification table for model (", if(l == 1) names(bic)[1] else colnames(bic)[1], "):")) print(table(x$classification), digits = digits, ...) 
# --- tail of a print method begun above this chunk: fall-back branch that
# shows the top BIC values together with their differences from the best one.
} else { 
  cat("Best BIC values:\n")
  # an empty object keeps the best values in the "bestBICvalues" attribute
  x <- if(length(x) == 0) attr(x,"bestBICvalues") else drop(as.matrix(x))
  x <- rbind(BIC = x, "BIC diff" = x - max(x))
  print(x, digits = digits)
}
  invisible()
}

# Plot method for "mclustBIC" objects: one curve of BIC versus number of
# mixture components per model name, with a legend.
# x          : matrix of BIC values (rows = G, columns = model names)
# G          : components to plot (default: all rows of x)
# modelNames : models to plot (default: all columns of x)
# symbols    : plotting characters, one per model (default from mclust.options)
# colors     : line/point colors, one per model (default from mclust.options)
# legendArgs : list of arguments forwarded to legend(); NULL suppresses it
# Returns the symbols used, invisibly.
plot.mclustBIC <- function(x, G = NULL, modelNames = NULL, symbols = NULL, colors = NULL,
                           xlab = NULL, ylab = "BIC",
                           legendArgs = list(x = "bottomright", ncol = 2, cex = 1, inset = 0.01),
                           ...)
{
  args <- list(...)
  if(is.null(xlab)) xlab <- "Number of components"
  subset <- !is.null(attr(x, "initialization")$subset)
  noise <- !is.null(attr(x, "initialization")$noise)
  ret <- attr(x, "returnCodes") == -3
  # legendArgsDefault <- list(x = "bottomright", ncol = 2, cex = 1, inset = 0.01)
  # take the defaults from this function's own formal arguments, then let
  # user-supplied entries override them (duplicates keep the user's value)
  legendArgsDefault <- eval(formals(plot.mclustBIC)$legendArgs)
  legendArgs <- append(as.list(legendArgs), legendArgsDefault)
  legendArgs <- legendArgs[!duplicated(names(legendArgs))]
  # strip the mclustBIC class/attributes: rebuild x as a plain matrix
  n <- ncol(x)
  dnx <- dimnames(x)
  x <- matrix(as.vector(x), ncol = n)
  dimnames(x) <- dnx
  if(is.null(modelNames))
    modelNames <- dimnames(x)[[2]]
  if(is.null(G))
    G <- as.numeric(dimnames(x)[[1]])
  # BIC <- x[as.character(G), modelNames, drop = FALSE]
  # X <- is.na(BIC)
  # nrowBIC <- nrow(BIC)
  # ncolBIC <- ncol(BIC)
  if(is.null(symbols))
    {
      colNames <- dimnames(x)[[2]]
      m <- length(modelNames)
      if(is.null(colNames))
        {
          symbols <- if(m > 9) LETTERS[1:m] else as.character(1:m)
          names(symbols) <- modelNames
      }
      else {
        symbols <- mclust.options("bicPlotSymbols")[modelNames]
      }
  }
  if(is.null(colors))
    {
      colNames <- dimnames(x)[[2]]
      if(is.null(colNames))
        {
          # NOTE(review): m is only defined in the symbols branch above; if
          # symbols were supplied explicitly and x has no column names this
          # branch would fail — long-standing quirk, left as is
          colors <- 1:m
          names(colors) <- modelNames
      }
      else {
        # colors <- mclust.options("bicPlotColors")[modelNames]
        colors <- mclust.options("bicPlotColors")
        if(!is.null(names(colors)) & !any(names(colors) == ""))
          colors <- colors[modelNames]
      }
  }
  x <- x[,modelNames, drop = FALSE]
  ylim <- if(is.null(args$ylim)) range(as.vector(x[!is.na(x)])) else args$ylim
  matplot(as.numeric(dnx[[1]]), x, type = "b", xaxt = "n",
          xlim = range(G), ylim = ylim,
          pch = symbols, col = colors, lty = 1,
          xlab = xlab, ylab = ylab, main = "")
  axis(side = 1, at = as.numeric(dnx[[1]]))
  if(!is.null(legendArgs))
    {
      do.call("legend", c(list(legend = modelNames, col = colors, pch = symbols), legendArgs))
  }
  invisible(symbols)
}

# Return the k best BIC values from a BIC matrix, named "model,G", in
# decreasing order. Missing entries are temporarily set to the most negative
# representable double so they sort last, then restored to NA.
pickBIC <- function(x, k = 3, ...)
{
  if(!is.matrix(x))
    {
      warning("sorry, the pickBIC function cannot be applied to the provided argument!")
      return()
  }
  Glabels <- dimnames(x)[[1]]
  modelNames <- dimnames(x)[[2]]
  mis <- is.na(x)
  if(all(mis) & mclust.options("warn"))
    {
      warning("none of the selected models could be fitted")
      return(rep(NA,k))
  }
  x[mis] <- - .Machine$double.xmax
  # long format: one row per (BIC value, G label, model name) triple
  x <- data.frame(as.vector(x), Glabels[as.vector(row(x))], modelNames[as.vector(col(x))])
  # x <- x[rev(order(x[,1])),]
  # order by including first simpler models if ties are present
  x <- x[order(-x[, 1], x[,2], x[,3]),]
  namesx <- apply(x[,-1,drop = FALSE], 1,
                  function(z) paste(as.character(z[2]), as.character(z[1]), sep = ","))
  k <- min(k, nrow(x))
  x <- x[1:k,1]
  x[x == - .Machine$double.xmax] <- NA
  namesx <- namesx[1:k]
  namesx[is.na(x)] <- " "
  names(x) <- namesx
  x
}

# Merge two or more mclustBIC-like objects computed on the same data into a
# single object covering the union of models and numbers of components.
# (body continues below)
mclustBICupdate <- function(BIC, ...)
{
  args <- list(...)
nargs <- length(args) BIC1 <- BIC if(length(args) > 1) { # recursively call the function when multiple arguments BIC2 <- mclustBICupdate(args[[1]], args[[-1]]) } else { BIC2 <- args[[1]] } if(is.null(BIC1)) return(BIC2) if(is.null(BIC2)) return(BIC1) stopifnot(inherits(BIC1, c("mclustBIC", "mclustSBIC", "mclustICL")) & inherits(BIC2, c("mclustBIC", "mclustSBIC", "mclustICL"))) stopifnot(all.equal(attributes(BIC1)[c("n", "d")], attributes(BIC2)[c("n", "d")])) G <- unique(c(rownames(BIC1), rownames(BIC2))) modelNames <- unique(c(colnames(BIC1), colnames(BIC2))) BIC <- matrix(as.double(NA), nrow = length(G), ncol = length(modelNames), dimnames = list(G, modelNames)) BIC[rownames(BIC1),colnames(BIC1)] <- BIC1[rownames(BIC1),colnames(BIC1)] BIC[rownames(BIC2),colnames(BIC2)] <- BIC2[rownames(BIC2),colnames(BIC2)] r <- intersect(rownames(BIC1), rownames(BIC2)) c <- intersect(colnames(BIC1), colnames(BIC2)) BIC[r,c] <- pmax(BIC1[r,c], BIC2[r,c], na.rm = TRUE) attr <- if(pickBIC(BIC2,1) > pickBIC(BIC1,1)) attributes(BIC2) else attributes(BIC1) attr$dim <- dim(BIC) attr$dimnames <- dimnames(BIC) attr$G <- as.numeric(G) attr$modelNames <- modelNames attr$returnCodes <- NULL attributes(BIC) <- attr return(BIC) } mclustLoglik <- function(object, ...) { stopifnot(inherits(object, "mclustBIC")) BIC <- object G <- as.numeric(rownames(BIC)) modelNames <- colnames(BIC) n <- attr(BIC, "n") d <- attr(BIC, "d") noise <- if(is.null(attr(BIC, "noise"))) FALSE else TRUE loglik <- matrix(as.double(NA), nrow = length(G), ncol = length(modelNames), dimnames = list(G, modelNames)) for(i in seq_along(G)) for(j in seq_along(modelNames)) { npar <- nMclustParams(G = G[i], modelName = modelNames[j], d = d, noise = noise) loglik[i,j] <- 0.5*(BIC[i,j] + npar*log(n)) } mostattributes(loglik) <- attributes(BIC) attr(loglik, "criterion") <- "loglik" class(loglik) <- "mclustLoglik" return(loglik) } print.mclustLoglik <- function(x, ...) 
{
  # strip all bookkeeping attributes so only the numeric table is printed
  oldClass(x) <- attr(x, "args") <- NULL
  attr(x, "criterion") <- NULL
  attr(x, "control") <- attr(x, "initialization") <- NULL
  attr(x, "oneD") <- attr(x, "warn") <- attr(x, "Vinv") <- NULL
  attr(x, "prior") <- attr(x, "G") <- attr(x, "modelNames") <- NULL
  attr(x, "returnCodes") <- attr(x, "n") <- attr(x, "d") <- NULL
  catwrap("Log-likelihood:")
  NextMethod("print")
  invisible()
}

# Extract the best model from a BIC table by re-dispatching the call to
# summaryMclustBIC (or summaryMclustBICn when a noise component was used).
# The matched call is rewritten: function name swapped, the data/BICvalues
# arguments exchanged so the summary function sees (object, data, ...).
mclustModel <- function(data, BICvalues, G=NULL, modelNames=NULL, ...)
{
  mc <- match.call(expand.dots = FALSE)
  if (is.null(attr(BICvalues,"initialization")$noise))
    {
      mc[[1]] <- as.name("summaryMclustBIC")
  }
  else {
    mc[[1]] <- as.name("summaryMclustBICn")
  }
  nm <- names(mc)
  # swap positions 2 and 3 (data <-> BICvalues) and rename accordingly
  mc[1:3] <- mc[c(1,3,2)]
  nm[1:3] <- nm[c(1,3,2)]
  nm[nm == "BICvalues"] <- "object"
  names(mc) <- nm
  ans <- eval(mc, parent.frame())
  # drop the parts of the summary that are not model parameters
  ans$classification <- ans$uncertainty <- NULL
  attr( ans, "bestBICvalues") <- NULL
  attr( ans, "prior") <- NULL
  attr( ans, "control") <- NULL
  attr( ans, "initialization") <- NULL
  oldClass(ans) <- "mclustModel"
  ans
}

# Map an mclust model-name code (e.g. "VEV") to a human-readable description
# of its covariance structure. Returns list(model, type); warns on an
# unrecognized code (type is then NULL).
mclustModelNames <- function(model)
{
  type <- switch(EXPR = as.character(model),
                 "E" = "univariate, equal variance",
                 "V" = "univariate, unequal variance",
                 "EII" = "spherical, equal volume",
                 "VII" = "spherical, varying volume",
                 "EEI" = "diagonal, equal volume and shape",
                 "VEI" = "diagonal, equal shape",
                 "EVI" = "diagonal, equal volume, varying shape",
                 "VVI" = "diagonal, varying volume and shape",
                 "EEE" = "ellipsoidal, equal volume, shape and orientation",
                 "EVE" = "ellipsoidal, equal volume and orientation",
                 "VEE" = "ellipsoidal, equal shape and orientation",
                 "VVE" = "ellipsoidal, equal orientation",
                 "EEV" = "ellipsoidal, equal volume and shape",
                 "VEV" = "ellipsoidal, equal shape",
                 "EVV" = "ellipsoidal, equal volume",
                 "VVV" = "ellipsoidal, varying volume, shape, and orientation",
                 "X" = "univariate normal",
                 "XII" = "spherical multivariate normal",
                 "XXI" = "diagonal multivariate normal",
                 "XXX" = "ellipsoidal multivariate normal",
                 warning("invalid model"))
  return(list(model = model, type = type))
}

# Default conjugate prior hyperparameters for a given model family.
# Optional ... entries: shrinkage, mean, scale, dof; anything not supplied
# is derived from the data (see Fraley & Raftery's Bayesian regularization).
defaultPrior <- function(data, G, modelName, ...)
{
  aux <- list(...)
  if(is.null(aux$shrinkage)) {
    shrinkage <- 0.01
  }
  else if(is.na(aux$shrinkage) || !aux$shrinkage) {
    shrinkage <- 0
  }
  else if(aux$shrinkage < 0) {
    stop("negative value given for shrinkage")
  }
  else {
    shrinkage <- aux$shrinkage
  }
  if(is.null(aux$mean)) {
    mean <- if (is.null(dim(data))) mean(data) else colMeans(data)
  }
  else if(any(is.na(aux$mean))) {
    # NA mean means "use the data mean", which only makes sense without shrinkage
    if(shrinkage) stop("positive shrinkage with no prior mean specified")
    mean <- if (is.null(dim(data))) mean(data) else colMeans(data)
  }
  else {
    if(!shrinkage) stop("prior mean specified but not shrinkage")
    mean <- aux$mean
  }
  switch(EXPR = modelName,
         # univariate models
         E = ,
         V = ,
         X = {
           dof <- 3
           if(is.null(aux$scale)) {
             scale <- var(data)/G^2
           }
           else {
             scale <- aux$scale
           }
           list(shrinkage = shrinkage, mean = mean, dof = dof, scale = scale)
         },
         ## spherical and diagonal multivariate models
         EII = ,
         VII = ,
         XII = ,
         EEI = ,
         EVI = ,
         VEI = ,
         VVI = ,
         XXI = {
           n <- nrow(data)
           p <- ncol(data)
           dof <- p + 2
           if(is.null(aux$scale)) {
             fac <- (1/G)^(2/p)
             scale <- (fac * sum(apply(data, 2, var)))/ p
           }
           else {
             scale <- aux$scale
           }
           list(shrinkage = shrinkage, mean = mean, dof = dof, scale = scale)
         },
         ## ellipsoidal multivariate models
         EEE = ,
         EVE = ,
         VEE = ,
         VVE = ,
         EEV = ,
         VEV = ,
         EVV = ,
         VVV = ,
         XXX = {
           n <- nrow(data)
           p <- ncol(data)
           dof <- p + 2
           if(is.null(aux$scale)) {
             fac <- (1/G)^(2/p)
             # with fewer observations than variables the sample covariance
             # is singular, so fall back to its diagonal
             if(n > p) {
               scale <- fac * var(data)
             }
             else {
               scale <- fac * diag(apply(data, 2, var))
             }
           }
           else {
             scale <- aux$scale
           }
           list(shrinkage = shrinkage, mean = mean, dof = dof, scale = scale)
         },
         stop("no default prior for this model"))
}

# Validate and normalize the EM control parameters (convergence tolerances,
# iteration limits, equal-proportions flag). (body continues below)
emControl <- function(eps = .Machine$double.eps,
                      tol = c(1.0e-05, sqrt(.Machine$double.eps)),
                      itmax = c(.Machine$integer.max, .Machine$integer.max),
                      equalPro = FALSE)
{
  if(any(eps < 0)) stop("eps is negative")
  if(any(eps >= 1)) stop("eps is not less than 1")
  if(any(tol < 0)) stop("tol is negative")
  if(any(tol >= 1)) stop("tol is not less than 1")
  if(any(itmax < 0)) stop("itmax is negative")
  if(length(tol) == 1) tol <- rep(tol, 2)
  if(length(itmax) ==
1) itmax <- c(itmax, .Machine$integer.max) i <- is.infinite(itmax) if(any(i)) itmax[i] <- .Machine$integer.max list(eps = eps, tol = tol, itmax = itmax, equalPro = equalPro) } priorControl <- function(functionName = "defaultPrior", ...) { c(list(functionName = functionName), list(...)) } cdensEEE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) { dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) > 2) stop("data must be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) mu <- as.matrix(parameters$mean) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "EEE", WARNING = WARNING, returnCode = 9)) } if(is.null(parameters$variance$cholSigma)) stop("variance parameters are missing") temp <- .Fortran("eseee", as.logical(1), as.double(data), as.double(mu), as.double(parameters$variance$cholSigma), as.double(-1), as.integer(n), as.integer(p), as.integer(G), as.double(-1), double(p), double(1), double(n * G), PACKAGE = "mclust")[10:12] lapackCholInfo <- temp[[1]][1] loglik <- temp[[2]] z <- matrix(temp[[3]], n, G) WARNING <- NULL if(lapackCholInfo) { if(lapackCholInfo > 0) { WARNING <- "sigma is not positive definite" if(warn) warning(WARNING) } else { WARNING <- "input error for LAPACK DPOTRF" if(warn) warning(WARNING) } z[] <- NA ret <- -9 } else if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "EEE", WARNING = WARNING, returnCode = ret) } emEEE <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) 
{ z <- estepEEE(data, parameters = parameters, warn = warn)$z meEEE(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepEEE <- function(data, parameters, warn = NULL, ...) { if (is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) > 2) stop("data must be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) G <- ncol(mu) noise <- (l == G + 1) if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EEE", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$cholSigma)) stop("variance parameters are missing") temp <- .Fortran("eseee", as.logical(1), as.double(data), as.double(mu), as.double(parameters$variance$cholSigma), as.double(pro), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), double(p), double(1), double(n * K), PACKAGE = "mclust")[10:12] lapackCholInfo <- temp[[1]][1] loglik <- temp[[2]] z <- matrix(temp[[3]], n, K) WARNING <- NULL if(lapackCholInfo) { if(lapackCholInfo > 0) { WARNING <- "sigma is not positive definite" warning(WARNING) ret <- -4 } else { WARNING <- "input error for LAPACK DPOTRF" warning(WARNING) ret <- -5 } z[] <- loglik <- NA } else if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- 
list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "EEE", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# M-step-driven EM for the EEE model starting from membership matrix z.
# Iterates the Fortran routine meeee (or meeeep when a prior is supplied)
# until convergence per `control`. A non-NULL Vinv adds a noise component
# (z then has K = G + 1 columns).
meEEE <- function(data, z, prior = NULL, control = emControl(),
                  Vinv = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  if (!is.null(Vinv))
    { # last column of z is the noise component
      G <- K - 1
      if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
    {
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "EEE", d = p, G = G,
                       Sigma = matrix(as.double(NA), p, p),
                       cholSigma = matrix(as.double(NA), p, p))
      parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                         variance=variance, Vinv=Vinv)
      return(structure(list(modelName="EEE", prior=prior, n=n, d=p, G=G,
                            z=z, parameters=parameters, control=control,
                            loglik=NA),
                       WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
    {
      temp <- .Fortran("meeee",
                       as.logical(control$equalPro),
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       as.double(if (is.null(Vinv)) -1 else Vinv),
                       z,
                       as.integer(control$itmax[1]),
                       as.double(control$tol[1]),
                       as.double(control$eps),
                       double(p * G),
                       double(p * p),
                       double(K),
                       double(p),
                       PACKAGE = "mclust")[7:13]
  }
  else {
    # generate the hyperparameters, then call the prior-regularized kernel
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EEE"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("meeeep",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(if(any(priorParams$scale != 0))
                                 chol(priorParams$scale) else priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(p * p),
                     double(K),
                     double(p),
                     PACKAGE = "mclust")[c(11:17, 10)]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  cholSigma <- matrix(temp[[6]], p, p)
  pro <- temp[[7]]
  WARNING <- NULL
  # a huge positive "loglik" is the kernel's sentinel for singular covariance;
  # a huge negative one signals a vanished mixing proportion / z column
  if(loglik > signif(.Machine$double.xmax, 6))
    {
      WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      Sigma <- matrix( NA, p, p)
      ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    {
      if(control$equalPro)
        {
          WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING)
      }
      else {
        WARNING <- "mixing proportion fell below threshold"
        if(warn) warning(WARNING)
      }
      # NOTE(review): logprior is assigned here but never used afterwards
      mu[] <- pro[] <- z[] <- loglik <- logprior <- NA
      sigma <- array(NA, c(p, p, G))
      Sigma <- matrix(as.double(NA), p, p)
      ret <- if(control$equalPro) -2 else -3
  }
  else {
    # recover Sigma from its Cholesky factor; all components share it (EEE)
    Sigma <- unchol(cholSigma, upper = TRUE)
    sigma <- array(0, c(p, p, G))
    for(k in 1:G)
      sigma[, , k] <- Sigma
    if(its >= control$itmax[1])
      {
        WARNING <- "iteration limit reached"
        if(warn) warning(WARNING)
        its <- - its
        ret <- 1
    }
    else ret <- 0
  }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(Sigma) <- dimnames(cholSigma) <-
    list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EEE", d = p, G = G,
                   sigma = sigma, Sigma = Sigma, cholSigma = cholSigma)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "EEE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
info = info, WARNING = WARNING, returnCode = ret) } mstepEEE <- function(data, z, prior = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) ## z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EEE", d = p, G = G, sigma <- array(NA, c(p,p, G)), Sigma = matrix(as.double(NA), p, p), cholSigma = matrix(as.double(NA), p, p)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance) return(structure(list(modelName="EEE", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") if(is.null(prior)) { temp <- .Fortran("mseee", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), double(p), double(p * G), double(p * p), double(G), PACKAGE = "mclust")[7:9] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EEE"), prior[names(prior) != "functionName"])) temp <- .Fortran("mseeep", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(if(any(priorParams$scale != 0)) chol(priorParams$scale) else priorParams$scale), as.double(priorParams$dof), double(p), double(p * G), double(p * p), double(G), PACKAGE = "mclust")[11:13] } mu <- matrix(temp[[1]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) cholSigma <- matrix(temp[[2]], p, p) pro <- temp[[3]] sigma <- array(0, c(p, p, G)) Sigma <- unchol(cholSigma, upper = TRUE) for(k in 1:G) sigma[, , k] <- Sigma WARNING <- NULL if(any(mu > 
signif(.Machine$double.xmax, 6))) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) mu[] <- sigma[] <- Sigma[] <- cholSigma[] <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(Sigma) <- dimnames(cholSigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]]) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EEE", d = p, G = G, sigma = sigma, Sigma = Sigma, cholSigma= cholSigma) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "EEE", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters), WARNING = WARNING, returnCode = ret) } simEEE <- function(parameters, n, seed = NULL, ...) { if(!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, d + 1), modelName = "EEE")) } pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- matrix(0, n, d) if(is.null(cholSigma <- parameters$variance$cholSigma)) { if(is.null(Sigma <- parameters$variance$Sigma)) { stop("variance parameters must inlcude either Sigma or cholSigma" ) } cholSigma <- chol(Sigma) } for(k in 1:G) { m <- ctabel[k] x[clabels == k,] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% cholSigma, MARGIN = 2, STATS = mu[,k], FUN = "+") } dimnames(x) <- list(NULL, paste0("x", 1:d)) structure(cbind(group = clabels, x), modelName = "EEE") } cdensEEI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) 
{
  # component log-densities (or densities) for the EEI model
  # (diagonal covariance, equal volume and shape across components)
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    {
      WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "EEI",
                       WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  # pro = -1 and Vinv = -1 request plain component densities from the kernel
  temp <- .Fortran("eseei",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    {
      WARNING <- "sigma-squared falls below threshold"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1
  }
  else {
    if (!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "EEI",
            WARNING = WARNING, returnCode = ret)
}

# Component log-densities (or densities) for the EII model (spherical,
# equal volume: common scalar variance sigmasq).
cdensEII <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    {
      WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "EII",
                       WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  if(sigmasq < 0)
    stop("sigma-squared is negative")
  if(!sigmasq)
    { # zero variance: densities are undefined
      WARNING <- "sigma-squared vanishes"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "EII",
                       WARNING = WARNING, returnCode = 9))
  }
  temp <- .Fortran("eseii",
                   as.double(data),
                   as.double(mu),
                   as.double(sigmasq),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[9:10]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    {
      WARNING <- "sigma-squared falls below threshold"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1
  }
  else {
    if (!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "EII",
            WARNING = WARNING, returnCode = ret)
}

# EM for the EEI model: one E-step from the given parameters, then iterate
# from the resulting z via meEEI.
emEEI <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...)
{
  z <- estepEEI(data, parameters = parameters, warn = warn)$z
  meEEI(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# E-step for the EEI model: conditional membership probabilities z and the
# log-likelihood at the supplied parameters (optional noise component).
estepEEI <- function(data, parameters, warn = NULL, ...)
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) G <- ncol(mu) noise <- (l == G + 1) if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EEI", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape)) stop("variance parameters are missing") temp <- .Fortran("eseei", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(pro), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), double(1), double(n * K), PACKAGE = "mclust")[10:11] loglik <- temp[[1]] z <- matrix(temp[[2]], n, K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "EEI", n = n, d = p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } meEEI <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) 
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2)
    stop("data should be in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  if (!is.null(Vinv))
    { # last column of z is the noise component
      G <- K - 1
      if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
    {
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "EEI", d = p, G = G,
                       scale = NA, shape = rep(NA,p))
      parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                         variance=variance, Vinv=Vinv)
      return(structure(list(modelName="EEI", prior=prior, n=n, d=p, G=G,
                            z=z, parameters=parameters, control=control,
                            loglik=NA),
                       WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
    {
      temp <- .Fortran("meeei",
                       as.logical(control$equalPro),
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       as.double(if (is.null(Vinv)) -1 else Vinv),
                       z,
                       as.integer(control$itmax[1]),
                       as.double(control$tol[1]),
                       as.double(control$eps),
                       double(p * G),
                       double(1),
                       double(p),
                       double(K),
                       PACKAGE = "mclust")[7:14]
  }
  else {
    # generate the hyperparameters, then call the prior-regularized kernel
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EEI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("meeeip",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(1),
                     double(p),
                     double(K),
                     PACKAGE = "mclust")[11:18]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp[[6]]
  shape <- temp[[7]]
  pro <- temp[[8]]
  WARNING <- NULL
  # a huge positive "loglik" is the kernel's sentinel for singular covariance;
  # a huge negative one signals a vanished mixing proportion / z column
  if(loglik > signif(.Machine$double.xmax, 6))
    {
      WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      sigma <- array(NA, c(p, p, G))
      Sigma <- matrix(as.double(NA), p, p)
      mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
      ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    {
      if(control$equalPro)
        {
          WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING)
      }
      else {
        WARNING <- "mixing proportion fell below threshold"
        if(warn) warning(WARNING)
      }
      sigma <- array(NA, c(p, p, G))
      Sigma <- matrix(as.double(NA), p, p)
      mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
      ret <- if(control$equalPro) -2 else -3
  }
  else {
    # common diagonal covariance: scale * shape on the diagonal (EEI)
    sigma <- array(0, c(p, p, G))
    Sigma <- diag(scale * shape)
    for(k in 1:G)
      sigma[, , k] <- Sigma
    if(its >= control$itmax[1])
      {
        WARNING <- "iteration limit reached"
        if(warn) warning(WARNING)
        its <- - its
        ret <- 1
    }
    else ret <- 0
  }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(Sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EEI", d = p, G = G,
                   sigma = sigma, Sigma = Sigma,
                   scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "EEI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# M-step for the EEI model: ML (or MAP, with a prior) estimates of pro, mean
# and the common diagonal covariance (scale, shape) from data and z.
mstepEEI <- function(data, z, prior = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    {
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "EEI", d = p, G = G,
                       scale = NA, shape = rep(NA,p))
      parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                         variance=variance)
      return(structure(list(modelName="EEI", prior=prior, n=n, d=p, G=G,
                            z=z, parameters=parameters),
                       WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(prior))
    {
      temp <- .Fortran("mseei",
                       as.double(data),
                       as.double(z),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       double(p * G),
                       double(1),
                       double(p),
                       double(G),
                       PACKAGE = "mclust")[6:9]
  }
  else {
    storage.mode(z) <- "double"
    # generate the hyperparameters, then call the prior-regularized kernel
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EEI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("mseeip",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     double(p * G),
                     double(1),
                     double(p),
                     double(G),
                     PACKAGE = "mclust")[10:13]
  }
  mu <- matrix(temp[[1]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp[[2]]
  shape <- temp[[3]]
  pro <- temp[[4]]
  WARNING <- NULL
  # overflow sentinel, or a zero scale/shape entry: M-step failed
  if(any(c(shape, scale) > signif(.Machine$double.xmax, 6)) ||
     any(!c(scale, shape)))
    {
      WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- scale <- shape[] <- NA
      # NOTE(review): here Sigma is set to a p x p x G array, unlike the
      # success branch where it is a p x p matrix — long-standing quirk
      sigma <- Sigma <- array(NA, c(p, p, G))
      ret <- -1
  }
  else {
    # common diagonal covariance: scale * shape on the diagonal (EEI)
    sigma <- array(0, c(p, p, G))
    Sigma <- diag(scale * shape)
    for(k in 1:G)
      sigma[, , k] <- Sigma
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(Sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EEI", d = p, G = G,
                   sigma = sigma, Sigma = Sigma,
                   scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "EEI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

# Simulate n observations from an EEI mixture. Returns an n x (d+1) matrix
# whose first column ("group") is the component label.
simEEI <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
    {
      warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1), modelName = "EEI"))
  }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  # draw component labels, then fill each group with independent normals
  # scaled by the common diagonal covariance
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  shape <- parameters$variance$shape
  if(length(shape) != d)
    stop("shape incompatible with mean")
  cholSigma <- diag(sqrt(parameters$variance$scale * shape))
  for(k in 1:G)
    {
      m <- ctabel[k]
      x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% cholSigma,
                                 MARGIN = 2, STATS = mu[, k], FUN = "+")
  }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "EEI")
}

# Component log-densities (or densities) for the univariate E model
# (equal variance across components).
cdensE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be one-dimensional") data <- drop(data) n <- length(data) mu <- drop(parameters$mean) G <- length(mu) if(any(is.na(unlist(parameters[c("mean", "variance")]))) || any(is.null(parameters[c("mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(names(data), NULL) return(structure(z, logarithm = logarithm, modelName = "E", WARNING = WARNING, returnCode = 9)) } sigmasq <- parameters$variance$sigmasq if(is.null(sigmasq)) stop("variance parameters are missing") if(length(sigmasq) > 1) if(warn) warning("more than one sigma-squared given") if(sigmasq < 0) stop("sigma-squared is negative") if(!sigmasq) { WARNING <- "sigma-squared vanishes" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(names(data), NULL) return(structure(z, logarithm = logarithm, modelName = "E", WARNING = WARNING, returnCode = 9)) } temp <- .Fortran("es1e", as.double(data), as.double(mu), as.double(sigmasq), as.double(-1), as.integer(n), as.integer(G), as.double(-1), double(1), double(n * G), PACKAGE = "mclust")[8:9] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(names(data),NULL) structure(z, logarithm = logarithm, modelName = "E", WARNING = WARNING, returnCode = ret) } emE <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepE(data, parameters = parameters, warn = warn)$z meE(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepE <- function(data, parameters, warn = NULL, ...) 
{
  # E-step for the univariate "E" (equal variance) model.  A trailing extra
  # mixing proportion (length(pro) == G + 1) adds a uniform noise component
  # with reciprocal hypervolume Vinv.  Returns list(modelName, n, d, G, z,
  # parameters, loglik) with attributes WARNING and returnCode.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD) stop("data must be one-dimensional")
  data <- drop(data)
  n <- length(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- drop(parameters$mean)
  G <- length(mu)
  noise <- (l == G + 1)
  if(!noise)
  { if(l != G) stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else
  { K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(names(data), NULL)
    return(structure(list(modelName = "E", n=n, d=1, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  if(is.null(sigmasq)) stop("variance parameters are missing")
  if(length(sigmasq) > 1) if(warn) warning("more than one sigma-squared specified")
  if(sigmasq < 0) stop("sigma-squared is negative")
  if(!sigmasq)
  { WARNING <- "sigma-squared vanishes"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(names(data), NULL)
    return(structure(list(modelName = "E", n=n, d=1, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = -1))
  }
  # Fortran es1e returns the loglik and the n x K conditional probabilities
  temp <- .Fortran("es1e",
                   as.double(data), as.double(mu), as.double(sigmasq),
                   as.double(pro), as.integer(n), as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(1), double(n * K),
                   PACKAGE = "mclust")[8:9]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(names(data),NULL)
  structure(list(modelName = "E", n = n, d = 1, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# cdensEEV: (log) component densities for the multivariate "EEV" model
# (equal volume and shape, component-specific orientation).
cdensEEV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # FIX: initialise warn from mclust.options("warn") as every sibling
  # cdens*() does; previously a NULL warn reached if(warn) below and raised
  # "argument is of length zero" whenever parameters were missing.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,G)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(z, logarithm = logarithm, modelName = "EEV",
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  # per-component orientation matrices are transposed (aperm) before the
  # Fortran call; the matching inverse aperm appears on output in meEEV
  temp <- .Fortran("eseev",
                   as.double(data), as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(aperm(parameters$variance$orientation,c(2,1,3))),
                   as.double(-1), as.integer(n), as.integer(p),
                   as.integer(G), as.double(-1), double(p), double(p),
                   double(1), double(n * G),
                   PACKAGE = "mclust")[13:14]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  }
  else { if (!logarithm) z <- exp(z)
    ret <- 0 }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "EEV",
            WARNING = WARNING, returnCode = ret)
}

# emEEV: EM for the "EEV" model -- E-step from given parameters, then
# iterate via meEEV().
emEEV <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepEEV(data, parameters = parameters, warn = warn)$z
  meEEV(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# estepEEV: E-step for the "EEV" model; body continues in the next chunk.
estepEEV <- function(data, parameters, warn = NULL, ...)
{
  # E-step for the multivariate "EEV" model (equal volume and shape,
  # component-specific orientation), with an optional uniform noise
  # component when pro carries one extra element.  Returns
  # list(modelName, n, d, G, z, loglik, ...) with WARNING/returnCode
  # attributes.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # one extra mixing proportion signals a uniform noise component
  noise <- (l == G + 1)
  if(!noise)
  { if(l != G) stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else
  { K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "EEV", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  temp <- .Fortran("eseev",
                   as.double(data), as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(aperm(parameters$variance$orientation,c(2,1,3))),
                   as.double(pro), as.integer(n), as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(p), double(p), double(1), double(n * K),
                   PACKAGE = "mclust")[13:14]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "EEV", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# meEEV: EM iteration for the "EEV" model starting from conditional
# probabilities z; body continues in the next chunk.
meEEV <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL,
                  warn = NULL, ...)
{
  # meEEV: run EM for the multivariate "EEV" model starting from the
  # conditional membership matrix z (the iteration itself happens inside
  # the Fortran routine).  A non-NULL Vinv adds a uniform noise component
  # whose reciprocal hypervolume is Vinv (estimated via hypvol() when
  # non-positive).  An optional conjugate prior is applied through the
  # prior$functionName hyperparameter function.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2) stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n) stop("data and z should have the same row dimension")
  K <- dimz[2]
  if (!is.null(Vinv))
  { G <- K - 1
    if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
  { # nothing to start from: return an all-NA fit flagged returnCode 9
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "EEV", d = p, G = G, scale = NA,
                     shape = rep(NA,p), orientation = array(NA,c(p,p,G)))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="EEV", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters, control=control,
                          loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  # LAPACK workspace size for the SVD performed inside the Fortran routine
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p))
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("meeev",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     as.integer(lwork),
                     double(p * G),
                     double(1),
                     double(p),
                     double(p * p * G),
                     double(K),
                     double(lwork),
                     double(p),
                     PACKAGE = "mclust")[7:16]
  }
  else
  { # conjugate prior: hyperparameters computed by the prior's function
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EEV"),
                             prior[names(prior) !="functionName"]))
    temp <- .Fortran("meeevp",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(if(any(priorParams$scale != 0))
                                 chol(priorParams$scale)
                               else priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     as.integer(lwork),
                     double(p * G),
                     double(1),
                     double(p),
                     double(p * p * G),
                     double(K),
                     double(lwork),
                     double(p),
                     PACKAGE = "mclust")[11:20]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  lapackSVDinfo <- temp[[5]]
  mu <- matrix(temp[[6]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp[[7]]
  shape <- temp[[8]]
  # undo the transposition applied when passing orientations to Fortran
  O <- aperm(array(temp[[9]], c(p, p, G)),c(2,1,3))
  pro <- temp[[10]]
  WARNING <- NULL
  if(lapackSVDinfo)
  { # nonzero info from LAPACK DGESVD: > 0 no convergence, < 0 bad input
    if(lapackSVDinfo > 0)
    { WARNING <- "LAPACK DGESVD fails to converge" }
    else
    { WARNING <- "input error for LAPACK DGESVD" }
    z[] <- O[] <- shape[] <- NA
    scale <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -9
  }
  else if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    shape[] <- NA
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
  { if(control$equalPro)
    { WARNING <- "a z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else
    { WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- if(control$equalPro) -2 else -3
  }
  else
  { sigma <- scale * shapeO(shape, O, transpose = FALSE)
    if(its >= control$itmax[1])
    { WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its
      ret <- 1
    }
    else ret <- 0
  }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]],
                                         dimnames(data)[[2]], NULL)
  ## Sigma = scale * O %*% diag(shape) %*% t(O)
  variance <- list(modelName = "EEV", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "EEV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# mstepEEV: single M-step for the "EEV" model given data and conditional
# probabilities z; returns updated parameters (no E-step, no loglik).
mstepEEV <- function(data, z, prior = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  ## z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n) stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "EEV", d = p, G = G, scale = NA,
                     shape = rep(NA,p), orientation=array(NA,c(p,p,G)))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance)
    return(structure(list(modelName="EEV", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters),
                     WARNING = WARNING, returnCode = 9))
  }
  # shape <- sqrt(rev(sort(shape/exp(sum(log(shape))/p))))
  if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop(
      "improper specification of z")
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), G)
  if(is.null(prior))
  { temp <- .Fortran("mseev",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     double(lwork),
                     as.integer(lwork),
                     double(p * G),
                     double(1),
                     double(p),
                     double(p * p * G),
                     double(G),
                     PACKAGE = "mclust")[7:12]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EEV"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("mseevp",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(if(any(priorParams$scale != 0))
                                 chol(priorParams$scale)
                               else priorParams$scale),
                     as.double(priorParams$dof),
                     double(lwork),
                     as.integer(lwork),
                     double(p * G),
                     double(1),
                     double(p),
                     double(p * p * G),
                     double(G),
                     PACKAGE = "mclust")[11:16]
  }
  # the first retained slot (the lwork position) is reused by the Fortran
  # code to return the LAPACK SVD info code -- inferred from the [7:12] /
  # [11:16] indexing above; TODO confirm against the Fortran sources
  lapackSVDinfo <- temp[[1]]
  mu <- matrix(temp[[2]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp[[3]]
  shape <- temp[[4]]
  O <- aperm( array(temp[[5]], c(p, p, G)), c(2,1,3))
  pro <- temp[[6]]
  WARNING <- NULL
  if(lapackSVDinfo)
  { if(lapackSVDinfo > 0)
    { WARNING <- "LAPACK DGESVD fails to converge"
      if(warn) warning(WARNING)
      ret <- -4
    }
    else
    { WARNING <- "input error for LAPACK DGESVD"
      if(warn) warning(WARNING)
      ret <- -5
    }
    O[] <- shape[] <- scale <- NA
    sigma <- array(NA, c(p, p, G))
  }
  else if(any(c(abs(scale), shape) > signif(.Machine$double.xmax, 6)))
  { WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- scale <- O[] <- shape[] <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  }
  else
  { sigma <- scale * shapeO(shape, O, transpose = FALSE)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]],
                                         dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EEV", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "EEV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

# simEEV: simulate from an "EEV" mixture; body continues in the next chunk.
simEEV <- function(parameters, n, seed = NULL, ...)
{
  # simEEV: simulate n observations from an "EEV" mixture (equal volume and
  # shape, component-specific orientation).  Returns an n x (d+1) matrix
  # with the component label in column 1 ("group").
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
  { warning("parameters are missing")
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "EEV"))
  }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  shape <- parameters$variance$shape
  if(length(shape) != d) stop("shape incompatible with mean")
  # common scale/shape; only the orientation differs per component
  sss <- sqrt(parameters$variance$scale * shape)
  for(k in 1:G)
  { m <- ctabel[k]
    # per-component factor: rows of t(orientation_k) scaled by sss
    cholSigma <- t(parameters$variance$orientation[, , k]) * sss
    x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                                 cholSigma,
                               MARGIN = 2, STATS = mu[, k], FUN = "+")
  }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "EEV")
}

# emEII: EM for the spherical equal-volume "EII" model -- E-step from
# given parameters, then iterate via meEII().
emEII <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepEII(data, parameters = parameters, warn = warn)$z
  meEII(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# estepEII: E-step for the "EII" model; body continues in the next chunk.
estepEII <- function(data, parameters, warn = NULL, ...)
{
  # estepEII: E-step for the multivariate "EII" (spherical, equal volume)
  # model, with an optional uniform noise component when pro carries one
  # extra element.  Returns list(modelName, n, d, G, z, parameters, loglik)
  # with WARNING/returnCode attributes.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix")
  data <- as.matrix(data)
  p <- ncol(data)
  n <- nrow(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  noise <- (l == G + 1)
  if(!noise)
  { if(l != G) stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else
  { K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "EII", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  # FIX: a NULL sigmasq was previously only warned about (and only when
  # warn was TRUE) before being used in sigmasq < 0 below, which then
  # failed with the opaque "argument is of length zero"; sibling functions
  # estepE/estepEEV stop() with this message, so do the same here.
  if(is.null(sigmasq)) stop("variance parameters are missing")
  if(sigmasq < 0) stop("sigma-squared is negative")
  if(!sigmasq)
  { WARNING <- "sigma-squared vanishes"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "EII", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = -1))
  }
  temp <- .Fortran("eseii",
                   as.double(data), as.double(mu), as.double(sigmasq),
                   as.double(pro), as.integer(n), as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(1), double(n * K),
                   PACKAGE = "mclust")[9:10]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "sigma-squared falls below threshold"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "EII", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# meEII: EM iteration for the "EII" model starting from conditional
# probabilities z; optional uniform noise component via Vinv and optional
# conjugate prior via prior$functionName.
meEII <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL,
                  warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2) stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n) stop("data and z should have the same row dimension")
  K <- dimz[2] # number of groups
  if (!is.null(Vinv))
  { G <- K - 1
    if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "EII", d = p, G = G, sigmasq = NA)
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="EII", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters, control=control,
                          loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("meeii",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(1),
                     double(K),
                     PACKAGE = "mclust")[7:13]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EII"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("meeiip",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(1),
                     double(K),
                     PACKAGE = "mclust")[c(11:17, 10)]
  }
  mu <- matrix(temp[[5]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  sigmasq <- temp[[6]]
  Sigma <- diag(rep(sigmasq, p))
  pro <- temp[[7]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6) ||
     sigmasq <= max(control$eps,0))
  { WARNING <- "sigma-squared falls below threshold"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
  { if(control$equalPro)
    { WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else
    { WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- if(control$equalPro) -2 else -3
  }
  else
  { # spherical common covariance replicated for each component
    sigma <- array(0, c(p, p, G))
    for(k in 1:G)
      sigma[, , k] <- Sigma
    if(its >= control$itmax[1])
    { WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its
      ret <- 1
    }
    else ret <- 0
  }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(Sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EII", d = p, G = G, sigma = sigma,
                   Sigma = Sigma, sigmasq = sigmasq, scale = sigmasq)
  parameters <- list(pro=pro, mean=mu, variance = variance, Vinv=Vinv)
  structure(list(modelName = "EII", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# mstepEII: single M-step for the "EII" model; body continues in the next
# chunk.
mstepEII <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # mstepEII: single M-step for the spherical equal-volume "EII" model
  # given data and conditional probabilities z; returns updated parameters
  # (pro, mean, spherical sigma) without an E-step or loglik.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2) stop("data should be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n) stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "EII", d = p, G = G, sigmasq = NA)
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance)
    return(structure(list(modelName="EII", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("mseii",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     double(p * G),
                     double(1),
                     double(G),
                     PACKAGE = "mclust")[6:8]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "EII"),
                             prior[names(prior) !="functionName"]))
    temp <- .Fortran("mseiip",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     double(p * G),
                     double(1),
                     double(G),
                     PACKAGE = "mclust")[10:12]
  }
  mu <- matrix(temp[[1]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  sigmasq <- temp[[2]]
  pro <- temp[[3]]
  sigma <- array(0, c(p, p, G))
  Sigma <- diag(rep(sigmasq, p))
  for(k in 1:G)
    sigma[, , k] <- Sigma
  WARNING <- NULL
  if(sigmasq > signif(.Machine$double.xmax, 6))
  { WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(Sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EII", d = p, G = G, sigma = sigma,
                   Sigma = Sigma, sigmasq = sigmasq, scale = sigmasq)
  parameters <- list(pro=pro, mean=mu, variance = variance)
  structure(list(modelName = "EII", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

# simEII: simulate n observations from an "EII" (spherical, equal volume)
# mixture; returns an n x (d+1) matrix with the group label in column 1.
simEII <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
  { warning("parameters are missing")
    # FIX: the NA fallback previously had d columns, one fewer than the
    # n x (d+1) (group + coordinates) matrix returned on success and by
    # simEEI/simEEV/simE in the same situation.
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "EII"))
  }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  sigmasq <- parameters$variance$sigmasq
  # EII: common spherical covariance sigmasq * I for every component
  cholSigma <- diag(rep(sqrt(sigmasq), d))
  for(k in 1:G)
  { m <- ctabel[k]
    x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                                 cholSigma,
                               MARGIN = 2, STATS = mu[, k], FUN = "+")
  }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "EII")
}

# meE: EM iteration for the univariate "E" model; body continues in the
# next chunk.
meE <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL,
                warn = NULL, ...)
{
  # meE: run EM for the univariate equal-variance "E" model starting from
  # the conditional membership matrix z; supports an optional uniform noise
  # component via Vinv and an optional conjugate prior.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD) stop("data must be 1 dimensional")
  data <- as.vector(data)
  n <- length(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n) stop("row dimension of z should equal length of data")
  K <- dimz[2]
  if (!is.null(Vinv))
  { G <- K - 1
    if (Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "E", d = 1, G = G, sigmasq = NA)
    parameters <- list(pro=rep(NA,G), mean=rep(NA,G), variance=variance,
                       Vinv=Vinv)
    return(structure(list(modelName="E", prior=prior, n=n, d=1, G=G, z=z,
                          parameters=parameters, control=control,
                          loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("me1e",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(G),
                     double(1),
                     double(K),
                     PACKAGE = "mclust")[6:12]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "E"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("me1ep",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(G),
                     double(1),
                     double(K),
                     PACKAGE = "mclust")[c(10:16, 9)]
  }
  mu <- temp[[5]]
  names(mu) <- as.character(1:G)
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  sigmasq <- temp[[6]]
  pro <- temp[[7]]
  ## log post <- temp[[8]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6) ||
     sigmasq <= max(control$eps,0))
  { WARNING <- "sigma-squared falls below threshold"
    if(warn) warning(WARNING)
    # NOTE(review): logprior is assigned here but never defined or
    # returned anywhere else in this function -- apparently a leftover
    # tied to the commented-out temp[[8]] above.
    mu[] <- pro[] <- sigmasq <- z[] <- loglik <- logprior <- NA
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
  { if(control$equalPro)
    { WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else
    { WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA
    ret <- if(control$equalPro) -2 else -3
  }
  else if(its >= control$itmax[1])
  { WARNING <- "iteration limit reached"
    if(warn) warning(WARNING)
    its <- - its
    ret <- 1
  }
  else ret <- 0
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(names(data), NULL)
  variance <- list(modelName = "E", d = 1, G = G, sigmasq = sigmasq)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "E", prior = prior, n = n, d = 1, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# mstepE: single M-step for the univariate "E" model; body continues in the
# next chunk.
mstepE <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # mstepE: single M-step for the univariate "E" (equal variance) model
  # given data and conditional probabilities z; returns updated parameters
  # (pro, mean, sigmasq) without an E-step or loglik.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD) stop("data must be one-dimensional")
  data <- as.vector(data)
  n <- length(data)
  ## z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n) stop("row dimension of z should equal data length")
  # number of groups
  G <- dimz[2]
  ##
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName="E", d=1, G=G, sigmasq=NA)
    parameters <- list(pro=rep(NA,G), mean=rep(NA,G), variance=variance)
    return(structure(list(modelName="E", prior=prior, n=n, d=1, G=G,
                          z = z, parameters=parameters),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(prior))
  { temp <- .Fortran("ms1e",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(G),
                     double(G),
                     double(1),
                     double(G),
                     PACKAGE = "mclust")[5:7]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "E"),
                             prior[names(prior) != "functionName"]))
    storage.mode(z) <- "double"
    temp <- .Fortran("ms1ep",
                     as.double(data),
                     z,
                     as.integer(n),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     double(G),
                     double(1),
                     double(G),
                     PACKAGE = "mclust")[9:11]
  }
  mu <- temp[[1]]
  names(mu) <- as.character(1:G)
  sigmasq <- temp[[2]]
  pro <- temp[[3]]
  WARNING <- NULL
  if(sigmasq > signif(.Machine$double.xmax, 6))
  { WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    pro[] <- mu[] <- sigmasq <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(names(data), NULL)
  variance <- list(modelName = "E", d = 1, G = G, sigmasq = sigmasq)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "E", prior = prior, n = n, d = 1, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

# simE: simulate from a univariate "E" mixture; body continues in the next
# chunk.
simE <- function(parameters, n, seed = NULL, ...)
{
  # simE: simulate n observations from a univariate "E" (equal variance)
  # mixture; returns an n x 2 matrix (group label, simulated value).
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
  { warning("parameters are missing")
    return(structure(matrix(as.double(NA), n, 2), modelName = "E"))
  }
  if(!is.null(seed)) set.seed(seed)
  mu <- parameters$mean
  G <- length(mu)
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- rep(0, n)
  # all components share the same standard deviation
  sd <- sqrt(parameters$variance$sigmasq)
  for(k in 1:G)
  { x[clabels == k] <- mu[k] + rnorm(ctabel[k], sd = sd)
  }
  structure(cbind(group = clabels, "1" = x), modelName = "E")
}

# cdensEVI: (log) component densities for the multivariate "EVI" model
# (diagonal covariance, equal volume, varying shape).
cdensEVI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  # FIX: coerce the mean to a matrix as every sibling cdens*() function
  # does; previously a vector-valued $mean made ncol(mu) return NULL and
  # broke the G-dependent code below.
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,G)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(z, logarithm = logarithm, modelName = "EVI",
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  temp <- .Fortran("esevi",
                   as.double(data), as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(-1), as.integer(n), as.integer(p),
                   as.integer(G), as.double(-1), double(1),
                   double(n * G),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  }
  else { if (!logarithm) z <- exp(z)
    ret <- 0 }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "EVI",
            WARNING = WARNING, returnCode = ret)
}

# emEVI: EM for the "EVI" model -- E-step from given parameters, then
# iterate via meEVI().
emEVI <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepEVI(data, parameters = parameters, warn = warn)$z
  meEVI(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# estepEVI: E-step for the "EVI" model (diagonal, equal volume, varying
# shape); the final return expression completes in the next chunk.
estepEVI <- function(data, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  noise <- (l == G + 1)
  if(!noise)
  { if(l != G) stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else
  { K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "EVI", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  temp <- .Fortran("esevi",
                   as.double(data), as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(pro), as.integer(n), as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(1), double(n * K),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "EVI", n = n, d =
p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } meEVI <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) > 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if (Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EVI", d = p, G = G, scale = NA, shape = matrix(as.double(NA),p,G)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="EVI", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("meevi", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(p * G), double(1), double(p * G), double(K), PACKAGE = "mclust")[7:14] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EVI"), prior[names(prior) != "functionName"])) temp <- .Fortran("meevip", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), z, as.integer(control$itmax[1]), 
as.double(control$tol[1]), as.double(control$eps), double(p * G), double(1), double(p * G), double(K), PACKAGE = "mclust")[11:18] } z <- temp[[1]] its <- temp[[2]] err <- temp[[3]] loglik <- temp[[4]] mu <- matrix(temp[[5]], p, G) scale <- temp[[6]] shape <- matrix(temp[[7]], p, G) dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G)) pro <- temp[[8]] WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { if(warn) warning("z column sum fell below threshold") WARNING <- "z column sum fell below threshold" } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA sigma <- array(NA, c(p, p, G)) ret <- if(control$equalPro) -2 else -3 } else { sigma <- array(apply(scale * shape, 2, diag), c(p, p, G)) if(its >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) its <- - its ret <- 1 } else ret <- 0 } info <- c(iterations = its, error = err) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EVI", d = p, G = G, sigma = sigma, scale = scale, shape = shape) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "EVI", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } mstepEVI <- function(data, z, prior = NULL, warn = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EVI", d = p, G = G, scale = NA, shape = matrix(as.double(NA),p,G)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance) return(structure(list(modelName="EVI", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") if(is.null(prior)) { temp <- .Fortran("msevi", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), double(p * G), double(1), double(p * G), double(G), PACKAGE = "mclust")[6:9] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EVI"), prior[names( prior) != "functionName"])) temp <- .Fortran("msevip", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), double(p * G), double(1), double(p * G), double(G), PACKAGE = "mclust")[10:13] } mu <- matrix(temp[[1]], p, G) scale <- temp[[2]] shape <- matrix(temp[[3]], p, G) dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G)) pro <- temp[[4]] WARNING <- NULL if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)) || any(!c( scale, shape))) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) mu[] <- pro[] <- scale <- shape[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else { sigma <- array(apply(scale * shape, 2, diag), c(p, p, G)) ret <- 0 } dimnames(z) <- 
list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EVI", d = p, G = G, sigma = sigma, scale = scale, shape = shape) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "EVI", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters), WARNING = WARNING, returnCode = ret) } simEVI <- function(parameters, n, seed = NULL, ...) { if(!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, d + 1), modelName = "EVI")) } pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- matrix(0, n, d) shape <- as.matrix(parameters$variance$shape) if(!all(dim(shape) == dim(mean))) stop("shape incompatible with mean") sss <- sqrt(parameters$variance$scale * shape) for(k in 1:G) { m <- ctabel[k] x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% diag(sss[, k]), MARGIN = 2, STATS = mu[, k], FUN = "+") } dimnames(x) <- list(NULL, paste0("x", 1:d)) structure(cbind(group = clabels, x), modelName = "EVI") } # old version: LS 20150317 sigma2decomp <- function(sigma, G = NULL, tol = sqrt(.Machine$double.eps), ...) 
{
  # Body of the "old version" of sigma2decomp (signature on the previous
  # line).  NOTE(review): this definition is immediately superseded by
  # the second sigma2decomp below; it is kept only for reference.
  dimSigma <- dim(sigma)
  if(is.null(dimSigma))
    stop("sigma improperly specified")
  d <- dimSigma[1]
  if(dimSigma[2] != d)
    stop("sigma improperly specified")
  l <- length(dimSigma)
  if(l < 2 || l > 3)
    stop("sigma improperly specified")
  if(is.null(G))
  {
    if(l == 2)
    {
      G <- 1
      sigma <- array(sigma, c(dimSigma, 1))
    } else
    {
      G <- dimSigma[3]
    }
  } else
  {
    if(l == 3 && G != dimSigma[3])
      stop("sigma and G are incompatible")
    if(l == 2 && G != 1)
      sigma <- array(sigma, c(d, d, G))
  }
  # angle between subspaces
  subspace <- function(A, B)
  {
    for(k in 1:ncol(A))
    {
      B <- B - A[, k, drop = FALSE] %*% (t(A[, k, drop = FALSE]) %*% B)
    }
    norm(B, type = "2")
  }
  # check equality of values
  uniq <- function(x) { abs(max(x) - min(x)) < tol }
  decomp <- list(d = d, G = G, scale = rep(0, G),
                 shape = matrix(0, d, G),
                 orientation = array(0, c(d, d, G)))
  for(k in 1:G)
  {
    ev <- eigen(sigma[,,k], symmetric = TRUE)
    # log-scale geometric mean of the eigenvalues gives the volume
    temp <- log(ev$values)
    temp[!is.finite(temp)] <- 0
    logScale <- sum(temp)/d
    decomp$scale[k] <- exp(logScale)
    decomp$shape[,k] <- exp(temp - logScale)
    decomp$orientation[,,k] <- ev$vectors
  }
  scaleName <- "V"
  shapeName <- "V"
  orientName <- "V"
  # check scale/volume
  if(uniq(decomp$scale))
  {
    decomp$scale <- decomp$scale[1]
    scaleName <- "E"
  }
  # check shape
  if(all(apply(decomp$shape, 1, uniq)))
  {
    decomp$shape <- decomp$shape[, 1]
    if(all(uniq(decomp$shape)))
    {
      shapeName <- "I"
      decomp$shape <- rep(1, d)
    } else
    {
      shapeName <- "E"
    }
  }
  # check orientation
  eqOrientation <- {
    if(d == 2)
      all(apply(matrix(decomp$orientation, nrow = d * d, ncol = G), 1, uniq))
    else
      all(apply(decomp$orientation[,,-1,drop=FALSE], 3,
                function(o) subspace(decomp$orientation[,,1], o)) < tol)
  }
  if(eqOrientation)
  {
    decomp$orientation <- decomp$orientation[,,1]
    if(all(apply(cbind(decomp$orientation, diag(d)), 1, uniq)))
    {
      orientName <- "I"
      decomp$orientation <- NULL
    } else
    {
      orientName <- "E"
    }
  }
  decomp$modelName <- paste0(scaleName, shapeName, orientName)
  decomp$sigma <- sigma
  orderedNames <- c("sigma", "d", "modelName", "G", "scale", "shape",
                    "orientation")
  return(decomp[orderedNames])
}

# Decompose a covariance array (d x d x G, or d x d for G = 1) into the
# eigenvalue parameterization sigma_k = scale_k * D_k diag(shape_k) D_k',
# and classify volume/shape/orientation as Equal, Varying or Identity to
# obtain the model-name code (e.g. "EEE", "VVV").
# This definition supersedes the "old version" above.
sigma2decomp <- function(sigma, G = NULL, tol = sqrt(.Machine$double.eps), ...)
{
  dimSigma <- dim(sigma)
  if(is.null(dimSigma))
    stop("sigma improperly specified")
  d <- dimSigma[1]
  if(dimSigma[2] != d)
    stop("sigma improperly specified")
  l <- length(dimSigma)
  if(l < 2 || l > 3)
    stop("sigma improperly specified")
  if(is.null(G))
  {
    if(l == 2)
    {
      G <- 1
      sigma <- array(sigma, c(dimSigma, 1))
    } else
    {
      G <- dimSigma[3]
    }
  } else
  {
    if(l == 3 && G != dimSigma[3])
      stop("sigma and G are incompatible")
    if(l == 2 && G != 1)
      sigma <- array(sigma, c(d, d, G))
  }
  # angle between subspaces
  subspace <- function(A, B)
  {
    for(k in 1:ncol(A))
    {
      B <- B - A[, k, drop = FALSE] %*% (t(A[, k, drop = FALSE]) %*% B)
    }
    norm(B, type = "2")
  }
  # check equality of values
  uniq <- function(x) { abs(max(x) - min(x)) < tol }
  decomp <- list(d = d, G = G, scale = rep(0, G),
                 shape = matrix(0, d, G),
                 orientation = array(0, c(d, d, G)))
  for(k in 1:G)
  {
    ev <- eigen(sigma[,,k], symmetric = TRUE)
    temp <- log(ev$values)
    temp[!is.finite(temp)] <- 0
    logScale <- sum(temp)/d
    decomp$scale[k] <- exp(logScale)
    decomp$shape[,k] <- exp(temp - logScale)
    decomp$orientation[,,k] <- ev$vectors
  }
  scaleName <- "V"
  shapeName <- "V"
  orientName <- "V"
  # check scale/volume
  if(uniq(decomp$scale))
  {
    decomp$scale <- decomp$scale[1]
    scaleName <- "E"
  }
  # check shape
  if(all(apply(decomp$shape, 1, uniq)))
  {
    decomp$shape <- decomp$shape[, 1]
    if(all(uniq(decomp$shape)))
    {
      shapeName <- "I"
      decomp$shape <- rep(1, d)
    } else
    {
      shapeName <- "E"
    }
  }
  # check orientation: components whose eigenvector columns are highly
  # correlated with those of the first component share an orientation
  D <- decomp$orientation
  eqOrientation <- all(apply(D, 3, function(d)
    any(apply(d, 2, function(x) cor(D[,,1], x)^2) > (1-tol))))
  if(eqOrientation)
  {
    decomp$orientation <- decomp$orientation[,,1]
    orientName <- "E"
    # NOTE(review): abs() is applied before subtracting diag(d); this
    # looks like it was meant to be sum(abs(... - diag(d))) -- confirm
    # against upstream before changing.
    if(sum(abs(svd(decomp$orientation)$v) - diag(d)) < tol)
    {
      orientName <- "I"
      # decomp$orientation <- NULL
    }
  }
  decomp$modelName <- paste0(scaleName, shapeName, orientName)
  decomp$sigma <- sigma
  orderedNames <- c("sigma", "d", "modelName", "G",
                    "scale", "shape", "orientation")
  return(decomp[orderedNames])
}

# Inverse of sigma2decomp: build the covariance array sigma (d x d x G)
# from scale (volume), shape and (optionally) orientation, inferring d
# and G from the arguments when not supplied.  The returned array
# carries the inferred model-name code as an attribute.
decomp2sigma <- function(d, G, scale, shape, orientation = NULL, ...)
{
  nod <- missing(d)
  noG <- missing(G)
  lenScale <- length(scale)
  if(lenScale != 1)
  {
    # Bug fix: typo "incompatibile" -> "incompatible" in the message.
    if(!noG && G != lenScale)
      stop("scale incompatible with G")
    G <- lenScale
  }
  shape <- as.matrix(shape)
  p <- nrow(shape)
  if(!nod && p != d)
    stop("shape incompatible with d")
  d <- p
  g <- ncol(shape)
  if(g != 1)
  {
    if(!is.null(G) && g != G)
      stop("shape incompatible with scale")
    if(!noG && g != G)
      stop("shape incompatible with G")
    G <- g
  }
  if(is.null(orientation))
  {
    orientName <- "I"
    if(is.null(G))
    {
      G <- if(noG) 1 else G
    }
    orientation <- array(diag(d), c(d, d, G))
  } else
  {
    dimO <- dim(orientation)
    l <- length(dimO)
    if(is.null(dimO) || l < 2 || l > 3 || dimO[1] != dimO[2])
      stop("orientation improperly specified")
    if(dimO[1] != d)
      stop("orientation incompatible with shape")
    if(l == 3)
    {
      orientName <- "V"
      if(is.null(G))
      {
        if(!noG && dimO[3] != G)
          stop("orientation incompatible with G")
        G <- dimO[3]
      }
      else if(G != dimO[3])
        stop("orientation incompatible with scale and/or shape")
    } else
    {
      orientName <- "E"
      if(is.null(G))
      {
        G <- if(noG) 1 else G
      }
      orientation <- array(orientation, c(d, d, G))
    }
  }
  if(G == 1)
  {
    scaleName <- shapeName <- "X"
  } else
  {
    scaleName <- if(lenScale == 1) "E" else "V"
    shapeName <- if(g == 1) "E" else "V"
    scale <- rep(scale, G)
    shape <- matrix(shape, nrow = d, ncol = G)
  }
  sigma <- array(0, c(d, d, G))
  for(k in 1:G)
  {
    # sigma_k = D_k diag(scale_k * shape_k) D_k'
    sigma[,,k] <- crossprod(t(orientation[,,k]) * sqrt(scale[k] * shape[,k]))
  }
  structure(sigma, modelName = paste0(scaleName, shapeName, orientName))
}

# Regular one-dimensional grid of n points over `range`; points include
# the endpoints when edge = TRUE, otherwise they are cell midpoints.
grid1 <- function(n, range = c(0, 1), edge = TRUE)
{
  # Bug fix: the message said "nonpositive" although the test rejects
  # negative or non-integer n, i.e. n must be nonnegative.
  # (A dead "G <- rep(0, n)" initializer, always overwritten below, was
  # also removed.)
  if (any(n < 0 | round(n) != n))
    stop("n must be nonnegative and integer")
  if (edge)
  {
    G <- seq(from = min(range), to = max(range),
             by = abs(diff(range))/(n - 1))
  } else
  {
    lj <- abs(diff(range))
    incr <- lj/(2 * n)
    G <- seq(from = min(range) + incr, to = max(range) - incr,
             by = 2 * incr)
  }
  G
}

grid2 <- function (x, 
y)
{
  # Body of grid2 (signature starts on the previous line): Cartesian
  # product of the grids x and y as an (lx*ly) x 2 matrix, x varying
  # fastest.
  lx <- length(x)
  ly <- length(y)
  xy <- matrix(0, nrow = lx * ly, ncol = 2)
  l <- 0
  for (j in 1:ly)
  {
    for (i in 1:lx)
    {
      l <- l + 1
      xy[l,] <- c(x[i], y[j])
    }
  }
  xy
}

# Hypervolume of the data region, estimated as the smaller of the
# bounding-box volume in the original coordinates and in principal
# component coordinates.  With reciprocal = TRUE the inverse volume is
# returned (used for the uniform noise component), guarding against
# under/overflow via logs.
hypvol <- function (data, reciprocal = FALSE)
{
  dimdat <- dim(data)
  oneD <- ((is.null(dimdat) || NCOL(data) == 1))
  if (oneD)
  {
    n <- length(as.vector(data))
    ans <- if (reciprocal) 1/diff(range(data)) else diff(range(data))
    return(ans)
  }
  if (length(dimdat) != 2)
    stop("data must be a vector or a matrix")
  data <- as.matrix(data)
  # sum of log column ranges = log of the bounding-box volume
  sumlogdifcol <- function(x)
    sum(log(apply(x, 2, function(colm) diff(range(colm)))))
  bdvolog <- sumlogdifcol(data)
  pcvolog <- sumlogdifcol(princomp(data)$scores)
  volog <- min(bdvolog, pcvolog)
  if(reciprocal)
  {
    minlog <- log(.Machine$double.xmin)
    if (-volog < minlog)
    {
      warning("hypervolume smaller than smallest machine representable positive number")
      ans <- 0
    }
    else ans <- exp(-volog)
  } else
  {
    maxlog <- log(.Machine$double.xmax)
    if (volog > maxlog)
    {
      warning("hypervolume greater than largest machine representable number")
      ans <- Inf
    }
    else ans <- exp(volog)
  }
  return(ans)
}

# Subset method for "mclustBIC" objects: preserves the object's
# attributes (and subsets returnCodes consistently) when rows/columns
# are extracted.
"[.mclustBIC" <- function (x, i, j, drop = FALSE)
{
  ATTR <- attributes(x)[c("G", "modelNames", "prior", "control",
                          "initialization", "Vinv", "warn", "n", "d",
                          "oneD", "returnCodes", "class")]
  oldClass(x) <- NULL
  x <- NextMethod("[")
  if (is.null(dim(x))) return(x)
  ATTR$G <- as.numeric(dimnames(x)[[1]])
  ATTR$modelNames <- dimnames(x)[[2]]
  ATTR$returnCodes <-
    ATTR$returnCodes[dimnames(x)[[1]], dimnames(x)[[2]], drop = FALSE]
  do.call("structure", c(list(.Data = x), ATTR))
}

# Bayesian Information Criterion: 2*loglik - (number of estimated
# parameters) * log(n).
bic <- function(modelName, loglik, n, d, G, noise = FALSE,
                equalPro = FALSE, ...)
{
  nparams <- nMclustParams(modelName = modelName, d = d, G = G,
                           noise = noise, equalPro = equalPro)
  2 * loglik - nparams * log(n)
}

# Validate a model-name code; returns TRUE or stops with an error.
checkModelName <- function(modelName)
{
  switch(EXPR = modelName,
         "X" = ,
         "E" = ,
         "V" = ,
         "XII" = ,
         "XXI" = ,
         "XXX" = ,
         "EII" = ,
         "VII" = ,
         "EEI" = ,
         "VEI" = ,
         "EVI" = ,
         "VVI" = ,
         "EEE" = ,
         "EVE" = ,
         "VEE" = ,
         "VVE" = ,
         "EEV" = ,
         "VEV" = ,
         "EVV" = ,
         "VVV" = TRUE,
         stop("invalid model name"))
}

# Dispatch to the model-specific EM function (emEII, emVVV, ...) by
# rewriting the call.
em <- function(data, modelName, parameters, prior = NULL,
               control = emControl(), warn = NULL, ...)
{
  checkModelName(modelName)
  funcName <- paste("em", modelName, sep = "")
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name(funcName)
  mc$modelName <- NULL
  eval(mc, parent.frame())
}

# Dispatch to the model-specific E-step function (estepEII, ...).
estep <- function(data, modelName, parameters, warn = NULL, ...)
{
  checkModelName(modelName)
  funcName <- paste("estep", modelName, sep = "")
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name(funcName)
  mc$modelName <- NULL
  eval(mc, parent.frame())
}

# Template for the variance component of a parameters list: returns
# placeholder (-1 filled) structures of the correct dimensions for the
# given model code.  First letter = volume, second = shape, third =
# orientation; E = equal across components, V = varying, I = identity.
mclustVariance <- function(modelName, d = NULL, G = 2)
{
  x <- -1
  if (nchar(modelName) == 1)
  {
    if (!is.null(d) && d != 1)
      stop("modelName and d are incompatible")
    varList <- switch(EXPR = modelName,
                      "X" = list(sigmasq = x),
                      "E" = list(sigmasq = x),
                      "V" = list(sigmasq = rep(x,G)),
                      stop("modelName not recognized"))
  } else
  {
    if (nchar(modelName) != 3)
      stop("modelName is misspecified")
    if (is.null(d)) d <- 3
    varList <- switch(EXPR = modelName,
                      "XII" = list(sigmasq = x),
                      "EII" = list(sigmasq = x, scale = x,
                                   shape = rep(x,d)),
                      "VII" = list(sigmasq = rep(x,G), scale = rep(x,G),
                                   shape = rep(x,d)),
                      "XXI" = list(scale = x, shape = rep(x,d)),
                      "EEI" = list(scale = x, shape = rep(x,d)),
                      "EVI" = list(scale = x, shape = matrix(x,d,G)),
                      "VEI" = list(scale = rep(x,G), shape = rep(x,d)),
                      "VVI" = list(scale = rep(x,G),
                                   shape = matrix(x,d,G)),
                      "XXX" = { M <- matrix(x,d,d)
                                M[row(M) > col(M)] <- 0
                                list(cholSigma = M) },
                      "EEE" = { M <- matrix(x,d,d)
                                M[row(M) > col(M)] <- 0
                                list(cholSigma = M) },
                      "VEE" = list(scale = rep(x,G), shape = rep(x,d),
                                   orientation = matrix(x,d,d)),
                      "VVE" = list(scale = rep(x,G),
                                   shape = matrix(x,d,G),
                                   orientation = matrix(x,d,d)),
                      "EVV" = list(scale = x, shape = matrix(x,d,G),
                                   orientation = array(x,c(d,d,G))),
                      "EVE" = list(scale = x, shape = matrix(x,d,G),
                                   orientation = matrix(x,d,d)),
                      "EEV" = list(scale = x, shape = rep(x,d),
                                   orientation = array(x,c(d,d,G))),
                      # Bug fix: "VEV" has Varying volume and Equal shape,
                      # so scale is per-component (rep(x,G)) and shape is a
                      # single d-vector (rep(x,d)), consistent with the
                      # "VEI" and "EEV" entries; the original had
                      # scale = x, shape = matrix(x,d,G).
                      "VEV" = list(scale = rep(x,G), shape = rep(x,d),
                                   orientation = array(x,c(d,d,G))),
                      "VVV" = { A <- array(x,c(d,d,G))
                                I <- row(A[,,1]) > col(A[,,1])
                                for (k in 1:G) A[,,k][I] <- 0
                                list(cholsigma = A) },
                      stop("modelName not recognized"))
  }
  c(modelName = modelName, d = d, G = G, varList)
}

# Dispatch to the model-specific ME function (meEII, ...).
me <- function(data, modelName, z, prior = NULL, control = emControl(),
               Vinv = NULL, warn = NULL, ...)
{
  checkModelName(modelName)
  funcName <- paste("me", modelName, sep = "")
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name(funcName)
  mc$modelName <- NULL
  eval(mc, parent.frame())
}

# Dispatch to the model-specific M-step function (mstepEII, ...).
mstep <- function(data, modelName, z, prior = NULL, warn = NULL, ...)
{
  checkModelName(modelName)
  funcName <- paste("mstep", modelName, sep = "")
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name(funcName)
  mc$modelName <- NULL
  eval(mc, parent.frame())
}

# Fit a single multivariate normal to the data, mapping the requested
# model code to the corresponding one-component "X*" model, then
# attaching variable names to the fitted parameters.
mvn <- function(modelName, data, prior = NULL, warn = NULL, ...)
{
  modelName <- switch(EXPR = modelName,
                      "E" = "X",
                      "V" = "X",
                      "X" = "X",
                      "Spherical" = "XII",
                      "EII" = "XII",
                      "VII" = "XII",
                      "XII" = "XII",
                      "Diagonal" = "XXI",
                      "EEI" = "XXI",
                      "VEI" = "XXI",
                      "EVI" = "XXI",
                      "VVI" = "XXI",
                      "XXI" = "XXI",
                      "Ellipsoidal" = "XXX",
                      "EEE" = "XXX",
                      "VEE" = "XXX",
                      "EVE" = "XXX",
                      "EVV" = "XXX",
                      "VVE" = "XXX",
                      "EEV" = "XXX",
                      "VEV" = "XXX",
                      "VVV" = "XXX",
                      "XXX" = "XXX",
                      stop("invalid model name"))
  funcName <- paste("mvn", modelName, sep = "")
  mc <- match.call()
  mc[[1]] <- as.name(funcName)
  mc[[2]] <- NULL
  out <- eval(mc, parent.frame())
  varnames <- colnames(as.matrix(data))
  if(!all(is.null(varnames)))
  {
    rownames(out$parameters$mean) <- varnames
    dimnames(out$parameters$variance$Sigma) <- list(varnames, varnames)
    dimnames(out$parameters$variance$sigma) <- list(varnames, varnames, NULL)
  }
  return(out)
}

# Number of free covariance parameters for each model code.
nVarParams <- function(modelName, d, G, ...)
{
  modelName <- switch(EXPR = modelName,
                      X = "E",
                      XII = "EII",
                      XXI = "EEI",
                      XXX = "EEE",
                      modelName)
  # checkModelName(modelName)
  switch(EXPR = modelName,
         "E" = 1,
         "V" = G,
         "EII" = 1,
         "VII" = G,
         "EEI" = d,
         "VEI" = G + (d-1),
         "EVI" = 1 + G * (d-1),
         "VVI" = G * d,
         "EEE" = d*(d+1)/2,
         "EVE" = 1 + G*(d-1) + d*(d-1)/2,
         "VEE" = G + (d-1) + d*(d-1)/2,
         "VVE" = G + G * (d-1) + d*(d-1)/2,
         "EEV" = 1 + (d-1) + G * d*(d-1)/2,
         "VEV" = G + (d-1) + G * d*(d-1)/2,
         "EVV" = 1 - G + G * d*(d+1)/2,
         "VVV" = G * d*(d+1)/2,
         stop("invalid model name"))
}

# Total number of estimated parameters: covariance parameters plus G*d
# means, G-1 mixing proportions (unless equalPro), and 2 extra when a
# noise component is present.
nMclustParams <- function(modelName, d, G, noise = FALSE,
                          equalPro = FALSE, ...)
{
  modelName <- switch(EXPR = modelName,
                      X = "E",
                      XII = "EII",
                      XXI = "EEI",
                      XXX = "EEE",
                      modelName)
  checkModelName(modelName)
  if(G == 0)
  {
    ## one noise cluster case
    if(!noise)
      stop("undefined model")
    nparams <- 1
  } else
  {
    nparams <- nVarParams(modelName, d = d, G = G) + G*d
    if(!equalPro)
      nparams <- nparams + (G - 1)
    if(noise)
      nparams <- nparams + 2
  }
  return(nparams)
}

# Dispatch to the model-specific simulation function (simEII, ...),
# mapping single-component "X*" codes to their "E*" equivalents.
sim <- function(modelName, parameters, n, seed = NULL, ...)
{
  modelName <- switch(EXPR = modelName,
                      X = "E",
                      XII = "EII",
                      XXI = "EEI",
                      XXX = "EEE",
                      modelName)
  checkModelName(modelName)
  funcName <- paste("sim", modelName, sep = "")
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name(funcName)
  mc$modelName <- NULL
  eval(mc, parent.frame())
}

# Component densities for the "VEI" model: n x G matrix of (optionally
# log) densities of each observation under each component.
cdensVEI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, G)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(z, logarithm = logarithm, modelName = "VEI",
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  temp <- .Fortran("esvei",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  } else
  {
    if (!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]], dimnames(mu)[[2]])
  structure(z, logarithm = logarithm, modelName = "VEI",
            WARNING = WARNING, returnCode = ret)
}

emVEI <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...)
{
  # Body of emVEI (signature ends on the previous line): one E-step from
  # the current "VEI" parameters, then iterated EM via meVEI().
  z <- estepVEI(data, parameters = parameters, warn = warn)$z
  meVEI(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# E-step for the "VEI" model (diagonal covariances, varying volume,
# equal shape): returns the n x K conditional membership matrix z and
# the log-likelihood.  K = G + 1 when a uniform noise component is
# present, with Vinv the reciprocal hypervolume of the data region.
estepVEI <- function(data, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)   # normalize mixing proportions
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # an extra mixing proportion signals a uniform noise component
  noise <- (l == G + 1)
  if(!noise)
  {
    if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  } else
  {
    K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VEI", n = n, d = p, G = G, z = z,
                          parameters = parameters, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  # Fortran E-step; slots 10:11 return the log-likelihood and z
  temp <- .Fortran("esvei",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  # a huge loglik is the Fortran code's sentinel for failure
  if(loglik > signif(.Machine$double.xmax, 6))
  {
    WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  } else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  structure(list(modelName = "VEI", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# ME (iterated M-step/E-step) for the "VEI" model starting from a
# membership matrix z.  The "VEI" M-step itself requires an inner
# fixed-point iteration, so both elements of control$itmax/control$tol
# are passed to the Fortran code and both outer and inner iteration
# counts/errors are reported in the "info" attribute.
meVEI <- function(data, z, prior = NULL, control =
                  emControl(), Vinv = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # a non-NULL Vinv means the last column of z is a noise component
  if (!is.null(Vinv))
  {
    G <- K - 1
    if(Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  } else G <- K
  if(all(is.na(z)))
  {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VEI", d = p, G = G,
                     scale = rep(NA,G), shape = rep(NA,p))
    parameters <- list(pro = rep(NA,G),
                       mean = matrix(as.double(NA),p,G),
                       variance = variance, Vinv = Vinv)
    return(structure(list(modelName = "VEI", prior = prior, n = n, d = p,
                          G = G, z = z, parameters = parameters,
                          control = control, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
  {
    # trailing double(...) arguments are presumably Fortran workspace --
    # verify against the package's Fortran sources; results come back in
    # slots 7:14
    temp <- .Fortran("mevei",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax),
                     as.double(control$tol),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(p),
                     double(K),
                     double(G),
                     double(p),
                     double(p * G),
                     PACKAGE = "mclust")[7:14]
  } else
  {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VEI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("meveip",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax),
                     as.double(control$tol),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(p),
                     double(K),
                     double(G),
                     double(p),
                     double(p * G),
                     PACKAGE = "mclust")[11:18]
  }
  z <- temp[[1]]
  # itmax/tol slots return c(outer, inner) iteration counts and errors
  its <- temp[[2]][1]
  inner <- temp[[2]][2]
  err <- temp[[3]][1]
  inerr <- temp[[3]][2]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  scale <- temp[[6]]
  shape <- temp[[7]]
  dimnames(mu) <- list(NULL, as.character(1:G))
  pro <- temp[[8]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  {
    WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
  {
    if(control$equalPro)
    {
      WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    } else
    {
      WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- if(control$equalPro) -2 else -3
  }
  else
  {
    # diagonal covariances: sigma_k = diag(scale_k * shape)
    sigma <- array(0, c(p, p, G))
    for(k in 1:G)
      sigma[, , k] <- diag(scale[k] * shape)
    if(inner >= control$itmax[2])
    {
      WARNING <- "inner iteration limit reached"
      if(warn) warning(WARNING)
      inner <- - inner
      ret <- 2
    }
    else if(its >= control$itmax[1])
    {
      WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its
      ret <- 1
    }
    else ret <- 0
  }
  info <- c(iterations = its, error = err)
  attr(info, "inner") <- c(iterations = inner, error = inerr)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VEI", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape)
  parameters <- list(pro = pro, mean = mu, variance = variance, Vinv = Vinv)
  structure(list(modelName = "VEI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# M-step for the "VEI" model.  scale and shape have no closed form and
# are found by an inner fixed-point iteration bounded by the second
# elements of control$itmax and control$tol.
mstepVEI <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
  {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VEI", d = p, G = G,
                     scale = rep(NA,G), shape = rep(NA,p))
    parameters <- list(pro = rep(NA,G),
                       mean = matrix(as.double(NA),p,G),
                       variance = variance)
    return(structure(list(modelName = "VEI", prior = prior, n = n, d = p,
                          G = G, z = z, parameters = parameters,
                          control = control, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if (is.null(control)) control <- emControl()
  # second elements of itmax/tol (when supplied) bound the inner loop
  itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2]
  tol <- if(length(control$tol) == 1) control$tol else control$tol[2]
  if(is.null(prior))
  {
    # results come back in slots 6:11 (inner its, inner err, mean,
    # scale, shape, pro); remaining double(...) arguments are presumably
    # workspace -- verify against the Fortran sources
    temp <- .Fortran("msvei",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.integer(itmax),
                     as.double(tol),
                     double(p * G),
                     double(G),
                     double(p),
                     double(G),
                     double(G),
                     double(p),
                     double(p * G),
                     PACKAGE = "mclust")[6:11]
  } else
  {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VEI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("msveip",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     as.integer(itmax),
                     as.double(tol),
                     double(p * G),
                     double(G),
                     double(p),
                     double(G),
                     double(G),
                     double(p),
                     double(p * G),
                     PACKAGE = "mclust")[10:15]
  }
  inner <- temp[[1]]
  inerr <- temp[[2]]
  mu <- matrix(temp[[3]], p, G)
  scale <- temp[[4]]
  shape <- temp[[5]]
  dimnames(mu) <- list(NULL, as.character(1:G))
  pro <- temp[[6]]
  WARNING <- NULL
  # overflow or vanishing scale/shape marks a failed M-step
  if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)) ||
     any(! c(scale, shape)))
  {
    WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- shape <- scale[] <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  } else
  {
    ret <- 0
    sigma <- array(0, c(p, p, G))
    for(k in 1:G)
      sigma[, , k] <- diag(scale[k] * shape)
    if(inner >= itmax)
    {
      WARNING <- "inner iteration limit reached"
      if(warn) warning(WARNING)
      inner <- - inner
    }
  }
  info <- c(iterations = inner, error = inerr)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VEI", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape)
  parameters <- list(pro = pro, mean = mu, variance = variance)
  structure(list(modelName = "VEI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control),
            info = info, WARNING = WARNING, returnCode = ret)
}

simVEI <- function(parameters, n, seed = NULL, ...)
{ if(!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, d + 1), modelName = "VEI")) } pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- matrix(0, n, d) rtshape <- sqrt(parameters$variance$shape) if(length(rtshape) != d) stop("shape incompatible with mean") rtscale <- sqrt(parameters$variance$scale) if(length(rtscale) != G) stop("scale incompatible with mean") for(k in 1:G) { m <- ctabel[k] x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% diag(rtscale[k] * rtshape), MARGIN = 2, STATS = mu[, k], FUN = "+") } dimnames(x) <- list(NULL, paste0("x", 1:d)) structure(cbind(group = clabels, x), modelName = "VEI") } cdensV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be one-dimensional") data <- drop(data) n <- length(data) mu <- drop(parameters$mean) G <- length(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(names(data), NULL) return(structure(z, logarithm = logarithm, modelName = "V", WARNING = WARNING, returnCode = 9)) } sigmasq <- parameters$variance$sigmasq if(is.null(sigmasq)) stop("variance parameters are missing") if(any(sigmasq < 0)) stop("sigma-squared is negative") if(any(!sigmasq)) { WARNING <- "sigma-squared vanishes" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(names(data), NULL) return(structure(z, logarithm = logarithm, modelName = "V", WARNING = WARNING, returnCode = 9)) } if (length(sigmasq) == 1) sigmasq <- rep(sigmasq,G) temp <- .Fortran("es1v", as.double(data), as.double(mu), as.double(sigmasq), as.double(-1), as.integer(n), as.integer(G), as.double(-1), double(1), double(n * G), PACKAGE = "mclust")[8:9] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(names(data),NULL) structure(z, logarithm = logarithm, modelName = "V", WARNING = WARNING, returnCode = ret) } emV <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepV(data, parameters = parameters, warn = warn)$z meV(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepV <- function(data, parameters, warn = NULL, ...) 
# Body of estepV: conditional membership probabilities z and log-likelihood
# for univariate data under the "V" model, with optional uniform noise
# component (signalled by length(pro) == G + 1).
{ if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one-dimensional")
  data <- drop(data)
  n <- length(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- drop(parameters$mean)
  G <- length(mu)
  # an extra mixing proportion indicates a noise component
  noise <- (l == G + 1)
  if(!noise)
  { if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else
  { K <- G + 1
    Vinv <- parameters$Vinv
    # default: reciprocal hypervolume of the data region
    if(is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(names(data), NULL)
    return(structure(list(modelName = "V", n=n, d=1, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  if(is.null(sigmasq))
    stop("variance parameters are missing")
  if(any(sigmasq < 0))
    stop("sigma-squared is negative")
  if(any(!sigmasq))
  { WARNING <- "sigma-squared vanishes"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(names(data), NULL)
    return(structure(list(modelName = "V", n=n, d=1, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = -1))
  }
  temp <- .Fortran("es1v",
                   as.double(data),
                   as.double(mu),
                   as.double(sigmasq),
                   as.double(pro),
                   as.integer(n),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[8:9]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(names(data),NULL)
  structure(list(modelName = "V", n = n, d = 1, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# cdensVEV: component densities for the "VEV" model
# (signature continues on the next source line).
cdensVEV <- function(data,
logarithm = FALSE, parameters, warn = NULL, ...) { dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) mu <- as.matrix(parameters$mean) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "VEV", WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") temp <- .Fortran("esvev", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(aperm(parameters$variance$orientation,c(2,1,3))), as.double(-1), as.integer(n), as.integer(p), as.integer(G), as.double(-1), double(p), double(p), double(1), double(n * G), PACKAGE = "mclust")[13:14] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute E-step" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "VEV", WARNING = WARNING, returnCode = ret) } emVEV <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepVEV(data, parameters = parameters, warn = warn)$z meVEV(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepVEV <- function(data, parameters, warn = NULL, ...) 
# Body of estepVEV: conditional membership probabilities z and log-likelihood
# for the "VEV" model, with optional noise component (length(pro) == G + 1).
{ if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # an extra mixing proportion indicates a noise component
  noise <- (l == G + 1)
  if(!noise)
  { if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else
  { K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VEV", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  # orientation transposed for the Fortran (column-major) side
  temp <- .Fortran("esvev",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(aperm(parameters$variance$orientation,c(2,1,3))),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(p),
                   double(p),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[13:14]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "VEV", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# meVEV: EM algorithm (M-step then E-step iterations) starting from given z
# for the "VEV" model; body continues below.
meVEV <- function(data, z, prior = NULL, control = emControl(),
                  Vinv = NULL, warn = NULL, ...)
# Body of meVEV: runs the compiled EM iteration ("mevev"/"mevevp") for the
# "VEV" model starting from the membership matrix z, optionally with a
# conjugate prior and/or a noise component (Vinv supplied).
{ if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # a supplied Vinv means the last column of z is a noise component
  if (!is.null(Vinv))
  { G <- K - 1
    if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VEV", d = p, G = G,
                     scale=rep(NA,G), shape=rep(NA,p),
                     orientation=array(NA,c(p,p,G)))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance)
    return(structure(list(modelName="VEV", prior=prior, n=n, d=p, G=G, z=z,
                          parameters=parameters, control=control, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  # LAPACK workspace size for the SVD inside the Fortran code
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("mevev",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax),
                     as.double(control$tol),
                     as.double(control$eps),
                     as.integer(lwork),
                     double(p * G),
                     double(G),
                     double(p),
                     double(p * p * G),
                     double(K),
                     double(lwork),
                     double(p),
                     PACKAGE = "mclust")[7:16]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VEV"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("mevevp",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(if(any(priorParams$scale != 0))
                                 chol(priorParams$scale)
                               else priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax),
                     as.double(control$tol),
                     as.double(control$eps),
                     as.integer(lwork),
                     double(p * G),
                     double(G),
                     double(p),
                     double(p * p * G),
                     double(K),
                     double(lwork),
                     double(p),
                     PACKAGE = "mclust")[11:20]
  }
  z <- temp[[1]]
  its <- temp[[2]][1]
  inner <- temp[[2]][2]
  err <- temp[[3]][1]
  inerr <- temp[[3]][2]
  loglik <- temp[[4]]
  lapackSVDinfo <- temp[[5]]
  mu <- matrix(temp[[6]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp[[7]]
  shape <- temp[[8]]
  # undo the Fortran-side transposition of the orientation matrices
  O <- aperm( array(temp[[9]], c(p, p, G)), c(2,1,3))
  pro <- temp[[10]]
  WARNING <- NULL
  if(lapackSVDinfo)
  { if(lapackSVDinfo > 0)
    { WARNING <- "LAPACK DGESVD fails to converge" }
    else
    { WARNING <- "input error for LAPACK DGESVD" }
    if(warn) warning(WARNING)
    O[] <- shape[] <- scale[] <- NA
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -9
  }
  else if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    O[] <- shape[] <- scale[] <- NA
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
  { if(control$equalPro)
    { WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else
    { WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- if(control$equalPro) -2 else -3
  }
  else
  { sigma <- shapeO(shape, O, transpose = FALSE)
    sigma <- sweep(sigma, MARGIN = 3, STATS = scale, FUN = "*")
    if(inner >= control$itmax[2])
    { WARNING <- "inner iteration limit reached"
      if(warn) warning(WARNING)
      inner <- - inner
      ret <- 2
    }
    else if(its >= control$itmax[1])
    { WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its
      ret <- 1
    }
    else ret <- 0
  }
  info <- structure(c(iterations = its, error = err),
                    inner = c(iterations = inner, error = inerr))
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(O) <-
    list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  ## Sigma = scale * O %*% diag(shape) %*% t(O)
  variance <- list(modelName = "VEV", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VEV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# mstepVEV: M-step for the "VEV" model given data and membership matrix z.
# Iterates internally ("inner" iterations) because scale/shape/orientation
# have no closed-form joint solution; body continues on the next source line.
mstepVEV <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  ##
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VEV", d = p, G = G,
                     scale = rep(NA,G), shape = rep(NA,p),
                     orientation = array(NA,c(p,p,G)))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance)
    return(structure(list(modelName="VEV", prior=prior, n=n, d=p, G=G, z=z,
                          parameters=parameters, control=control, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
    # FIX: removed an unreachable duplicate of this "z is missing" block
    # (a second WARNING assignment and return(structure(...)) in the old
    # result format) that followed the return() above and could never run.
  }
  # shape <- sqrt(rev(sort(shape/exp(sum(log(shape))/p))))
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if (is.null(control)) control <- emControl()
  # the M-step uses the *inner* iteration limit/tolerance when a pair is given
  itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2]
  tol <- if(length(control$tol) == 1)
         control$tol else control$tol[2]
  # LAPACK workspace size for the SVD inside the Fortran code
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  if(is.null(prior))
  { temp <- .Fortran("msvev",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     double(lwork),
                     as.integer(lwork),
                     as.integer(itmax),
                     as.double(tol),
                     double(p * G),
                     double(G),
                     double(p),
                     double(p * p * G),
                     double(G),
                     PACKAGE = "mclust")[7:14]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VEV"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("msvevp",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(if(any(priorParams$scale != 0))
                                 chol(priorParams$scale)
                               else priorParams$scale),
                     as.double(priorParams$dof),
                     double(lwork),
                     as.integer(lwork),
                     as.integer(itmax),
                     as.double(tol),
                     double(p * G),
                     double(G),
                     double(p),
                     double(p * p * G),
                     double(G),
                     PACKAGE = "mclust")[11:18]
  }
  lapackSVDinfo <- temp[[1]]
  inner <- temp[[2]]
  inerr <- temp[[3]]
  mu <- matrix(temp[[4]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp[[5]]
  shape <- temp[[6]]
  # undo the Fortran-side transposition of the orientation matrices
  O <- aperm(array(temp[[7]], c(p, p, G)),c(2,1,3))
  pro <- temp[[8]]
  WARNING <- NULL
  if(lapackSVDinfo)
  { if(lapackSVDinfo > 0)
    { WARNING <- "LAPACK DGESVD fails to converge"
      if(warn) warning(WARNING)
    }
    else
    { WARNING <- "input error for LAPACK DGESVD"
      if(warn) warning(WARNING)
    }
    O[] <- shape[] <- scale[] <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -9
  }
  else if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)) ||
          any(!c(scale, shape)))
  { WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- O[] <- shape[] <- scale[] <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  }
  else
  { sigma <- sweep(shapeO(shape, O, transpose = FALSE),
                   MARGIN = 3, STATS = scale, FUN = "*")
    if(inner >= itmax)
    { WARNING <- "inner iteration limit reached"
      if(warn) warning(WARNING)
      inner <- - inner
    }
    # NOTE(review): returnCode 2 is set on every successful M-step here,
    # not only when the inner iteration limit is hit -- confirm intended.
    ret <- 2
  }
  info <- c(iteration = inner, error =
            inerr)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(O) <-
    list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VEV", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "VEV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control),
            info = info, WARNING = WARNING, returnCode = ret)
}

# simVEV: simulate n observations from a "VEV" Gaussian mixture.
# Sigma_k = scale_k * O_k diag(shape) t(O_k); returns cbind(group, x).
simVEV <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
  { warning("parameters are missing")
    # NA placeholder: group-label column plus d coordinate columns
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "VEV"))
  }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  rtshape <- sqrt(parameters$variance$shape)
  if(length(rtshape) != d)
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  if(length(rtscale) != G)
    stop("scale incompatible with mean")
  for(k in 1:G)
  { m <- ctabel[k]
    sss <- rtscale[k] * rtshape
    # rows of t(O) scaled by sss: cholSigma = diag(sss) %*% t(O_k), so that
    # eps %*% cholSigma has covariance scale_k * O diag(shape) t(O)
    cholSigma <- t(parameters$variance$orientation[, , k]) * sss
    x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                               cholSigma,
                               MARGIN = 2, STATS = mu[, k], FUN = "+")
  }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "VEV")
}

# cdensVII: component densities for the "VII" (spherical, varying volume)
# model; body continues on the following source lines.
cdensVII <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
# Body of cdensVII: density of each observation under each component of a
# multivariate mixture with spherical covariances sigmasq_k * I ("VII").
{ if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,G)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(z, logarithm = logarithm, modelName = "VII",
                     WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  # NOTE(review): unlike cdensV/estepVII there is no is.null(sigmasq) guard
  # here, so a missing sigmasq reaches any() below -- confirm intended.
  if(any(sigmasq < 0))
    stop("sigma-squared is negative")
  if(any(!sigmasq))
  { WARNING <- "sigma-squared vanishes"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,G)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(z, logarithm = logarithm, modelName = "VII",
                     WARNING = WARNING, returnCode = 9))
  }
  # pro = -1 requests densities only from the Fortran E-step
  temp <- .Fortran("esvii",
                   as.double(data),
                   as.double(mu),
                   as.double(sigmasq),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[9:10]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "sigma-squared falls below threshold"
    if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  }
  else
  { if (!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "VII",
            WARNING = WARNING, returnCode = ret)
}

# emVII: one EM cycle (E-step then M-step) for the "VII" model.
emVII <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepVII(data, parameters = parameters, warn = warn)$z
  meVII(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# estepVII: E-step for the "VII" model; body continues below.
estepVII <- function(data, parameters, warn = NULL, ...)
# Body of estepVII: conditional membership probabilities z and log-likelihood
# for the "VII" model, with optional noise component (length(pro) == G + 1).
{ if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # an extra mixing proportion indicates a noise component
  noise <- (l == G + 1)
  if(!noise)
  { if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else
  { K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
  { WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VII", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  if(is.null(sigmasq))
    stop("variance parameters are missing")
  if(any(sigmasq < 0))
    stop("sigma-squared is negative")
  if(any(!sigmasq))
  { WARNING <- "sigma-squared vanishes"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VII", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = -1))
  }
  temp <- .Fortran("esvii",
                   as.double(data),
                   as.double(mu),
                   as.double(sigmasq),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[9:10]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "VII", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING,
            returnCode = ret)
}

# meVII: EM algorithm for the "VII" model starting from membership matrix z,
# optionally with a conjugate prior and/or a noise component (Vinv supplied).
meVII <- function(data, z, prior = NULL, control = emControl(),
                  Vinv = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2)
    stop("data must be in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # a supplied Vinv means the last column of z is a noise component
  if (!is.null(Vinv))
  { G <- K - 1
    if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VII", d=p, G=G, sigmasq=rep(NA,G))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="VII", prior=prior, n=n, d=p, G=G, z=z,
                          parameters=parameters, control=control, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("mevii",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(K),
                     PACKAGE = "mclust")[7:13]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VII"),
                             prior[names(prior) != "functionName"]))
    # NOTE(review): storage.mode(z) was already set above; this repeat is
    # redundant but harmless.
    storage.mode(z) <- "double"
    temp <- .Fortran("meviip",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(K),
                     PACKAGE =
"mclust")[c(11:17, 10)] } mu <- matrix(temp[[5]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) z <- temp[[1]] its <- temp[[2]] err <- temp[[3]] loglik <- temp[[4]] sigmasq <- temp[[6]] pro <- temp[[7]] WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6) || any(sigmasq <= max(control$eps, 0))) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "z column sum fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- if(control$equalPro) -2 else -3 } else { sigma <- array(0, c(p, p, G)) for(k in 1:G) sigma[, , k] <- diag(rep(sigmasq[k], p)) if(its >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) its <- - its ret <- 1 } else ret <- 0 } info <- c(iterations = its, error = err) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "VII", d = p, G = G, sigma = sigma, sigmasq = sigmasq, scale = sigmasq) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "VII", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } meVVI <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) 
# Body of meVVI: runs the compiled EM iteration ("mevvi"/"mevvip") for the
# "VVI" model, optionally with a conjugate prior and/or noise component.
{ if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # a supplied Vinv means the last column of z is a noise component
  if (!is.null(Vinv))
  { G <- K - 1
    if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VVI", d = p, G = G,
                     scale = rep(NA,G), shape = matrix(as.double(NA),p,G))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="VVI", prior=prior, n=n, d=p, G=G, z=z,
                          parameters=parameters, control=control, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("mevvi",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(p * G),
                     double(K),
                     PACKAGE = "mclust")[7:14]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VVI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("mevvip",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(p * G),
                     double(K),
                     PACKAGE = "mclust")[11:18]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  scale <- temp[[6]]
  shape <- matrix(temp[[7]], p, G)
  dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G))
  pro <- temp[[8]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
  { WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
  { if(control$equalPro)
    { WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else
    { WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- if(control$equalPro) -2 else -3
  }
  else
  { # diagonal covariances: sigma_k = diag(scale_k * shape[, k])
    sigma <- array(apply(sweep(shape, MARGIN = 2, STATS = scale, FUN = "*"),
                         2, diag), c(p, p, G))
    if(its >= control$itmax[1])
    { WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its
      ret <- 1
    }
    else ret <- 0
  }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVI", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VVI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# mstepVII: M-step for the "VII" model given data and membership matrix z;
# body continues on the following source lines.
mstepVII <- function(data, z, prior = NULL, warn = NULL, ...)
# Body of mstepVII: closed-form M-step for the spherical "VII" model via the
# compiled "msvii"/"msviip" routines; returns pro, mean, and variances.
{ if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal number of observations")
  G <- dimz[2]
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VII", d=p, G=G, sigmasq=rep(NA,G))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance)
    return(structure(list(modelName="VII", prior=prior, n=n, d=p, G=G, z=z,
                          parameters=parameters),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("msvii",
                     as.double(data),
                     z,
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     double(p * G),
                     double(G),
                     double(G),
                     PACKAGE = "mclust")[6:8]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VII"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("msviip",
                     as.double(data),
                     z,
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     double(p * G),
                     double(G),
                     double(G),
                     PACKAGE = "mclust")[10:12]
  }
  mu <- matrix(temp[[1]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  sigmasq <- temp[[2]]
  pro <- temp[[3]]
  # spherical covariances: sigma_k = sigmasq_k * I_p
  sigma <- array(0, c(p, p, G))
  for(k in 1:G)
    sigma[, , k] <- diag(rep(sigmasq[k], p))
  WARNING <- NULL
  if(any(sigmasq > signif(.Machine$double.xmax, 6)))
  { WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VII", d = p, G = G, sigma =
sigma, sigmasq = sigmasq, scale = sigmasq) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "VII", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters), WARNING = WARNING, returnCode = ret) } simVII <- function(parameters, n, seed = NULL, ...) { if(!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, d), modelName = "VII")) } pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- matrix(0, n, d) sigmasq <- parameters$variance$sigmasq for(k in 1:G) { m <- ctabel[k] x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% diag(rep(sqrt(sigmasq[k]), d)), MARGIN = 2, STATS = mu[, k], FUN = "+") } dimnames(x) <- list(NULL, paste0("x", 1:d)) structure(cbind(group = clabels, x), modelName = "VII") } meV <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) 
# Body of meV: runs the compiled EM iteration ("me1v"/"me1vp") for the
# univariate "V" model, optionally with a conjugate prior and/or a noise
# component (Vinv supplied).
{ if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one-dimensional")
  data <- as.vector(data)
  n <- length(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal length of data")
  K <- dimz[2]
  # a supplied Vinv means the last column of z is a noise component
  if(!is.null(Vinv))
  { G <- K - 1
    if (Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z)))
  { WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "V", d=1, G=G, sigmasq = rep(NA,G))
    parameters <- list(pro=rep(NA,G), mean=rep(NA,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="V", prior=prior, n=n, d=1, G=G, z=z,
                          parameters=parameters, control=control, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
  { temp <- .Fortran("me1v",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(G),
                     as.double(if(is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(G),
                     double(G),
                     double(K),
                     PACKAGE = "mclust")[6:12]
  }
  else
  { priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "V"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("me1vp",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(G),
                     double(G),
                     double(K),
                     PACKAGE = "mclust")[c(10:16, 9)]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- temp[[5]]
  names(mu) <- as.character(1:G)
  sigmasq <- temp[[6]]
  pro <- temp[[7]]
  ## logpost <- temp[[8]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6) ||
     any(sigmasq <= max(control$eps, 0)))
  { WARNING <- "sigma-squared falls below threshold"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- sigmasq[] <- z[] <- loglik <- NA
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6))
  { if(control$equalPro)
    { WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else
    { WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- sigmasq[] <- z[] <- loglik <- NA
    ret <- if(control$equalPro) -2 else -3
  }
  else if(its >= control$itmax[1])
  { WARNING <- "iteration limit reached"
    if(warn) warning(WARNING)
    its <- - its
    ret <- 1
  }
  else ret <- 0
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(names(data),NULL)
  variance = list(modelName = "V", d = 1, G = G,
                  sigmasq = sigmasq, scale = sigmasq)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "V", prior = prior, n = n, d = 1, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# mstepV: M-step for the univariate "V" model; body continues below.
mstepV <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # M-step for the one-dimensional, unequal-variance model "V":
  # given data and an n x G matrix z of conditional probabilities,
  # estimate mixing proportions, means and per-component variances
  # via the Fortran routines 'ms1v' (no prior) / 'ms1vp' (conjugate prior).
  #
  # Arguments:
  #   data  - numeric vector (one-dimensional data only).
  #   z     - n x G matrix of conditional probabilities (rows sum to 1).
  #   prior - optional prior specification (list carrying functionName).
  #   warn  - logical; defaults to mclust.options("warn").
  # Returns list(modelName, prior, n, d, G, z, parameters) with
  # attributes WARNING and returnCode (0 ok, -1 failure, 9 z missing).
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one-dimensional")
  data <- as.vector(data)
  n <- length(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  # number of groups
  G <- dimz[2]
  if(all(is.na(z)))
    { # nothing to estimate: return an all-NA fit flagged returnCode 9
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "V", d = 1, G = G, sigmasq = rep(NA,G))
      parameters <- list(pro = rep(NA,G), mean = rep(NA,G),
                         variance = variance)
      return(structure(list(modelName = "V", prior = prior, n = n, d = 1,
                            G = G, z = z, parameters = parameters),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(prior))
    {
      temp <- .Fortran("ms1v",
                       as.double(data),
                       as.double(z),
                       as.integer(n),
                       as.integer(G),
                       double(G),
                       double(G),
                       double(G),
                       PACKAGE = "mclust")[5:7]
    }
  else
    { # evaluate the prior hyperparameters, then call the prior version
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "V"),
                               prior[names(prior) != "functionName"]))
      storage.mode(z) <- "double"
      temp <- .Fortran("ms1vp",
                       as.double(data),
                       z,
                       as.integer(n),
                       as.integer(G),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       double(G),
                       double(G),
                       double(G),
                       PACKAGE = "mclust")[9:11]
    }
  # unpack Fortran output: means, variances, mixing proportions
  mu <- temp[[1]]
  names(mu) <- as.character(1:G)
  sigmasq <- temp[[2]]
  pro <- temp[[3]]
  WARNING <- NULL
  if(any(sigmasq > signif(.Machine$double.xmax, 6)))
    { # overflow sentinel from Fortran: estimates are unusable
      WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      # bug fix: dropped stray 'loglik <- NA' -- loglik is neither
      # computed nor returned by the M-step, so the assignment only
      # created a dead local variable
      mu[] <- pro[] <- sigmasq[] <- z[] <- NA
      ret <- -1
    }
  else ret <- 0
  dimnames(z) <- list(names(data), NULL)
  variance <- list(modelName = "V", d = 1, G = G,
                   sigmasq = sigmasq, scale = sigmasq)
  parameters <- list(pro = pro, mean = mu, variance = variance)
  structure(list(modelName = "V", prior = prior, n = n, d = 1, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}
simV <- function(parameters, n, seed =
NULL, ...)
{
  # simV (continued): simulate n observations from a univariate
  # G-component Gaussian mixture with unequal variances (model "V").
  # Returns an n x 2 matrix: column "group" (component label) and the
  # simulated values.
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
    {
      warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, 2), modelName = "V"))
    }
  if(!is.null(seed)) set.seed(seed)
  mu <- parameters$mean
  G <- length(mu)
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins = G)
  x <- rep(0, n)
  sd <- sqrt(parameters$variance$sigmasq)
  for(k in 1:G)
    {
      x[clabels == k] <- mu[k] + rnorm(ctabel[k], sd = sd[k])
    }
  structure(cbind(group = clabels, "1" = x), modelName = "V")
}

# Component densities for the diagonal model "VVI".
# Returns an n x G matrix of (log-)densities of each observation under
# each component, with attributes logarithm/modelName/WARNING/returnCode.
cdensVVI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # bug fix: the parameter list stores the means under "mean", not "mu";
  # indexing a non-existent element returned NULL so missing means were
  # never detected by this check
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    {
      WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "VVI",
                       WARNING = WARNING, returnCode = 9))
    }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  temp <- .Fortran("esvvi",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran
      WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1
    }
  else
    {
      if (!logarithm) z <- exp(z)
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  # bug fix: attribute was misspelled 'retrinCode', so callers reading
  # attr(., "returnCode") always got NULL
  structure(z, logarithm = logarithm, modelName = "VVI",
            WARNING = WARNING, returnCode = ret)
}

# One EM cycle (E-step then M-step) for model "VVI".
emVVI <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepVVI(data, parameters = parameters, warn = warn)$z
  meVVI(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# E-step for model "VVI"; supports an optional noise component
# (length(pro) == G + 1, density Vinv).
estepVVI <- function(data, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  noise <- (l == G + 1)
  if(!noise)
    {
      if(l != G)
        stop("pro improperly specified")
      K <- G
      Vinv <- NULL
    }
  else
    {
      K <- G + 1
      Vinv <- parameters$Vinv
      if (is.null(Vinv) || Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE)
    }
  # bug fix: "mu" -> "mean" (same latent defect as in cdensVVI above)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    {
      WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,K)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(list(modelName = "VVI", n=n, d=p, G=G, z=z,
                            parameters=parameters, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  temp <- .Fortran("esvvi",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    {
      WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- loglik <- NA
      ret <- -1
    }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  # estepVVI (continued): package the E-step results
  structure(list(modelName = "VVI", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# EM iteration (M-step driven) for the diagonal, varying-volume,
# varying-shape model "VVI", starting from an initial conditional
# probability matrix z.  Uses the Fortran routines 'mevvi' (no prior)
# or 'mevvip' (conjugate prior).
#
# Arguments:
#   data    - numeric matrix (n x p); one-dimensional data are rejected.
#   z       - n x K matrix of initial conditional probabilities.
#   prior   - optional prior specification (list carrying functionName).
#   control - EM control parameters from emControl().
#   Vinv    - if non-NULL a noise component is assumed (so G = K - 1);
#             a non-positive value is replaced by hypvol(data, reciprocal = TRUE).
#   warn    - logical; defaults to mclust.options("warn").
# Returns a fitted-model list with attributes info (iterations, error),
# WARNING, and returnCode.
meVVI <- function(data, z, prior = NULL, control = emControl(),
                  Vinv = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # with a noise component the number of Gaussian clusters is K - 1
  if (!is.null(Vinv))
    {
      G <- K - 1
      if(Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE)
    }
  else G <- K
  if(all(is.na(z)))
    { # nothing to do: return an all-NA fit flagged returnCode 9
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VVI", d = p, G = G,
                       scale = rep(NA,G),
                       shape = matrix(as.double(NA),p,G))
      parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                         variance=variance, Vinv=Vinv)
      return(structure(list(modelName="VVI", prior=prior, n=n, d=p,
                            G=G, z=z, parameters=parameters,
                            control=control, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
    {
      temp <- .Fortran("mevvi",
                       as.logical(control$equalPro),
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       as.double(if (is.null(Vinv)) -1 else Vinv),
                       z,
                       as.integer(control$itmax[1]),
                       as.double(control$tol[1]),
                       as.double(control$eps),
                       double(p * G),
                       double(G),
                       double(p * G),
                       double(K),
                       PACKAGE = "mclust")[7:14]
    }
  else
    { # evaluate prior hyperparameters, then call the prior version
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "VVI"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mevvip",
                       as.logical(control$equalPro),
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       as.double(if (is.null(Vinv)) -1 else Vinv),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       z,
                       as.integer(control$itmax[1]),
                       as.double(control$tol[1]),
                       as.double(control$eps),
                       double(p * G),
                       double(G),
                       double(p * G),
                       double(K),
                       PACKAGE = "mclust")[11:18]
    }
  # unpack Fortran output: z, iteration count, error, loglik, means,
  # per-component scales, shape matrix, mixing proportions
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  scale <- temp[[6]]
  shape <- matrix(temp[[7]], p, G)
  dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G))
  pro <- temp[[8]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran
      WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      sigma <- array(NA, c(p, p, G))
      mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
      ret <- -1
    }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    { # underflow sentinel: a component collapsed
      if(control$equalPro)
        {
          WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING)
        }
      else
        {
          WARNING <- "mixing proportion fell below threshold"
          if(warn) warning(WARNING)
        }
      sigma <- array(NA, c(p, p, G))
      mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
      ret <- if(control$equalPro) -2 else -3
    }
  else
    { # Sigma_k = scale_k * diag(shape_k)
      sigma <- array(apply(sweep(shape, MARGIN = 2, STATS = scale,
                                 FUN = "*"), 2, diag), c(p, p, G))
      if(its >= control$itmax[1])
        {
          WARNING <- "iteration limit reached"
          if(warn) warning(WARNING)
          its <- - its
          ret <- 1
        }
      else ret <- 0
    }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVI", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VVI", prior = prior, n = n, d = p,
                 G = G, z = z, parameters = parameters,
                 control = control, loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# M-step only for model "VVI" (body begins on the next line)
mstepVVI <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # M-step for the diagonal model "VVI": given data and an n x G matrix
  # of conditional probabilities z, estimate proportions, means,
  # per-component scales and the shape matrix via 'msvvi' / 'msvvip'.
  # Returns a fitted-parameter list with attributes WARNING/returnCode.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    { # nothing to estimate: return an all-NA fit flagged returnCode 9.
      # bug fix: this branch previously labelled the model "VII" and
      # returned a 'sigmasq' component; use the "VVI" name and the
      # scale/shape parameterization, consistent with meVVI()
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VVI", d=p, G=G,
                       scale = rep(NA,G),
                       shape = matrix(as.double(NA),p,G))
      parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                         variance=variance)
      return(structure(list(modelName="VVI", prior=prior, n=n, d=p,
                            G=G, z=z, parameters=parameters),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(prior))
    {
      temp <- .Fortran("msvvi",
                       as.double(data),
                       as.double(z),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       double(p * G),
                       double(G),
                       double(p * G),
                       double(G),
                       PACKAGE = "mclust")[6:9]
    }
  else
    { # evaluate prior hyperparameters, then call the prior version
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "VVI"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("msvvip",
                       as.double(data),
                       as.double(z),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       double(p * G),
                       double(G),
                       double(p * G),
                       double(G),
                       PACKAGE = "mclust")[10:13]
    }
  # unpack Fortran output: means, scales, shape matrix, proportions
  mu <- matrix(temp[[1]], p, G)
  scale <- temp[[2]]
  shape <- matrix(temp[[3]], p, G)
  dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G))
  pro <- temp[[4]]
  WARNING <- NULL
  # a huge value signals overflow; a zero scale/shape entry signals a
  # degenerate component
  if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)) ||
     any(!c(scale, shape)))
    {
      WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      # bug fix: 'shape <- NA' replaced the shape matrix with a scalar;
      # blank its entries instead, as done for the other estimates
      mu[] <- pro[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else
    { # Sigma_k = scale_k * diag(shape_k)
      sigma <- array(apply(sweep(shape, MARGIN = 2, STATS = scale,
                                 FUN = "*"), 2, diag), c(p, p, G))
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVI", d = p, G = G, sigma = sigma,
                   sigmasq = scale, scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "VVI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

# Simulate n observations from a p-dimensional "VVI" mixture
# (diagonal covariances, varying volume and shape).  Returns an
# n x (d + 1) matrix: "group" labels plus the simulated coordinates.
simVVI <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
    {
      warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1), modelName = "VVI"))
    }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  rtshape <- sqrt(parameters$variance$shape)
  if(!all(dim(rtshape) == dim(mu)))
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  if(length(rtscale) != G)
    stop("scale incompatible with mean")
  for(k in 1:G)
    { # sd of coordinate j in component k is sqrt(scale_k * shape_jk)
      m <- ctabel[k]
      x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                                 diag(rtscale[k] * rtshape[, k]),
                                 MARGIN = 2, STATS = mu[, k], FUN = "+")
    }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "VVI")
}
cdensVVV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # Component densities for the full-covariance model "VVV".
  # Returns an n x G matrix of (log-)densities with attributes
  # logarithm/modelName/WARNING/returnCode.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    {
      WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      # bug fix: this branch returned an estep-style list, while every
      # other cdens* function (e.g. cdensVVI) returns the z matrix with
      # attributes; restore the cdens contract
      return(structure(z, logarithm = logarithm, modelName = "VVV",
                       WARNING = WARNING, returnCode = 9))
    }
  if (is.null(parameters$variance$cholsigma))
    stop("variance parameters are missing")
  temp <- .Fortran("esvvv",
                   as.logical(1),
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$cholsigma),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(p),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[10:12]
  lapackCholInfo <- temp[[1]][1]
  loglik <- temp[[2]]
  z <- matrix(temp[[3]], n, G)
  WARNING <- NULL
  if(lapackCholInfo)
    { # nonzero info from LAPACK's Cholesky factorization
      if(lapackCholInfo > 0)
        {
          WARNING <- "sigma is not positive definite"
          if(warn) warning(WARNING)
        }
      else
        {
          WARNING <- "input error for LAPACK DPOTRF"
          if(warn) warning(WARNING)
        }
      z[] <- NA
      ret <- -9
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran
      WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1
    }
  else
    {
      if (!logarithm) z <- exp(z)
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "VVV",
            WARNING = WARNING, returnCode = ret)
}

# One EM cycle (E-step then M-step) for model "VVV".
emVVV <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepVVV(data, parameters = parameters, warn = warn)$z
  meVVV(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# E-step for model "VVV"; supports an optional noise component
# (length(pro) == G + 1, density Vinv).
estepVVV <- function(data, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  noise <- (l == G + 1)
  if(!noise)
    {
      if(l != G)
        stop("pro improperly specified")
      K <- G
      Vinv <- NULL
    }
  else
    {
      K <- G + 1
      Vinv <- parameters$Vinv
      if(is.null(Vinv) || Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE)
    }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    {
      WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,K)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(list(modelName = "VVV", n=n, d=p, G=G, z=z,
                            parameters=parameters, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if (is.null(parameters$variance$cholsigma))
    stop("variance parameters are missing")
  temp <- .Fortran("esvvv",
                   as.logical(1),
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$cholsigma),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(p),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[10:12]
  lapackCholInfo <- temp[[1]][1]
  loglik <- temp[[2]]
  z <- matrix(temp[[3]], n, K)
  WARNING <- NULL
  if(lapackCholInfo)
    { # nonzero info from LAPACK's Cholesky factorization
      if(lapackCholInfo > 0)
        {
          WARNING <- "sigma is not positive definite"
          if(warn) warning(WARNING)
        }
      else
        {
          WARNING <- "input error for LAPACK DPOTRF"
          if(warn) warning(WARNING)
        }
      z[] <- loglik <- NA
      ret <- -9
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    {
      WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- loglik <- NA
      ret <- -1
    }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "VVV", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}
meVVV <- function(data, z, prior =
NULL, control = emControl(), Vinv = NULL, warn = NULL, ...)
{
  # EM iteration (M-step driven) for the full-covariance model "VVV",
  # starting from an initial conditional probability matrix z.  Uses
  # the Fortran routines 'mevvv' (no prior) or 'mevvvp' (conjugate
  # prior).  Vinv, when non-NULL, adds a noise component (G = K - 1).
  # Returns a fitted-model list with attributes info/WARNING/returnCode.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # with a noise component the number of Gaussian clusters is K - 1
  if (!is.null(Vinv))
    {
      G <- K - 1
      if(Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE)
    }
  else G <- K
  if(all(is.na(z)))
    { # nothing to do: return an all-NA fit flagged returnCode 9
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VVV", d = p, G = G,
                       sigma = array(NA, c(p,p,G)),
                       cholsigma = array(NA, c(p,p,G)))
      parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                         variance=variance, Vinv=Vinv)
      return(structure(list(modelName="VVV", prior=prior, n=n, d=p,
                            G=G, z=z, parameters=parameters,
                            control=control, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior))
    {
      temp <- .Fortran("mevvv",
                       as.logical(control$equalPro),
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       as.double(if (is.null(Vinv)) -1 else Vinv),
                       z,
                       as.integer(control$itmax[1]),
                       as.double(control$tol[1]),
                       as.double(control$eps),
                       double(p * G),
                       double(p * p * G),
                       double(K),
                       double(p),
                       double(p*p),
                       PACKAGE = "mclust")[7:13]
    }
  else
    { # evaluate prior hyperparameters, then call the prior version;
      # a nonzero prior scale matrix is passed in Cholesky form
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "VVV"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mevvvp",
                       as.logical(control$equalPro),
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       as.double(if (is.null(Vinv)) -1 else Vinv),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(if(any(priorParams$scale != 0))
                                   chol(priorParams$scale)
                                 else priorParams$scale),
                       as.double(priorParams$dof),
                       z,
                       as.integer(control$itmax[1]),
                       as.double(control$tol[1]),
                       as.double(control$eps),
                       double(p * G),
                       double(p * p * G),
                       double(K),
                       double(p),
                       double(p*p),
                       PACKAGE = "mclust")[c(11:17, 10)]
    }
  # unpack Fortran output: z, iteration count, error, loglik, means,
  # Cholesky factors of the covariances, mixing proportions
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  cholsigma <- array(temp[[6]], c(p, p, G))
  pro <- temp[[7]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran
      WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    { # underflow sentinel: a component collapsed
      if(control$equalPro)
        {
          WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING)
        }
      else
        {
          WARNING <- "mixing proportion fell below threshold"
          if(warn) warning(WARNING)
        }
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- if(control$equalPro) -2 else -3
    }
  else
    { # reconstruct Sigma_k from its upper-triangular Cholesky factor
      sigma <- array(apply(cholsigma, 3, unchol, upper = TRUE),
                     c(p,p,G))
      if(its >= control$itmax[1])
        {
          WARNING <- "iteration limit reached"
          if(warn) warning(WARNING)
          its <- - its
          ret <- 1
        }
      else ret <- 0
    }
  info <- c(iterations = its, error = abs(err))
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(cholsigma) <-
    list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVV", d = p, G = G,
                   sigma = sigma, cholsigma = cholsigma)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VVV", prior = prior, n = n, d = p,
                 G = G, z = z, parameters = parameters,
                 control = control, loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# M-step only for model "VVV" (body begins on the next line)
mstepVVV <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # M-step for the full-covariance model "VVV": given data and an
  # n x G matrix of conditional probabilities z, estimate proportions,
  # means and covariance Cholesky factors via 'msvvv' / 'msvvvp'.
  # Returns a fitted-parameter list with attributes WARNING/returnCode.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    { # nothing to estimate: return an all-NA fit flagged returnCode 9.
      # bug fix: 'sigma <- array(...)' inside list() created an
      # *unnamed* element (and a stray local) instead of a named
      # 'sigma' component; use '=' as for the other elements
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VVV", d = p, G = G,
                       sigma = array(NA, c(p,p, G)),
                       cholsigma = array(NA, c(p,p,G)))
      parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                         variance=variance)
      return(structure(list(modelName="VVV", prior=prior, n=n, d=p,
                            G=G, z=z, parameters=parameters, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(prior))
    {
      temp <- .Fortran("msvvv",
                       as.double(data),
                       as.double(z),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       double(p),
                       double(p * G),
                       double(p * p * G),
                       double(G),
                       double(p * p),
                       PACKAGE = "mclust")[7:9]
    }
  else
    { # evaluate prior hyperparameters, then call the prior version;
      # a nonzero prior scale matrix is passed in Cholesky form
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "VVV"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("msvvvp",
                       as.double(data),
                       as.double(z),
                       as.integer(n),
                       as.integer(p),
                       as.integer(G),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(if(any(priorParams$scale != 0))
                                   chol(priorParams$scale)
                                 else priorParams$scale),
                       as.double(priorParams$dof),
                       double(p),
                       double(p * G),
                       double(p * p * G),
                       double(G),
                       double(p * p),
                       PACKAGE = "mclust")[11:13]
    }
  # unpack Fortran output: means, covariance Cholesky factors, proportions
  mu <- matrix(temp[[1]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  cholsigma <- array(temp[[2]], c(p, p, G))
  pro <- temp[[3]]
  WARNING <- NULL
  if(any(c(mu, cholsigma) > signif(.Machine$double.xmax, 6)))
    {
      WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      # bug fix: 'sigma' was not yet defined here, so the original
      # 'sigma[] <- NA' raised an error; allocate it explicitly
      sigma <- array(NA, c(p, p, G))
      mu[] <- pro[] <- cholsigma[] <- NA
      ret <- -1
    }
  else
    { # reconstruct Sigma_k from its upper-triangular Cholesky factor
      sigma <- array(apply(cholsigma, 3, unchol, upper = TRUE),
                     c(p,p,G))
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  dimnames(cholsigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVV", d = p, G = G,
                   sigma = sigma, cholsigma = cholsigma)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "VVV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

# Simulate n observations from a p-dimensional "VVV" mixture (full,
# unrestricted covariances).  Accepts either Cholesky factors
# (variance$cholsigma) or the covariances themselves (variance$sigma).
simVVV <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
    {
      warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1), modelName = "VVV"))
    }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  if(is.null(cholsigma <- parameters$variance$cholsigma))
    {
      if(is.null(sigma <- parameters$variance$sigma))
        { # typo fix in message: "inlcude" -> "include"
          stop("variance parameters must include either sigma or cholsigma")
        }
      # factor each covariance; apply() flattens the result, so fold
      # each p x p factor back into the 3-d array slot by slot
      cholsigma <- apply(sigma, 3, chol)
      for(k in 1:ncol(cholsigma))
        sigma[, , k] <- cholsigma[, k]
      cholsigma <- sigma
    }
  if(dim(cholsigma)[3] != G)
    stop("variance incompatible with mean")
  for(k in 1:G)
    { # x = z %*% R + mu with R the upper Cholesky factor of Sigma_k
      m <- ctabel[k]
      x[clabels == k,] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                                cholsigma[,,k],
                                MARGIN = 2, STATS = mu[,k], FUN = "+")
    }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "VVV")
}

# single component univariate case
mvnX <- function(data, prior = NULL, warn = NULL, ...)
{
  # Fit a single univariate Gaussian (model "X") by maximum likelihood,
  # optionally with a conjugate prior, via 'mvn1d' / 'mvn1p'.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one dimensional")
  data <- as.vector(data)
  n <- length(data)
  if(is.null(prior))
    {
      temp <- .Fortran("mvn1d",
                       as.double(data),
                       as.integer(n),
                       double(1),
                       double(1),
                       double(1),
                       PACKAGE = "mclust")[3:5]
      logpost <- NULL
    }
  else
    { # evaluate prior hyperparameters, then call the prior version
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = 1, modelName = "X"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mvn1p",
                       as.double(data),
                       as.integer(n),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       double(1),
                       double(1),
                       double(1),
                       PACKAGE = "mclust")[c(7:9, 6)]
      logpost <- temp[[4]]
    }
  # unpack Fortran output: mean, variance, log-likelihood
  mu <- temp[[1]]
  sigmasq <- temp[[2]]
  loglik <- temp[[3]]
  ret <- 0
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran: degenerate variance estimate
      WARNING <- "sigma-squared vanishes"
      if(warn) warning(WARNING)
      loglik <- NA
      ret <- -1
    }
  variance = list(modelName = "X", d = 1, G = 1, sigmasq = sigmasq)
  parameters <- list(pro = 1, mean = mu, variance = variance)
  structure(list(modelName = "X", prior = prior, n = n, d = 1, G = 1,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# Component density for model "X": delegates to cdensE (a single
# univariate Gaussian is the one-component case of "E") and relabels.
cdensX <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  call <- match.call()
  mc <- match.call(expand.dots = FALSE)
  mc[[1]] <- as.name("cdensE")
  z <- eval(mc, parent.frame())
  attr(z, "modelName") <- "X"
  return(z)
}

# "EM" for a one-component model reduces to direct MLE.
emX <- function(data, prior = NULL, warn = NULL, ...)
{
  mvnX(data = data, prior = prior, warn = warn, ...)
}
meX <- emX

# single component multivariate case with diagonal common variance
mvnXII <- function(data, prior = NULL, warn = NULL, ...)
{
  # Fit a single multivariate Gaussian with spherical covariance
  # sigma^2 * I (model "XII") via 'mvnxii' / 'mnxiip'.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD)
    stop("for multidimensional data only")
  if(length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  if(is.null(prior))
    {
      temp <- .Fortran("mvnxii",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       double(p),
                       double(1),
                       double(1),
                       PACKAGE = "mclust")[4:6]
      logpost <- NULL
    }
  else
    { # evaluate prior hyperparameters, then call the prior version
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = 1, modelName = "XII"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mnxiip",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       double(p),
                       double(1),
                       double(1),
                       PACKAGE = "mclust")[c(8:10, 7)]
      logpost <- temp[[4]]
    }
  # unpack Fortran output: mean vector, common variance, log-likelihood
  mu <- temp[[1]]
  sigmasq <- temp[[2]]
  loglik <- temp[[3]]
  Sigma <- sigmasq * diag(p)
  ret <- 0
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran
      WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      loglik <- NA
      ret <- -1
    }
  variance <- list(modelName = "XII", d = p, G = 1,
                   sigmasq = sigmasq, Sigma = Sigma,
                   sigma = array(Sigma, c(p, p, 1)), scale = sigmasq)
  parameters <- list(pro = 1, mean = matrix(mu, ncol = 1),
                     variance = variance)
  structure(list(modelName = "XII", prior = prior, n = n, d = p, G = 1,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# Component density for model "XII": delegates to cdensEII and relabels.
cdensXII <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  call <- match.call()
  mc <- match.call(expand.dots = FALSE)
  mc[[1]] <- as.name("cdensEII")
  z <- eval(mc, parent.frame())
  attr(z, "modelName") <- "XII"
  return(z)
}

# "EM" for a one-component model reduces to direct MLE.
emXII <- function(data, prior = NULL, warn = NULL, ...)
{
  mvnXII(data = data, prior = prior, warn = warn, ...)
}
meXII <- emXII

# single component multivariate case with diagonal different variances
mvnXXI <- function(data, prior = NULL, warn = NULL, ...)
{
  # Fit a single multivariate Gaussian with diagonal covariance
  # scale * diag(shape) (model "XXI") via 'mvnxxi' / 'mnxxip'.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD)
    stop("for multidimensional data only")
  if(length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  if(is.null(prior))
    {
      temp <- .Fortran("mvnxxi",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       double(p),
                       double(1),
                       double(p),
                       double(1),
                       PACKAGE = "mclust")[4:7]
      logpost <- NULL
    }
  else
    { # evaluate prior hyperparameters, then call the prior version
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = 1, modelName = "XXI"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mnxxip",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       double(p),
                       double(1),
                       double(p),
                       double(1),
                       PACKAGE = "mclust")[c(8:11, 7)]
      logpost <- temp[[5]]
    }
  # unpack Fortran output: mean, volume (scale), shape, log-likelihood
  mu <- temp[[1]]
  scale <- temp[[2]]
  shape <- temp[[3]]
  loglik <- temp[[4]]
  Sigma <- diag(scale * shape)
  ret <- 0
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran
      WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      loglik <- NA
      ret <- -1
    }
  variance <- list(modelName = "XXI", d = p, G = 1,
                   Sigma = Sigma, sigma = array(Sigma, c(p, p, 1)),
                   scale = scale, shape = shape)
  parameters <- list(pro = 1, mean = matrix(mu, ncol = 1),
                     variance = variance)
  structure(list(modelName = "XXI", prior = prior, n = n, d = p, G = 1,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# Component density for model "XXI": delegates to cdensEEI and relabels.
cdensXXI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  call <- match.call()
  mc <- match.call(expand.dots = FALSE)
  mc[[1]] <- as.name("cdensEEI")
  z <- eval(mc, parent.frame())
  attr(z, "modelName") <- "XXI"
  return(z)
}

# "EM" for a one-component model reduces to direct MLE.
emXXI <- function(data, prior = NULL, warn = NULL, ...)
{
  mvnXXI(data = data, prior = prior, warn = warn, ...)
}
meXXI <- emXXI

# single component multivariate case with full covariance matrix
mvnXXX <- function(data, prior = NULL, warn = NULL, ...)
{
  # Fit a single multivariate Gaussian with an unrestricted covariance
  # matrix (model "XXX") via 'mvnxxx' / 'mnxxxp'.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD)
    stop("for multidimensional data only")
  if(length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  if(is.null(prior))
    {
      temp <- .Fortran("mvnxxx",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       double(p),
                       double(p * p),
                       double(1),
                       PACKAGE = "mclust")[c(4:6)]
      logpost <- NULL
    }
  else
    { # evaluate prior hyperparameters, then call the prior version;
      # a nonzero prior scale matrix is passed in Cholesky form
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = 1, modelName = "XXX"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mnxxxp",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       double(p),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(if(any(priorParams$scale != 0))
                                   chol(priorParams$scale)
                                 else priorParams$scale),
                       as.double(priorParams$dof),
                       double(p),
                       double(p * p),
                       double(1),
                       PACKAGE = "mclust")[c(9:11, 8)]
      logpost <- temp[[4]]
    }
  # unpack Fortran output: mean, covariance Cholesky factor, loglik
  mu <- temp[[1]]
  cholSigma <- matrix(temp[[2]], p, p)
  Sigma <- unchol(cholSigma, upper = TRUE)
  loglik <- temp[[3]]
  ## Sigma = t(cholSigma) %*% cholSigma
  ret <- 0
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran
      WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      loglik <- NA
      ret <- -1
    }
  variance <- list(modelName = "XXX", d = p, G = 1,
                   Sigma = Sigma, cholSigma = cholSigma,
                   cholsigma = cholSigma,
                   sigma = array(Sigma, c(p, p, 1)))
  parameters <- list(pro = 1, mean = matrix(mu, ncol = 1),
                     variance = variance)
  structure(list(modelName = "XXX", prior = prior, n = n, d = p, G = 1,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# Component density for model "XXX": delegates to cdensEEE and relabels.
cdensXXX <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  call <- match.call()
  mc <- match.call(expand.dots = FALSE)
  mc[[1]] <- as.name("cdensEEE")
  z <- eval(mc, parent.frame())
  attr(z, "modelName") <- "XXX"
  return(z)
}

# "EM" for a one-component model reduces to direct MLE
# (closing brace of this body is on the following line).
emXXX <- function(data, prior = NULL, warn = NULL, ...)
{
  mvnXXX(data = data, prior = prior, warn = warn, ...)
} meXXX <- emXXX mclust/R/bootstrap.R0000644000175000017500000004277114125667145014302 0ustar nileshnilesh## ## Resampling methods ## # # Bootstrap Likelihood Ratio Test # mclustBootstrapLRT <- function(data, modelName = NULL, nboot = 999, level = 0.05, maxG = NULL, verbose = interactive(), ...) { if(is.null(modelName)) stop("A 'modelName' must be provided. Please see help(mclustModelNames) which describes the available models.") modelName <- modelName[1] checkModelName(modelName) if(grepl("X", modelName)) stop("Specified 'modelName' is only valid for one-component mixture.") if(is.null(maxG)) { G <- seq.int(1, 9) } else { maxG <- as.numeric(maxG) G <- seq.int(1, maxG+1) } BIC <- mclustBIC(data, G = G, modelNames = modelName, warn = FALSE, verbose = FALSE, ...) if(!(modelName %in% attr(BIC, "modelNames"))) stop("'modelName' not compatibile with data. Please see help(mclustModelNames) which describes the available models.") # select only sequential models that can be fit bic <- BIC[, attr(BIC, "modelNames") == modelName] G <- G[!is.na(BIC)] if(length(G) == 0) stop(paste("no model", modelName, "can be fitted.")) if(all(G == 1)) { warning("only 1-component model could be fitted. 
No LRT is performed!") return() } if(sum(is.na(bic)) > 0) warning("some model(s) could not be fitted!") if(verbose) { flush.console() cat("bootstrapping LRTS ...\n") pbar <- txtProgressBar(min = 0, max = (max(G)-1)*nboot, style = 3) on.exit(close(pbar)) } obsLRTS <- p.value <- vector("numeric", length = max(G)-1) bootLRTS <- matrix(as.double(NA), nrow = nboot, ncol = max(G)-1) g <- 0; continue <- TRUE while(g < (max(G)-1) & continue) { g <- g + 1 # fit model under H0 Mod0 <- summary(BIC, data, G = g, modelNames = modelName) # fit model under H1 Mod1 <- summary(BIC, data, G = g+1, modelNames = modelName) # observed LRTS obsLRTS[g] <- 2*(Mod1$loglik - Mod0$loglik) # bootstrap b <- 0 while(b < nboot) { b <- b + 1 # generate 'parametric' bootstrap sample under H0 bootSample <- sim(Mod0$modelName, Mod0$parameters, n = Mod0$n) # fit model under H0 bootMod0 <- em(data = bootSample[,-1], modelName = Mod0$modelName, parameters = Mod0$parameters, warn = FALSE, ...) # fit model under H1 bootMod1 <- em(data = bootSample[,-1], modelName = Mod1$modelName, parameters = Mod1$parameters, warn = FALSE, ...) # compute bootstrap LRT LRTS <- 2*(bootMod1$loglik - bootMod0$loglik) if(is.na(LRTS)) { b <- b - 1; next() } bootLRTS[b,g] <- LRTS if(verbose) setTxtProgressBar(pbar, (g-1)*nboot+b) } p.value[g] <- (1 + sum(bootLRTS[,g] >= obsLRTS[g]))/(nboot+1) # check if not-significant when no maxG is provided if(is.null(maxG) & p.value[g] > level) { continue <- FALSE if(verbose) setTxtProgressBar(pbar, (max(G)-1)*nboot) } } out <- list(G = 1:g, modelName = modelName, obs = obsLRTS[1:g], boot = bootLRTS[,1:g,drop=FALSE], p.value = p.value[1:g]) class(out) <- "mclustBootstrapLRT" return(out) } print.mclustBootstrapLRT <- function(x, ...) 
{ txt <- paste(rep("-", min(61, getOption("width"))), collapse = "") catwrap(txt) catwrap("Bootstrap sequential LRT for the number of mixture components") catwrap(txt) cat(formatC("Model", flag = "-", width = 12), "=", x$modelName, "\n") cat(formatC("Replications", flag = "-", width = 12), "=", nrow(x$boot), "\n") df <- data.frame(x$obs, x$p.value) colnames(df) <- c("LRTS", "bootstrap p-value") rownames(df) <- formatC(paste(x$G, "vs", x$G+1), flag = "-", width = 8) print(df, ...) } plot.mclustBootstrapLRT <- function(x, G = 1, hist.col = "grey", hist.border = "lightgrey", breaks = "Scott", col = "forestgreen", lwd = 2, lty = 3, main = NULL, ...) { if(!any(G == x$G)) { warning(paste("bootstrap LRT not available for G =", G)) return() } G <- as.numeric(G)[1] h <- hist(x$boot[,G], breaks = breaks, plot = FALSE) xlim <- range(h$breaks, x$boot[,G], x$obs[G], na.rm = TRUE) xlim <- extendrange(xlim, f = 0.05) plot(h, xlab = "LRTS", freq = FALSE, xlim = xlim, col = hist.col, border = hist.border, main = NULL) box() abline(v = x$obs[G], lty = lty, lwd = lwd, col = col) if(is.null(main) | is.character(main)) { if(is.null(main)) main <- paste("Bootstrap LRT for model", x$modelName, "with", G, "vs", G+1, "components") title(main = main, cex.main = 1) } invisible() } # # Bootstrap inference (standard errors and percentile confidence intervals) # MclustBootstrap <- function(object, nboot = 999, type = c("bs", "wlbs", "pb", "jk"), max.nonfit = 10*nboot, verbose = interactive(), ...) 
{ if(!any(class(object) %in% c("Mclust", "densityMclust"))) stop("object must be of class 'Mclust' or 'densityMclust'") if(any(type %in% c("nonpara", "wlb"))) { type <- gsub("nonpara", "bs", type) type <- gsub("wlb", "wlbs", type) warning("resampling type converted to \"", type, "\"") } type <- match.arg(type, choices = eval(formals(MclustBootstrap)$type)) # data <- object$data n <- object$n d <- object$d G <- object$G if(type == "jk") nboot <- n varnames <- rownames(object$parameters$mean) # model parameters par <- summary(object)[c("pro", "mean", "variance")] if(d == 1) { par$mean <- array(par$mean, dim = c(d, G)) par$variance <- array(par$variance, dim = c(d, d, G)) } # bootstrapped parameters pro.boot <- array(NA, c(nboot,G), dimnames = list(NULL, seq.int(G))) mean.boot <- array(NA, c(nboot,d,G), dimnames = list(NULL, varnames, seq.int(G))) var.boot <- array(NA, c(nboot,d,d,G), dimnames = list(NULL, varnames, varnames, seq.int(G))) if(verbose) { cat("resampling ...\n") flush.console() pbar <- txtProgressBar(min = 0, max = nboot, style = 3) on.exit(close(pbar)) } b <- nonfit <- 0 while(b < nboot & nonfit < max.nonfit) { b <- b + 1 obj <- object switch(type, "bs" = { idx <- sample(seq_len(n), size = n, replace = TRUE) obj$data <- object$data[idx,] obj$z <- object$z[idx,] obj$warn <- FALSE mod.boot <- try(do.call("me", obj), silent = TRUE) }, "wlbs" = { w <- rexp(n) # w <- w/mean(w) w <- w/max(w) mod.boot <- try(do.call("me.weighted", c(list(weights = w, warn = FALSE), obj)), silent = TRUE) }, "pb" = { obj$data <- do.call("sim", object)[,-1,drop=FALSE] obj$z <- predict(obj)$z obj$warn <- FALSE mod.boot <- try(do.call("me", obj), silent = TRUE) }, "jk" = { idx <- seq_len(n)[-b] obj$data <- object$data[idx,] obj$z <- object$z[idx,] obj$warn <- FALSE mod.boot <- try(do.call("me", obj), silent = TRUE) } ) # check model convergence if(inherits(mod.boot, "try-error")) { if(type != "jk") b <- b - 1 nonfit <- nonfit + 1 next() } if(is.na(mod.boot$loglik)) { if(type != 
"jk") b <- b - 1 nonfit <- nonfit + 1 next() } if(type == "jk") { # pseudovalues ... # pro.boot[b,] <- n*par$pro - (n-1)*mod.boot$parameters$pro # mean.boot[b,,] <- n*par$mean - (n-1)*mod.boot$parameters$mean # var.boot[b,,,] <- n*par$variance - (n-1)*mod.boot$parameters$variance$sigma pro.boot[b,] <- mod.boot$parameters$pro mean.boot[b,,] <- mod.boot$parameters$mean var.boot[b,,,] <- mod.boot$parameters$variance$sigma } else { # bootstrap values pro.boot[b,] <- mod.boot$parameters$pro mean.boot[b,,] <- mod.boot$parameters$mean var.boot[b,,,] <- mod.boot$parameters$variance$sigma } if(verbose) setTxtProgressBar(pbar, b) } out <- list(G = G, modelName = object$modelName, parameters = par, nboot = nboot, type = type, nonfit = nonfit, pro = pro.boot, mean = mean.boot, variance = var.boot) class(out) <- "MclustBootstrap" return(out) } print.MclustBootstrap <- function(x, digits = getOption("digits"), ...) { cat("\'", class(x)[1], "\' object:\n", sep = "") str(x, max.level = 1, give.attr = FALSE, strict.width = "wrap") invisible() } summary.MclustBootstrap <- function(object, what = c("se", "ci", "ave"), conf.level = 0.95, ...) 
{ what <- match.arg(what, choices = eval(formals(summary.MclustBootstrap)$what)) dims <- dim(object$mean) # varnames <- dimnames(object$mean)[[2]] nboot <- dims[1] d <- dims[2] G <- dims[3] switch(what, "se" = { out <- list(pro = apply(object$pro, 2, sd, na.rm=TRUE), mean = apply(object$mean, c(2,3), sd, na.rm=TRUE), variance = apply(object$variance, c(2,3,4), sd, na.rm=TRUE)) if(object$type == "jk") out <- lapply(out, function(x) sqrt(x^2*(nboot-object$nonfit-1)^2/(nboot-object$nonfit))) }, "ave" = { out <- list(pro = apply(object$pro, 2, mean, na.rm=TRUE), mean = apply(object$mean, c(2,3), mean, na.rm=TRUE), variance = apply(object$variance, c(2,3,4), mean, na.rm=TRUE)) }, "ci" = { levels <- c((1-conf.level)/2, (1 + conf.level)/2) if(object$type == "jk") { # bias-corrected ci based on normal-approximation ave <- list(pro = apply(object$pro, 2, mean, na.rm=TRUE), mean = apply(object$mean, c(2,3), mean, na.rm=TRUE), variance = t(sapply(seq.int(d), function(j) apply(object$variance[,j,j,], 2, mean, na.rm=TRUE), simplify = "array"))) se <- list(pro = apply(object$pro, 2, sd, na.rm=TRUE), mean = apply(object$mean, c(2,3), sd, na.rm=TRUE), variance = t(sapply(seq.int(d), function(j) apply(object$variance[,j,j,], 2, sd, na.rm=TRUE), simplify = "array"))) se <- lapply(se, function(x) sqrt(x^2*(nboot-object$nonfit-1)^2/(nboot-object$nonfit))) zq <- qnorm(max(levels)) lnames <- paste0(formatC(levels * 100, format = "fg", width = 1, digits = getOption("digits")), "%") # the code above mimic stats:::format_perc(levels) which can't be used # because format_perc is not exported from stats out <- list(pro = array(as.double(NA), c(2,G), dimnames = list(lnames, 1:G)), mean = array(as.double(NA), dim = c(2,d,G), dimnames = list(lnames, 1:d, 1:G)), variance = array(as.double(NA), dim = c(2,d,G), dimnames = list(lnames, 1:d, 1:G))) out$pro[1,] <- ave$pro - zq*se$pro out$pro[2,] <- ave$pro + zq*se$pro out$mean[1,,] <- ave$mean - zq*se$mean out$mean[2,,] <- ave$mean + zq*se$mean 
out$variance[1,,] <- ave$variance - zq*se$variance out$variance[2,,] <- ave$variance + zq*se$variance } else { # percentile-based ci out <- list(pro = apply(object$pro, 2, quantile, probs = levels, na.rm=TRUE), mean = apply(object$mean, c(2,3), quantile, probs = levels, na.rm=TRUE)) v <- array(as.double(NA), dim = c(2,d,G), dimnames = dimnames(out$mean)) for(j in seq.int(d)) v[,j,] <- apply(object$variance[,j,j,], 2, quantile, probs = levels, na.rm=TRUE) out$variance <- v } } ) obj <- append(object[c("modelName", "G", "nboot", "type")], list(d = d, what = what)) if(what == "ci") obj$conf.level <- conf.level obj <- append(obj, out) class(obj) <- "summary.MclustBootstrap" return(obj) } print.summary.MclustBootstrap <- function(x, digits = getOption("digits"), ...) { txt <- paste(rep("-", min(58, getOption("width"))), collapse = "") catwrap(txt) catwrap(paste("Resampling", switch(x$what, "se" = "standard errors", "ave" = "averages", "ci" = "confidence intervals"))) catwrap(txt) # cat(formatC("Model", flag = "-", width = 26), "=", x$modelName, "\n") cat(formatC("Num. 
of mixture components", flag = "-", width = 26), "=", x$G, "\n") cat(formatC("Replications", flag = "-", width = 26), "=", x$nboot, "\n") cat(formatC("Type", flag = "-", width = 26), "=", switch(x$type, "bs" = "nonparametric bootstrap", "wlbs" = "weighted likelihood bootstrap", "pb" = "parametric bootstrap", "jk" = "jackknife"), "\n") if(x$what == "ci") cat(formatC("Confidence level", flag = "-", width = 26), "=", x$conf.level, "\n") # cat("\nMixing probabilities:\n") print(x$pro, digits = digits) # cat("\nMeans:\n") if(x$d == 1) { if(x$what == "se" | x$what == "ave") print(x$mean[1,], digits = digits) else print(x$mean[,1,], digits = digits) } else if(x$what == "se" | x$what == "ave") print(x$mean, digits = digits) else { for(g in seq.int(x$G)) { cat("[,,", g, "]\n", sep = "") print(x$mean[,,g], digits = digits) } } # cat("\nVariances:\n") if(x$d == 1) { print(x$variance[,1,], digits = digits) } else { for(g in seq.int(x$G)) { cat("[,,", g, "]\n", sep = "") print(x$variance[,,g], digits = digits) } } invisible(x) } plot.MclustBootstrap <- function(x, what = c("pro", "mean", "var"), show.parest = TRUE, show.confint = TRUE, hist.col = "grey", hist.border = "lightgrey", breaks = "Sturges", col = "forestgreen", lwd = 2, lty = 3, xlab = NULL, xlim = NULL, ylim = NULL, ...) { object <- x # Argh. Really want to use object anyway what <- match.arg(what, choices = eval(formals(plot.MclustBootstrap)$what)) par <- object$parameters d <- dim(object$mean)[2] varnames <- rownames(par$mean) if(show.confint) { ci <- summary(object, what = "ci", ...) ave <- summary(object, what = "ave", ...) } histBoot <- function(boot, stat, ci, ave, breaks, xlim, ylim, xlab, ...) 
{ hist(boot, breaks = breaks, xlim = xlim, ylim = ylim, main = "", xlab = xlab, ylab = "", border = hist.border, col = hist.col) box() if(show.parest) abline(v = stat, col = col, lwd = lwd, lty = lty) if(show.confint) { lines(ci, rep(par("usr")[3]/2,2), lwd = lwd, col = col) points(ave, par("usr")[3]/2, pch = 15, col = col) } } switch(what, "pro" = { xlim <- range(if(is.null(xlim)) pretty(object$pro) else xlim) for(k in 1:object$G) histBoot(object$pro[,k], breaks = breaks, stat = par$pro[k], ci = ci$pro[,k], ave = ave$pro[k], xlim = xlim, ylim = ylim, xlab = ifelse(is.null(xlab), paste("Mix. prop. for comp.",k), xlab)) }, "mean" = { isNull_xlim <- is.null(xlim) for(j in 1:d) { xlim <- range(if(isNull_xlim) pretty(object$mean[,j,]) else xlim) for(k in 1:object$G) histBoot(object$mean[,j,k], breaks = breaks, stat = par$mean[j,k], ci = ci$mean[,j,k], ave = ave$mean[j,k], xlim = xlim, ylim = ylim, xlab = ifelse(is.null(xlab), paste(varnames[j], "mean for comp.",k), xlab)) } }, "var" = { isNull_xlim <- is.null(xlim) for(j in 1:d) { xlim <- range(if(isNull_xlim) pretty(object$variance[,j,j,]) else xlim) for(k in 1:object$G) histBoot(object$variance[,j,j,k], breaks = breaks, stat = par$variance[j,j,k], ci = ci$variance[,j,k], ave = ave$variance[j,k], xlim = xlim, ylim = ylim, xlab = ifelse(is.null(xlab), paste(varnames[j], "var. for comp.",k), xlab)) } } ) invisible() } mclust/R/densityMclust.R0000644000175000017500000004045214156724500015117 0ustar nileshnileshdensityMclust <- function(data, ..., plot = TRUE) { mc <- match.call() obj <- Mclust(data, ...) if(is.null(obj)) return(obj) obj$call <- mc obj$density <- dens(data = obj$data, modelName = obj$modelName, parameters = obj$parameters, logarithm = FALSE) class(obj) <- c("densityMclust", "Mclust") if(plot) plot(obj, what = "density") return(obj) } predict.densityMclust <- function(object, newdata, what = c("dens", "cdens", "z"), logarithm = FALSE, ...) 
{ if(!inherits(object, "densityMclust")) stop("object not of class 'densityMclust'") if(missing(newdata)) { newdata <- object$data } newdata <- as.matrix(newdata) if(ncol(object$data) != ncol(newdata)) { stop("newdata must match ncol of object data") } what <- match.arg(what, choices = eval(formals(predict.densityMclust)$what)) pro <- object$parameters$pro; pro <- pro/sum(pro) noise <- (!is.na(object$hypvol)) cl <- c(seq(object$G), if(noise) 0) switch(what, "dens" = { out <- dens(data = newdata, modelName = object$modelName, parameters = object$parameters, logarithm = logarithm) }, "cdens" = { z <- cdens(data = newdata, modelName = object$modelName, parameters = object$parameters, logarithm = TRUE) z <- if(noise) cbind(z, log(object$parameters$Vinv)) else cbind(z) # drop redundant attributes colnames(z) <- cl out <- if(!logarithm) exp(z) else z }, "z" = { z <- cdens(data = newdata, modelName = object$modelName, parameters = object$parameters, logarithm = TRUE) z <- if(noise) cbind(z, log(object$parameters$Vinv)) else cbind(z) # drop redundant attributes z <- sweep(z, MARGIN = 2, FUN = "+", STATS = log(pro)) z <- sweep(z, MARGIN = 1, FUN = "-", STATS = apply(z, 1, logsumexp)) colnames(z) <- cl out <- if(!logarithm) exp(z) else z } ) return(out) } plot.densityMclust <- function(x, data = NULL, what = c("BIC", "density", "diagnostic"), ...) { object <- x # Argh. Really want to use object anyway what <- match.arg(what, several.ok = TRUE) if(object$d > 1) what <- setdiff(what, "diagnostic") oldpar <- par(no.readonly = TRUE) # on.exit(par(oldpar)) plot.densityMclust.density <- function(...) { if(object$d == 1) plotDensityMclust1(object, data = data, ...) else if(object$d == 2) plotDensityMclust2(object, data = data, ...) else plotDensityMclustd(object, data = data, ...) } plot.densityMclust.bic <- function(...) { plot.mclustBIC(object$BIC, ...) } plot.densityMclust.diagnostic <- function(...) { densityMclust.diagnostic(object, ...) 
} if(interactive() & length(what) > 1) { title <- "Model-based density estimation plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "BIC") plot.densityMclust.bic(...) if(what[choice] == "density") plot.densityMclust.density(...) if(what[choice] == "diagnostic") plot.densityMclust.diagnostic(...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "BIC")) plot.densityMclust.bic(...) if(any(what == "density")) plot.densityMclust.density(...) if(any(what == "diagnostic")) plot.densityMclust.diagnostic(...) } invisible() } plotDensityMclust1 <- function(x, data = NULL, col = gray(0.3), hist.col = "lightgrey", hist.border = "white", breaks = "Sturges", ...) { object <- x # Argh. Really want to use object anyway mc <- match.call(expand.dots = TRUE) mc$x <- mc$data <- mc$col <- mc$hist.col <- mc$hist.border <- mc$breaks <- NULL xlab <- mc$xlab if(is.null(xlab)) xlab <- deparse(object$call$data) ylab <- mc$ylab if(is.null(ylab)) ylab <- "Density" # xrange <- extendrange(object$data, f = 0.1) xlim <- eval(mc$xlim, parent.frame()) if(!is.null(xlim)) xrange <- range(xlim) ylim <- eval(mc$ylim, parent.frame()) # eval.points <- seq(from = xrange[1], to = xrange[2], length = 1000) d <- predict.densityMclust(object, eval.points) # if(!is.null(data)) { h <- hist(data, breaks = breaks, plot = FALSE) plot(h, freq = FALSE, col = hist.col, border = hist.border, main = "", xlim = range(h$breaks, xrange), ylim = if(!is.null(ylim)) range(ylim) else range(0, h$density, d), xlab = xlab, ylab = ylab) box() mc[[1]] <- as.name("lines") mc$x <- eval.points mc$y <- d mc$type <- "l" mc$col <- col eval(mc, parent.frame()) } else { mc[[1]] <- as.name("plot") mc$x <- eval.points mc$y <- d mc$type <- "l" mc$col <- col mc$xlim <- xlim mc$ylim <- if(!is.null(ylim)) range(ylim) else range(0, d) mc$ylab <- ylab mc$xlab <- xlab eval(mc, parent.frame()) } 
invisible() } plotDensityMclust2 <- function(x, data = NULL, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), points.pch = 1, points.col = 1, points.cex = 0.8, ...) { # This function call surfacePlot() with a suitable modification of arguments object <- x # Argh. Really want to use object anyway mc <- match.call(expand.dots = TRUE) mc$x <- mc$points.pch <- mc$points.col <- mc$points.cex <- NULL mc$nlevels <- nlevels mc$levels <- levels if(!is.null(mc$type)) if(mc$type == "level") mc$type <- "hdr" # TODO: to be removed at certain point if(isTRUE(mc$type == "hdr")) { mc$levels <- c(sort(hdrlevels(object$density, prob)), 1.1*max(object$density)) mc$nlevels <- length(mc$levels) } if(is.null(data)) { addPoints <- FALSE mc$data <- object$data } else { data <- as.matrix(data) stopifnot(ncol(data) == ncol(object$data)) addPoints <- TRUE } # set mixture parameters par <- object$parameters # these parameters should be missing par$variance$cholSigma <- par$Sigma <- NULL if(is.null(par$pro)) par$pro <- 1 par$variance$cholsigma <- par$variance$sigma for(k in seq(par$variance$G)) { par$variance$cholsigma[,,k] <- chol(par$variance$sigma[,,k]) } mc$parameters <- par # now surfacePlot() is called mc[[1]] <- as.name("surfacePlot") out <- eval(mc, parent.frame()) if(addPoints) points(data, pch = points.pch, col = points.col, cex = points.cex) # invisible(out) } plotDensityMclustd <- function(x, data = NULL, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), points.pch = 1, points.col = 1, points.cex = 0.8, gap = 0.2, ...) { # This function call surfacePlot() with a suitable modification of arguments object <- x # Argh. 
Really want to use object anyway mc <- match.call(expand.dots = TRUE) mc$x <- mc$points.pch <- mc$points.col <- mc$points.cex <- mc$gap <- NULL mc$nlevels <- nlevels mc$levels <- levels mc$prob <- prob if(!is.null(mc$type)) if(mc$type == "level") mc$type <- "hdr" # TODO: to be removed at certain point if(is.null(data)) { data <- mc$data <- object$data addPoints <- FALSE } else { data <- as.matrix(data) stopifnot(ncol(data) == ncol(object$data)) addPoints <- TRUE } nc <- object$d oldpar <- par(mfrow = c(nc, nc), mar = rep(gap/2,4), oma = rep(3, 4), no.readonly = TRUE) on.exit(par(oldpar)) for(i in seq(nc)) { for(j in seq(nc)) { if(i == j) { plot(data[,c(i,j)], type="n", xlab = "", ylab = "", axes=FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), colnames(data)[i], cex = 1.5, adj = 0.5) box() } else { # set mixture parameters par <- object$parameters if(is.null(par$pro)) par$pro <- 1 par$mean <- par$mean[c(j,i),,drop=FALSE] par$variance$d <- 2 sigma <- array(dim = c(2, 2, par$variance$G)) for(g in seq(par$variance$G)) sigma[,,g] <- par$variance$sigma[c(j,i),c(j,i),g] par$variance$sigma <- sigma par$variance$Sigma <- NULL par$variance$cholSigma <- NULL par$variance$cholsigma <- NULL mc$parameters <- par mc$data <- object$data[,c(j,i)] mc$axes <- FALSE mc[[1]] <- as.name("surfacePlot") eval(mc, parent.frame()) box() if(addPoints & (j > i)) points(data[,c(j,i)], pch = points.pch, col = points.col, cex = points.cex) } if(i == 1 && (!(j%%2))) axis(3) if(i == nc && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == nc && (i%%2)) axis(4) } } # invisible() } dens <- function(data, modelName, parameters, logarithm = FALSE, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") # aux <- list(...) 
logcden <- cdens(data = data, modelName = modelName, parameters = parameters, logarithm = TRUE, warn = warn) pro <- parameters$pro if(is.null(pro)) stop("mixing proportions must be supplied") noise <- (!is.null(parameters$Vinv)) if(noise) { proNoise <- pro[length(pro)] pro <- pro[-length(pro)] } if(any(proz <- pro == 0)) { pro <- pro[!proz] logcden <- logcden[, !proz, drop = FALSE] } logcden <- sweep(logcden, 2, FUN = "+", STATS = log(pro)) # logsumexp maxlog <- apply(logcden, 1, max) logcden <- sweep(logcden, 1, FUN = "-", STATS = maxlog) logden <- log(apply(exp(logcden), 1, sum)) + maxlog # if(noise) logden <- log(exp(logden) + proNoise*parameters$Vinv) out <- if(logarithm) logden else exp(logden) return(out) } cdens <- function(data, modelName, parameters, logarithm = FALSE, warn = NULL, ...) { modelName <- switch(EXPR = modelName, X = "E", XII = "EII", XXI = "EEI", XXX = "EEE", modelName) checkModelName(modelName) funcName <- paste("cdens", modelName, sep = "") mc <- match.call(expand.dots = TRUE) mc[[1]] <- as.name(funcName) mc$modelName <- NULL eval(mc, parent.frame()) } densityMclust.diagnostic <- function(object, type = c("cdf", "qq"), col = c("black", "black"), lwd = c(2,1), lty = c(1,1), legend = TRUE, grid = TRUE, ...) { # Diagnostic plots for density estimation # (only available for the one-dimensional case) # # Arguments: # object = a 'densityMclust' object # type = type of diagnostic plot: # "cdf" = fitted CDF vs empirical CDF # "qq" = fitted CDF evaluated over the observed data points vs # the quantile from a uniform distribution # # Reference: # Loader C. (1999), Local Regression and Likelihood. New York, Springer, # pp. 
87-90) if(!any(class(object) == "densityMclust")) { stop("first argument must be an object of class 'densityMclust'") } if(object$d > 1) { warning("only available for one-dimensional data") return() } type <- match.arg(type, c("cdf", "qq"), several.ok = TRUE) # main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) data <- as.numeric(object$data) n <- length(data) cdf <- cdfMclust(object, data = data, ngrid = min(n*10,1000), ...) oldpar <- par(no.readonly = TRUE) if(interactive() & length(type) > 1) { par(ask = TRUE) on.exit(par(oldpar)) } if(any(type == "cdf")) { # Fitted CDF vs Emprical CDF empcdf <- ecdf(data) plot(empcdf, do.points = FALSE, verticals = TRUE, col = col[2], lwd = lwd[2], lty = lty[2], xlab = deparse(object$call$data), ylab = "Cumulative Distribution Function", panel.first = if(grid) grid(equilogs=FALSE) else NULL, main = NULL, ...) # if(main) title(main = "CDF plot", cex.main = 1.1) lines(cdf, col = col[1], lwd = lwd[1], lty = lty[1]) rug(data) if(legend) { legend("bottomright", legend = c("Estimated CDF", "Empirical CDF"), ncol = 1, inset = 0.05, cex = 0.8, col = col, lwd = lwd, lty = lty) } } if(any(type == "qq")) { # Q-Q plot q <- quantileMclust(object, p = ppoints(n)) plot(q, sort(data), xlab = "Quantiles from estimated density", ylab = "Sample Quantiles", panel.first = if(grid) grid(equilogs=FALSE) else NULL, main = NULL, ...) # add qq-line Q.y <- quantile(sort(data), probs = c(.25,.75)) Q.x <- quantileMclust(object, p = c(.25,.75)) b <- (Q.y[2] - Q.y[1])/(Q.x[2] - Q.x[1]) a <- Q.y[1] - b*Q.x[1] abline(a, b, untf = TRUE, col = 1, lty = 2) # old method to draw qq-line # with(list(y = sort(data), x = q), # { i <- (y > quantile(y, 0.25) & y < quantile(y, 0.75)) # abline(lm(y ~ x, subset = i), lty = 2) # }) # P-P plot # cdf <- cdfMclust(object, data, ...) 
# plot(seq(1,n)/(n+1), cdf$y, xlab = "Uniform quantiles", # ylab = "Cumulative Distribution Function", # panel.first = if(grid) grid(equilogs=FALSE) else NULL) # abline(0, 1, untf = TRUE, col = col[2], lty = lty[1]) } invisible() } cdfMclust <- function(object, data, ngrid = 100, ...) { # Cumulative Density Function # (only available for the one-dimensional case) # # Returns the estimated CDF evaluated at points given by the optional # argument data. If not provided, a regular grid of ngrid points is used. # # Arguments: # object = a 'densityMclust' object # data = the data vector # ngrid = the length of rectangular grid if(!any(class(object) == "densityMclust")) { stop("first argument must be an object of class 'densityMclust'") } if(missing(data)) { eval.points <- extendrange(object$data, f = 0.1) eval.points <- seq(eval.points[1], eval.points[2], length.out = ngrid) } else { eval.points <- sort(as.vector(data)) ngrid <- length(eval.points) } G <- object$G pro <- object$parameters$pro mean <- object$parameters$mean var <- object$parameters$variance$sigmasq if(length(var) < G) var <- rep(var, G) noise <- (!is.null(object$parameters$Vinv)) cdf <- rep(0, ngrid) for(k in seq(G)) { cdf <- cdf + pro[k]*pnorm(eval.points, mean[k], sqrt(var[k])) } if(noise) cdf <- cdf/sum(pro[seq(G)]) out <- list(x = eval.points, y = cdf) return(out) } quantileMclust <- function(object, p, ...) { # Calculate the quantile of a univariate mixture corresponding to cdf # equal to p using bisection line search method. 
# # Arguments: # object = a 'densityMclust' object # p = vector of probabilities (0 <= p <= 1) stopifnot(inherits(object, "densityMclust")) if(object$d != 1) { stop("quantile function only available for 1-dimensional data") } p <- as.vector(p) m <- object$parameters$mean s <- sqrt(object$parameters$variance$sigmasq) if(object$modelName == "E") s <- rep(s, object$G) r <- matrix(as.double(NA), nrow = length(p), ncol = object$G) for(g in 1:object$G) { r[,g] <- qnorm(p, mean = m[g], sd = s[g]) } if(object$G == 1) return(as.vector(r)) q <- rep(as.double(NA), length(p)) for(i in 1:length(p)) { F <- function(x) cdfMclust(object, x)$y - p[i] q[i] <- uniroot(F, interval = range(r[i,]), tol = sqrt(.Machine$double.eps))$root } q[ p < 0 | p > 1] <- NaN q[ p == 0 ] <- -Inf q[ p == 1 ] <- Inf return(q) } mclust/R/util.R0000644000175000017500000003414614064570074013233 0ustar nileshnilesh adjustedRandIndex <- function (x, y) { x <- as.vector(x) y <- as.vector(y) if(length(x) != length(y)) stop("arguments must be vectors of the same length") tab <- table(x,y) if(all(dim(tab)==c(1,1))) return(1) a <- sum(choose(tab, 2)) b <- sum(choose(rowSums(tab), 2)) - a c <- sum(choose(colSums(tab), 2)) - a d <- choose(sum(tab), 2) - a - b - c ARI <- (a - (a + b) * (a + c)/(a + b + c + d)) / ((a + b + a + c)/2 - (a + b) * (a + c)/(a + b + c + d)) return(ARI) } classError <- function(classification, class) { q <- function(map, len, x) { x <- as.character(x) map <- lapply(map, as.character) y <- sapply(map, function(x) x[1]) best <- y != x if(all(len) == 1) return(best) errmin <- sum(as.numeric(best)) z <- sapply(map, function(x) x[length(x)]) mask <- len != 1 counter <- rep(0, length(len)) k <- sum(as.numeric(mask)) j <- 0 while(y != z) { i <- k - j m <- mask[i] counter[m] <- (counter[m] %% len[m]) + 1 y[x == names(map)[m]] <- map[[m]][counter[m]] temp <- y != x err <- sum(as.numeric(temp)) if(err < errmin) { errmin <- err best <- temp } j <- (j + 1) %% k } best } if(any(isNA <- 
is.na(classification))) { classification <- as.character(classification) nachar <- paste(unique(classification[!isNA]),collapse="") classification[isNA] <- nachar } MAP <- mapClass(classification, class) len <- sapply(MAP[[1]], length) if(all(len) == 1) { CtoT <- unlist(MAP[[1]]) I <- match(as.character(classification), names(CtoT), nomatch= 0) one <- CtoT[I] != class } else { one <- q(MAP[[1]], len, class) } len <- sapply(MAP[[2]], length) if(all(len) == 1) { TtoC <- unlist(MAP[[2]]) I <- match(as.character(class), names(TtoC), nomatch = 0) two <- TtoC[I] != classification } else { two <- q(MAP[[2]], len, classification) } err <- if(sum(as.numeric(one)) > sum(as.numeric(two))) as.vector(one) else as.vector(two) bad <- seq(along = classification)[err] list(misclassified = bad, errorRate = length(bad)/length(class)) } mapClass <- function(a, b) { l <- length(a) x <- y <- rep(NA, l) if(l != length(b)) { warning("unequal lengths") return(x) } # LS: new - check if both a & b are factors or character vectors # with the same levels then assume they are known classes and # match by level names if(is.factor(a) & is.factor(b) & nlevels(a) == nlevels(b)) { aTOb <- as.list(levels(b)) names(aTOb) <- levels(a) bTOa <- as.list(levels(a)) names(bTOa) <- levels(b) out <- list(aTOb = aTOb, bTOa = bTOa) return(out) } if(is.character(a) & is.character(b) & length(unique(a)) == length(unique(b))) { aTOb <- as.list(unique(b)) names(aTOb) <- unique(a) bTOa <- as.list(unique(a)) names(bTOa) <- unique(b) out <- list(aTOb = aTOb, bTOa = bTOa) return(out) } # otherwise match by closest class correspondence Tab <- table(a, b) Ua <- dimnames(Tab)[[1]] Ub <- dimnames(Tab)[[2]] aTOb <- rep(list(Ub), length(Ua)) names(aTOb) <- Ua bTOa <- rep(list(Ua), length(Ub)) names(bTOa) <- Ub # k <- nrow(Tab) Map <- rep(0, k) Max <- apply(Tab, 1, max) for(i in 1:k) { I <- match(Max[i], Tab[i, ], nomatch = 0) aTOb[[i]] <- Ub[I] } if(is.numeric(b)) aTOb <- lapply(aTOb, as.numeric) # k <- ncol(Tab) Map <- 
rep(0, k) Max <- apply(Tab, 2, max) for(j in (1:k)) { J <- match(Max[j], Tab[, j]) bTOa[[j]] <- Ua[J] } if(is.numeric(a)) bTOa <- lapply(bTOa, as.numeric) # out <- list(aTOb = aTOb, bTOa = bTOa) return(out) } map <- function(z, warn = mclust.options("warn"), ...) { nrowz <- nrow(z) cl <- numeric(nrowz) I <- 1:nrowz J <- 1:ncol(z) for(i in I) { cl[i] <- (J[z[i, ] == max(z[i, ])])[1] } if(warn) { K <- as.logical(match(J, sort(unique(cl)), nomatch = 0)) if(any(!K)) warning(paste("no assignment to", paste(J[!K], collapse = ","))) } return(cl) } unmap <- function(classification, groups=NULL, noise=NULL, ...) { # converts a classification to conditional probabilities # classes are arranged in sorted order unless groups is specified # if a noise indicator is specified, that column is placed last n <- length(classification) u <- sort(unique(classification)) if(is.null(groups)) { groups <- u } else { if(any(match( u, groups, nomatch = 0) == 0)) stop("groups incompatible with classification") miss <- match( groups, u, nomatch = 0) == 0 } cgroups <- as.character(groups) if(!is.null(noise)) { noiz <- match( noise, groups, nomatch = 0) if(any(noiz == 0)) stop("noise incompatible with classification") groups <- c(groups[groups != noise],groups[groups==noise]) noise <- as.numeric(factor(as.character(noise), levels = unique(groups))) } groups <- as.numeric(factor(cgroups, levels = unique(cgroups))) classification <- as.numeric(factor(as.character(classification), levels = unique(cgroups))) k <- length(groups) - length(noise) nam <- levels(groups) if(!is.null(noise)) { k <- k + 1 nam <- nam[1:k] nam[k] <- "noise" } z <- matrix(0, n, k, dimnames = c(names(classification),nam)) for(j in 1:k) { z[classification == groups[j], j] <- 1 } return(z) } BrierScore <- function(z, class) { z <- as.matrix(z) z <- sweep(z, 1, STATS = rowSums(z), FUN = "/") cl <- unmap(class, groups = if(is.factor(class)) levels(class) else NULL) if(any(dim(cl) != dim(z))) stop("input arguments do not match!") 
sum((cl-z)^2)/(2*nrow(cl)) } orth2 <- function (n) { u <- rnorm(n) u <- u/vecnorm(u) v <- rnorm(n) v <- v/vecnorm(v) Q <- cbind(u, v - sum(u * v) * u) dimnames(Q) <- NULL Q } randomOrthogonalMatrix <- function(nrow, ncol, n = nrow, d = ncol, seed = NULL) { # Generate a random orthogonal basis matrix of dimension (nrow x ncol) using # the algorithm in # Heiberger R. (1978) Generation of random orthogonal matrices. JRSS C, 27, # 199-206. if(!is.null(seed)) set.seed(seed) if(missing(nrow) & missing(n)) stop() if(missing(nrow)) { warning("Use of argument 'n' is deprecated. Please use 'nrow'") nrow <- n } if(missing(ncol) & missing(d)) stop() if(missing(ncol)) { warning("Use of argument 'd' is deprecated. Please use 'ncol'") ncol <- d } Q <- qr.Q(qr(matrix(rnorm(nrow*ncol), nrow = nrow, ncol = ncol))) return(Q) } logsumexp <- function(x) { # Numerically efficient implementation of log(sum(exp(x))) max <- max(x) max + log(sum(exp(x-max))) } partconv <- function(x, consec = TRUE) { n <- length(x) y <- numeric(n) u <- unique(x) if(consec) { # number groups in order of first row appearance l <- length(u) for(i in 1:l) y[x == u[i]] <- i } else { # represent each group by its lowest-numbered member for(i in u) { l <- x == i y[l] <- (1:n)[l][1] } } y } partuniq <- function(x) { # finds the classification that removes duplicates from x charconv <- function(x, sep = "001") { if(!is.data.frame(x)) x <- data.frame(x) do.call("paste", c(as.list(x), sep = sep)) } n <- nrow(x) x <- charconv(x) k <- duplicated(x) partition <- 1.:n partition[k] <- match(x[k], x) partition } dmvnorm <- function(data, mean, sigma, log = FALSE) { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) if(missing(mean)) mean <- rep(0, length = d) mean <- as.vector(mean) if(length(mean) != d) stop("data and mean have non-conforming size") if(missing(sigma)) sigma <- diag(d) sigma <- as.matrix(sigma) if(ncol(sigma) != d) stop("data and sigma have non-conforming size") if(max(abs(sigma - t(sigma))) > 
sqrt(.Machine$double.eps)) stop("sigma must be a symmetric matrix") # - 1st approach # cholsigma <- chol(sigma) # logdet <- 2 * sum(log(diag(cholsigma))) # md <- mahalanobis(data, center = mean, # cov = chol2inv(cholsigma), inverted = TRUE) # logdens <- -(ncol(data) * log(2 * pi) + logdet + md)/2 # # - 2nd approach # cholsigma <- chol(sigma) # logdet <- 2 * sum(log(diag(cholsigma))) # mean <- outer(rep(1, nrow(data)), as.vector(matrix(mean,d))) # data <- t(data - mean) # conc <- chol2inv(cholsigma) # Q <- colSums((conc %*% data)* data) # logdens <- as.vector(Q + d*log(2*pi) + logdet)/(-2) # # - 3rd approach (via Fortran code) logdens <- .Fortran("dmvnorm", as.double(data), # x as.double(mean), # mu as.double(sigma), # Sigma as.integer(n), # n as.integer(d), # p double(d), # w double(1), # hood double(n), # logdens PACKAGE = "mclust")[[8]] # if(log) logdens else exp(logdens) } shapeO <- function(shape, O, transpose = FALSE) { dimO <- dim(O) if(dimO[1] != dimO[2]) stop("leading dimensions of O are unequal") if((ldO <- length(dimO)) != 3) { if(ldO == 2) { dimO <- c(dimO, 1) O <- array(O, dimO) } else stop("O must be a matrix or an array") } l <- length(shape) if(l != dimO[1]) stop("dimension of O and length s are unequal") storage.mode(O) <- "double" .Fortran("shapeo", as.logical(transpose), as.double(shape), O, as.integer(l), as.integer(dimO[3]), double(l * l), integer(1), PACKAGE = "mclust")[[3]] } traceW <- function(x) { # sum(as.vector(sweep(x, 2, apply(x, 2, mean)))^2) dimx <- dim(x) n <- dimx[1] p <- dimx[2] .Fortran("mcltrw", as.double(x), as.integer(n), as.integer(p), double(p), double(1), PACKAGE = "mclust")[[5]] } unchol <- function(x, upper = NULL) { if(is.null(upper)) { upper <- any(x[row(x) < col(x)]) lower <- any(x[row(x) > col(x)]) if(upper && lower) stop("not a triangular matrix") if(!(upper || lower)) { x <- diag(x) return(diag(x * x)) } } dimx <- dim(x) storage.mode(x) <- "double" .Fortran("uncholf", as.logical(upper), x, as.integer(nrow(x)), 
as.integer(ncol(x)), integer(1), PACKAGE = "mclust")[[2]] } vecnorm <- function (x, p = 2) { if (is.character(p)) { if (charmatch(p, "maximum", nomatch = 0) == 1) p <- Inf else if (charmatch(p, "euclidean", nomatch = 0) == 1) p <- 2 else stop("improper specification of p") } if (!is.numeric(x) && !is.complex(x)) stop("mode of x must be either numeric or complex") if (!is.numeric(p)) stop("improper specification of p") if (p < 1) stop("p must be greater than or equal to 1") if (is.numeric(x)) x <- abs(x) else x <- Mod(x) if (p == 2) return(.Fortran("d2norm", as.integer(length(x)), as.double(x), as.integer(1), double(1), PACKAGE = "mclust")[[4]]) if (p == Inf) return(max(x)) if (p == 1) return(sum(x)) xmax <- max(x) if (!xmax) xmax <- max(x) if (!xmax) return(xmax) x <- x/xmax xmax * sum(x^p)^(1/p) } errorBars <- function(x, upper, lower, width = 0.1, code = 3, angle = 90, horizontal = FALSE, ...) { # Draw error bars at x from upper to lower. If horizontal = FALSE (default) # bars are drawn vertically, otherwise horizontally. if(horizontal) arrows(upper, x, lower, x, length = width, angle = angle, code = code, ...) else arrows(x, upper, x, lower, length = width, angle = angle, code = code, ...) 
} covw <- function(X, Z, normalize = TRUE) # Given data matrix X(n x p) and weight matrix Z(n x G) computes # weighted means(p x G), weighted covariance matrices S(p x p x G) and # weighted scattering matrices W(p x p x G) { X <- as.matrix(X) Z <- as.matrix(Z) n <- nrow(X) p <- ncol(X) nZ <- nrow(Z) G <- ncol(Z) if(n != nZ) stop("X and Z must have same number of rows") if(normalize) Z <- t( apply(Z, 1, function(z) z/sum(z)) ) tmp <- .Fortran("covwf", X = as.double(X), Z = as.double(Z), n = as.integer(n), p = as.integer(p), G = as.integer(G), mean = double(p*G), S = double(p*p*G), W = double(p*p*G), PACKAGE = "mclust") out <- list(mean = matrix(tmp$mean, p,G), S = array(tmp$S, c(p,p,G)), W = array(tmp$W, c(p,p,G)) ) return(out) } hdrlevels <- function(density, prob) { # Compute the levels for Highest Density Levels (HDR) for estimated 'density' # values and probability levels 'prob'. # # Reference: Hyndman (1996) Computing and Graphing Highest Density Regions if(missing(density) | missing(prob)) stop("Please provide both 'density' and 'prob' arguments to function call!") density <- as.vector(density) prob <- pmin(pmax(as.numeric(prob), 0), 1) alpha <- 1-prob lev <- quantile(density, alpha, na.rm = TRUE) names(lev) <- paste0(round(prob*100),"%") return(lev) } catwrap <- function(x, width = getOption("width"), ...) { # version of cat with wrapping at specified width cat(paste(strwrap(x, width = width, ...), collapse = "\n"), "\n") } ## ## Convert to a from classes 'Mclust' and 'densityMclust' ## as.Mclust <- function(x, ...) { UseMethod("as.Mclust") } as.Mclust.default <- function(x, ...) { if(inherits(x, "Mclust")) x else stop("argument 'x' cannot be coerced to class 'Mclust'") } as.Mclust.densityMclust <- function(x, ...) { class(x) <- c("Mclust", class(x)[1]) return(x) } as.densityMclust <- function(x, ...) { UseMethod("as.densityMclust") } as.densityMclust.default <- function(x, ...) 
{ if(inherits(x, "densityMclust")) x else stop("argument 'x' cannot be coerced to class 'densityMclust'") } as.densityMclust.Mclust <- function(x, ...) { class(x) <- c("densityMclust", class(x)) x$density <- dens(data = x$data, modelName = x$modelName, parameters = x$parameters, logarithm = FALSE) return(x) }mclust/R/mbahc.R0000644000175000017500000005606614146167160013334 0ustar nileshnilesh## ## Model-based Agglomerative Hierarchical Clustering (MBAHC) ## # MBAHC used for EM initialization for d-dim data ---- hc <- function(data, modelName = "VVV", use = "VARS", partition = dupPartition(data), minclus = 1, ...) { if(!any(modelName == c("E", "V", "EII", "VII", "EEE", "VVV"))) stop("invalid 'modelName' argument for model-based hierarchical clustering. See help(mclust.options)") if(!any(use == c("VARS", "STD", "SPH", "PCS", "PCR", "SVD", "RND"))) stop("invalid 'use' argument for model-based hierarchical clustering. See help(mclust.options)") funcName <- paste("hc", modelName, sep = "") mc <- match.call(expand.dots = TRUE) mc$use <- mc$modelName <- NULL data <- data.matrix(data) dropCols <- function(x) { # select only those columns of matrix x with all finite numerical values x[,apply(x, 2, function(x) all(is.finite(x))), drop = FALSE] } use <- toupper(use[1]) switch(use, "VARS" = { Z <- data }, "STD" = { Z <- scale(data, center = TRUE, scale = TRUE) Z <- dropCols(Z) }, "PCR" = { data <- scale(data, center = TRUE, scale = TRUE) data <- dropCols(data) SVD <- svd(data, nu=0) # evalues <- sqrt(SVD$d^2/(nrow(data)-1)) Z <- data %*% SVD$v }, "PCS" = { data <- scale(data, center = TRUE, scale = FALSE) SVD <- svd(data, nu=0) # evalues <- sqrt(SVD$d^2/(nrow(data)-1)) Z <- data %*% SVD$v Z <- dropCols(Z) }, "SPH" = { data <- scale(data, center = TRUE, scale = FALSE) n <- nrow(data); p <- ncol(data) Sigma <- var(data) * (n - 1)/n SVD <- svd(Sigma, nu = 0) Z <- data %*% SVD$v %*% diag(1/sqrt(SVD$d), p, p) Z <- dropCols(Z) }, "SVD" = { data <- scale(data, center = TRUE, scale = 
TRUE) data <- dropCols(data) p <- min(dim(data)) SVD <- svd(data, nu=0) Z <- data %*% SVD$v %*% diag(1/sqrt(SVD$d), p, p) }, "RND" = { out <- hcRandomPairs(data, ...) attr(out, "dimensions") <- dim(data) attr(out, "use") <- use attr(out, "call") <- match.call() class(out) <- "hc" return(out) } ) # call the proper hc function mc$data <- Z mc[[1]] <- as.name(funcName) out <- eval(mc, parent.frame()) attr(out, "use") <- use attr(out, "call") <- match.call() attr(out, "data") <- mc$data class(out) <- "hc" return(out) } print.hc <- function(x, ...) { if(!is.null(attr(x, "call"))) { cat("Call:\n") catwrap(paste0(deparse(attr(x, "call")))) cat("\n") } catwrap("Model-Based Agglomerative Hierarchical Clustering") if(!is.null(attr(x, "modelName"))) cat(paste("Model name =", attr(x, "modelName"), "\n")) if(!is.null(attr(x, "use"))) cat(paste("Use =", attr(x, "use"), "\n")) if(!is.null(attr(x, "dimensions"))) cat(paste("Number of objects =", attr(x, "dimensions")[1], "\n")) invisible(x) } randomPairs <- function(...) { .Deprecated(old = "randomPairs", new = "hcRandomPairs", package = "mclust") hcRandomPairs(...) } hcRandomPairs <- function(data, seed = NULL, ...) 
{ if(!is.null(seed)) set.seed(seed) data <- as.matrix(data) n <- nrow(data) m <- if(n%%2 == 1) n-1 else n tree <- matrix(sample(1:n, m, replace = FALSE), nrow = 2, ncol = ceiling(m/2)) tree <- apply(tree, 2, sort) ind <- unique(tree[1,]) while(ncol(tree) < (m-1)) { addtree <- sort(sample(ind, size = 2, replace = FALSE)) ind <- setdiff(ind, addtree[2]) tree <- cbind(tree, addtree) } dimnames(tree) <- NULL structure(tree, initialPartition = 1:n, dimensions = c(n,2)) } dupPartition <- function(data) { dup <- duplicated(data) if (is.null(dim(data))) { data <- as.numeric(data) if (!any(dup)) return(1:length(data)) kmeans(data, centers = data[!dup])$cluster } else { data <- data.matrix(data) if (!any(dup)) return(1:nrow(data)) kmeans(data, centers = data[!dup,])$cluster } } hclass <- function(hcPairs, G) { initial <- attributes(hcPairs)$init n <- length(initial) k <- length(unique(initial)) G <- if(missing(G)) k:2 else rev(sort(unique(G))) select <- k - G if(length(select) == 1 && !select) return(matrix(initial, ncol = 1, dimnames = list(NULL, as.character(G)))) bad <- select < 0 | select >= k if(all(bad)) stop("No classification with the specified number of clusters") if(any(bad) & mclust.options("warn")) { warning("Some selected classifications are inconsistent with mclust object") } L <- length(select) cl <- matrix(as.double(NA), nrow = n, ncol = L, dimnames = list(NULL, as.character(G))) if(select[1]) m <- 1 else { cl[, 1] <- initial m <- 2 } for(l in 1:max(select)) { ij <- hcPairs[, l] i <- min(ij) j <- max(ij) initial[initial == j] <- i if(select[m] == l) { cl[, m] <- initial m <- m + 1 } } apply(cl[, L:1, drop = FALSE], 2, partconv, consec = TRUE) } hcEII <- function(data, partition = NULL, minclus = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #==================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) #if(oneD || length(dimdat) > 2) # stop("data should in the form of a matrix") data <- as.matrix(data) dimnames(data) <- NULL n <- nrow(data) p <- ncol(data) if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) { stop("initial number of clusters is not greater than minclus") } if(n <= p & mclust.options("warn")) { warning("# of observations <= data dimension") } #============================================================= storage.mode(data) <- "double" ld <- max(c((l * (l - 1))/2, 3 * m)) temp <- .Fortran("hceii", data, as.integer(n), as.integer(p), as.integer(partition), as.integer(l), as.integer(m), double(p), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 9)] temp[[1]] <- temp[[1]][1:m, 1:2, drop = FALSE] temp[[2]] <- temp[[2]][1:m] structure(t(temp[[1]]), initialPartition = partition, dimensions = dimdat, modelName = "EII", call = match.call()) } hcEEE <- function(data, partition = NULL, minclus = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #===================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) #if(oneD || length(dimdat) > 2) # stop("data should in the form of a matrix") data <- as.matrix(data) dimnames(data) <- NULL n <- nrow(data) p <- ncol(data) if(n <= p & mclust.options("warn")) warning("# of observations <= data dimension") if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" ## R 2.12.0: 32 bit Windows build fails due to compiler bug ## workaround: removal (hopefully temporary) of hc functionality for EEE # Luca: commented the next line and uncommented below # stop("hc for EEE model is not currently supported") temp <- .Fortran("hceee", data, as.integer(n), as.integer(p), as.integer(partition), as.integer(l), as.integer(m), if(p < 3) integer(m) else integer(1), if(p < 4) integer(m) else integer(1), double(p), double(p * p), double(p * p), double(p * p), PACKAGE = "mclust")[c(1, 7:10)] # # currently temp[[5]] is not output temp[[4]] <- temp[[4]][1:2] temp[[5]] <- temp[[5]][1:2] names(temp[[5]]) <- c("determinant", "trace") temp[[1]] <- temp[[1]][1:(m + 1), ] if(p < 3) tree <- rbind(temp[[2]], temp[[3]]) else if(p < 4) tree <- rbind(temp[[1]][-1, 3], temp[[3]]) else tree <- t(temp[[1]][-1, 3:4, drop = FALSE]) determinant <- temp[[1]][, 1] attr(determinant, "breakpoints") <- temp[[4]] trace <- temp[[1]][, 2] structure(tree, initialPartition = partition, dimensions = dimdat, modelName = "EEE", call = match.call()) } hcVII <- function(data, partition = NULL, minclus = 1, alpha = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #===================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) #if(oneD || length(dimdat) > 2) # stop("data should in the form of a matrix") data <- as.matrix(data) dimnames(data) <- NULL n <- nrow(data) p <- ncol(data) if(n <= p & mclust.options("warn")) warning("# of observations <= data dimension") if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" ll <- (l * (l - 1))/2 ld <- max(n, ll, 3 * m) alpha <- alpha * traceW(data/sqrt(n * p)) alpha <- max(alpha, .Machine$double.eps) temp <- .Fortran("hcvii", data, as.integer(n), as.integer(p), as.integer(partition), as.integer(l), as.integer(m), as.double(alpha), double(p), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 10)] temp[[1]] <- temp[[1]][1:m, 1:2, drop = FALSE] temp[[2]] <- temp[[2]][1:m] structure(t(temp[[1]]), initialPartition = partition, dimensions = dimdat, modelName = "VII", call = match.call()) } hcVVV <- function(data, partition = NULL, minclus = 1, alpha = 1, beta = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) #if(oneD || length(dimdat) > 2) # stop("data should in the form of a matrix") data <- as.matrix(data) dimnames(data) <- NULL n <- nrow(data) p <- ncol(data) if(n <= p & mclust.options("warn")) warning("# of observations <= data dimension") if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" ll <- (l * (l - 1))/2 # dp <- duplicated(partition) #x[c((1:n)[!dp],(1:n)[dp]),], #as.integer(c(partition[!dp], partition[dp])), ld <- max(n, ll + 1, 3 * m) alpha <- alpha * traceW(data/sqrt(n * p)) alpha <- max(alpha, .Machine$double.eps) temp <- .Fortran("hcvvv", cbind(data, 0.), as.integer(n), as.integer(p), as.integer(partition), as.integer(l), as.integer(m), as.double(alpha), as.double(beta), double(p), double(p * p), double(p * p), double(p * p), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 14)] temp[[1]] <- temp[[1]][1:m, 1:2, drop = FALSE] temp[[2]] <- temp[[2]][1:m] structure(t(temp[[1]]), initialPartition = partition, dimensions = dimdat, modelName = "VVV", call = match.call()) } ## ## Plot method (dendrogram) for model-based hierarchical agglomeration ---- ## plot.hc <- function(x, what = c("loglik", "merge"), maxG = NULL, labels = FALSE, hang = 0, ...) 
{ stopifnot(inherits(x, "hc")) what <- what[1] hier <- as.hclust(x, what = what, maxG = maxG, labels = labels) switch(what, "loglik" = { ylab <- paste("Classification log-likelihood", paste("(", hier$method, sep = ""), "model)") cloglik <- attr(hier,"cloglik") attr(hier,"cloglik") <- NULL plot( as.dendrogram(hier, hang=hang), axes=FALSE, ylab=ylab) r <- range(cloglik, na.rm=TRUE) par.usr <- par("usr") ybot <- max(r)-par.usr[3] ytop <- min(r)+par.usr[3] }, "merge" = { ylab <- paste("Number of Clusters", paste("(", hier$method, sep = ""), "model)") nclus <- attr(hier,"nclus") attr(hier,"nclus") <- NULL plot( as.dendrogram(hier, hang=hang), axes=FALSE, ylab=ylab) par.usr <- par("usr") ybot<- max(nclus)-par.usr[3] ytop <- 1+par.usr[3] }, stop("unrecognized what option")) par(usr=c(par("usr")[1:2],ybot,ytop)) at <- pretty(seq(from=ybot,to=ytop,length=100), min = 5, max = 10) axis(2, at=at) invisible(hier) } as.hclust.hc <- function (object, what = c("loglik", "merge"), maxG = NULL, labels = FALSE) { stopifnot(inherits(object, "hc")) if (!is.null(maxG) && maxG < 2) stop("maxG < 2") what <- what[1] switch(what, "loglik" = { obj <- ldend(object, maxG=maxG, labels) obj <- c(obj, list(dist.method = NULL)) attr(obj,"cloglik") <- as.vector(obj$cloglik) obj$cloglik <- NULL class(obj) <- "hclust" obj }, "merge" = { obj <- mdend(object, maxG=maxG, labels) obj <- c(obj, list(dist.method = NULL)) attr(obj,"nclus") <- as.vector(obj$nclus) obj$nclus <- NULL class(obj) <- "hclust" obj }, stop("unrecognized what option") ) } ldend <- function (hcObj, maxG = NULL, labels = FALSE) { # classification log-likelihood dendrogram setup for MBAHC stopifnot(inherits(hcObj,"hc")) if(!is.null(maxG) && maxG < 2) stop("maxG < 2") n <- ncol(hcObj) + 1 cLoglik <- CLL <- cloglik.hc(hcObj) maxG <- if (is.null(maxG)) length(CLL) else min(maxG,length(CLL)) na <- is.na(CLL) m <- length(CLL) d <- diff(CLL) if (any(neg <- d[!is.na(d)] < 0)) { m <- which(neg)[1] CLLmax <- CLL[min(maxG,m)] 
CLL[-(1:min(maxG,m))] <- CLLmax } else if (any(na)) { m <- which(na)[1] - 1 CLLmax <- CLL[min(maxG,m)] CLL[-(1:min(maxG,m))] <- CLLmax } else { CLLmax <- max(CLL[1:maxG]) CLL[-(1:maxG)] <- CLLmax } height <- CLL height <- height[-length(height)] height <- rev(-height+max(height)) mo <- mergeOrder(hcObj) nam <- rownames(as.matrix(attr(hcObj,"data"))) leafLabels <- if (labels) nam else character(length(nam)) obj <- structure(list(merge = mo$merge, height = height, order = mo$order, labels = leafLabels, cloglik = cLoglik, method = attr(hcObj, "model"), call = attr(hcObj, "call"))) return(obj) } mdend <- function (hcObj, maxG = NULL, labels = FALSE) { # uniform height dendrgram setup for MBAHC stopifnot(inherits(hcObj,"hc")) if(!is.null(maxG) && maxG < 2) stop("maxG < 2") ni <- length(unique(attr(hcObj,"initialPartition"))) maxG <- if (!is.null(maxG)) min(maxG, ni) else ni mo <- mergeOrder(hcObj) j <- ni - maxG n <- ncol(hcObj) height <- c(rep(0,j),1:(n-j)) nclus <- maxG:1 nam <- rownames(as.matrix(attr(hcObj,"data"))) leafLabels <- if (labels) nam else character(length(nam)) obj <- structure(list(merge = mo$merge, order = mo$order, height = height, labels = leafLabels, nclus = nclus, method = attr(hcObj, "model"), call = attr(hcObj, "call"))) return(obj) } mergeOrder <- function(hcObj) { # converts the hc representation of merges to conform with hclust # and computes the corresponding dendrogram leaf order # CF: inner code written by Luca Scrucca HC <- matrix(as.vector(hcObj), ncol(hcObj), nrow(hcObj), byrow = TRUE) HCm <- matrix(NA, nrow(HC), ncol(HC)) merged <- list(as.vector(HC[1, ])) HCm[1, ] <- -HC[1, ] for (i in 2:nrow(HC)) { lmerged <- lapply(merged, function(m) HC[i, ] %in% m) lm <- which(sapply(lmerged, function(lm) any(lm))) if (length(lm) == 0) { merged <- append(merged, list(HC[i, ])) HCm[i, ] <- sort(-HC[i, ]) } else if (length(lm) == 1) { merged <- append(merged, list(c(merged[[lm]], HC[i, !lmerged[[lm]]]))) merged[[lm]] <- list() HCm[i, ] <- 
sort(c(-HC[i, !lmerged[[lm]]], lm)) } else { merged <- append(merged, list(unlist(merged[lm]))) merged[[lm[1]]] <- merged[[lm[2]]] <- list() HCm[i, ] <- lm } } list(merge = HCm, order = merged[[length(merged)]]) } cloglik.hc <- function(hcObj, maxG = NULL) { n <- ncol(hcObj) + 1 if (is.null(maxG)) maxG <- n cl <- hclass(hcObj) cl <- cbind( "1" = 1, cl) modelName <- attr(hcObj,"modelName") LL <- rep(list(NA),maxG) for (j in 1:maxG) { ll <- NULL for (k in unique(cl[,j])) { i <- which(cl[,j] == k) # compute loglik term here llnew <- mvn( modelName, attr(hcObj,"data")[i,,drop=FALSE])$loglik if (substr(modelName,2,2) != "I") { llvii <- mvn( "VII", attr(hcObj,"data")[i,,drop=FALSE])$loglik if (substr(modelName,3,3) != "I") { llvvi <- mvn( "VVI", attr(hcObj,"data")[i,,drop=FALSE])$loglik llall <- c("VVV"=llnew,"VVI"=llvvi,"VII"=llvii) } else { llall <- c("VVI"=llnew,"VII"=llvii) } if (!all(nall <- is.na(llall))) { llnew <- llall[!nall][which.max(llall[!nall])] } } if (is.na(llnew)) break ll <- c(ll, llnew) } if (is.na(llnew)) break LL[[j]] <- ll } CLL <- sapply(LL,sum) for (i in seq(along = CLL)) { if (is.na(CLL[i])) LL[[i]] <- NA } attr(CLL,"terms") <- LL return(CLL) } ## Initialization for 1-dim data ---- qclass <- function (x, k) { x <- as.vector(x) # eps <- sqrt(.Machine$double.eps) # numerical accuracy problem if scale of x is large, so make tolerance # scale dependent eps <- sd(x)*sqrt(.Machine$double.eps) q <- NA n <- k while(length(q) < (k+1)) { n <- n + 1 q <- unique(quantile(x, seq(from = 0, to = 1, length = n))) } if(length(q) > (k+1)) { dq <- diff(q) nr <- length(q)-k-1 q <- q[-order(dq)[1:nr]] } q[1] <- min(x) - eps q[length(q)] <- max(x) + eps cl <- rep(0, length(x)) for(i in 1:k) { cl[ x >= q[i] & x < q[i+1] ] <- i } return(cl) } hcE <- function(data, partition = NULL, minclus = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #==================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be one-dimensional") data <- as.vector(data) n <- length(data) if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" ld <- max(c((l * (l - 1))/2, 3 * m)) temp <- .Fortran("hc1e", data, as.integer(n), as.integer(partition), as.integer(l), as.integer(m), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 3, 7)] temp[[1]] <- temp[[1]][1:m] temp[[2]] <- temp[[2]][1:m] temp[[3]] <- temp[[3]][1:m] structure(rbind(temp[[1]], temp[[2]]), initialPartition = partition, dimensions = n, modelName = "E", call = match.call()) } hcV <- function(data, partition = NULL, minclus = 1, alpha = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #===================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be one-dimensional") data <- as.vector(data) n <- length(data) if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" alpha <- alpha * (vecnorm(data - mean(data))^2/n) alpha <- min(alpha, .Machine$double.eps) ld <- max(c((l * (l - 1))/2, 3 * m)) temp <- .Fortran("hc1v", data, as.integer(n), as.integer(partition), as.integer(l), as.integer(m), as.double(alpha), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 3, 8)] temp[[1]] <- temp[[1]][1:m] temp[[2]] <- temp[[2]][1:m] temp[[3]] <- temp[[3]][1:m] structure(rbind(temp[[1]], temp[[2]]), initialPartition = partition, dimensions = n, modelName = "V", call = match.call()) } mclust/R/toremove.R0000644000175000017500000002435413324447600014112 0ustar nileshnilesh# functions to be removed?? EMclust <- function(data, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), initialization = list(hcPairs=NULL, subset=NULL, noise=NULL), Vinv = NULL, warn = FALSE, x = NULL, ...) 
{ if (!is.null(x)) { if (!missing(prior) || !missing(control) || !missing(initialization) || !missing(Vinv)) stop("only G and modelNames may be specified as arguments when x is supplied") prior <- attr(x,"prior") control <- attr(x,"control") initialization <- attr(x,"initialization") Vinv <- attr(x,"Vinv") warn <- attr(x,"warn") } dimData <- dim(data) oneD <- is.null(dimData) || length(dimData[dimData > 1]) == 1 if(!oneD && length(dimData) != 2) stop("data must be a vector or a matrix") if(oneD) { data <- drop(as.matrix(data)) n <- length(data) d <- 1 } else { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) } if (is.null(x)) { if (is.null(modelNames)) { if (d == 1) { modelNames <- c("E", "V") } else { modelNames <- mclust.options("emModelNames") if (n <= d) { # select only spherical and diagonal models m <- match(modelNames, c("EII", "VII", "EEI", "VEI", "EVI", "VVI"), nomatch = 0) modelNames <- modelNames[m] } } } if (is.null(G)) { G <- if (is.null(initialization$noise)) 1:9 else 0:9 } else { G <- sort(as.numeric(G)) } Gall <- G Mall <- modelNames } else { Glabels <- dimnames(x)[[1]] Mlabels <- dimnames(x)[[2]] if (is.null(G)) G <- Glabels if (is.null(modelNames)) modelNames <- Mlabels Gmatch <- match(as.character(G), Glabels, nomatch = 0) Mmatch <- match(modelNames, Mlabels, nomatch = 0) if (all(Gmatch) && all(Mmatch)) { attr( x, "G") <- as.numeric(G) attr( x, "modelNames") <- modelNames attr( x, "returnCodes") <- attr(x, "returnCodes")[as.character(G),modelNames,drop=FALSE] return(x[as.character(G),modelNames,drop=FALSE]) } Gall <- sort(as.numeric(unique(c(as.character(G), Glabels)))) Mall <- unique(c(modelNames, Mlabels)) } if (any(as.logical(as.numeric(G))) < 0) { if (is.null(initialization$noise)) { stop("G must be positive") } else { stop("G must be nonnegative") } } if (d == 1 && any(nchar(modelNames) > 1)) { Emodel <- any(sapply(modelNames, function(x) charmatch("E", x, nomatch = 0)[1]) == 1) Vmodel <- any(sapply(modelNames, function(x) 
charmatch("V", x, nomatch = 0)[1]) == 1) modelNames <- c("E", "V")[c(Emodel, Vmodel)] } l <- length(Gall) m <- length(Mall) EMPTY <- -.Machine$double.xmax BIC <- RET <- matrix(EMPTY, nrow = l, ncol = m, dimnames = list(as.character(Gall), as.character(Mall))) if (!is.null(x)) { BIC[dimnames(x)[[1]],dimnames(x)[[2]]] <- x RET[dimnames(x)[[1]],dimnames(x)[[2]]] <- attr(x, "returnCodes") BIC <- BIC[as.character(G),modelNames,drop=FALSE] RET <- RET[as.character(G),modelNames,drop=FALSE] } G <- as.numeric(G) Glabels <- as.character(G) Gout <- G if (is.null(initialization$noise)) { if (G[1] == 1) { for (mdl in modelNames[BIC["1",] == EMPTY]) { out <- mvn(modelName = mdl, data = data, prior = prior) BIC["1", mdl] <- bic(modelName = mdl, loglik = out$loglik, n = n, d = d, G = 1, equalPro = FALSE) RET["1", mdl] <- attr(out, "returnCode") } if (l == 1) { BIC[BIC == EMPTY] <- NA return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = initialization, warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$subset)) { ####################################################### # all data in initial hierarchical clustering phase ####################################################### if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(modelName = mclust.options("hcModelName"), data = data) } else { hcPairs <- hc(modelName = "EII", data = data) } } else { hcPairs <- NULL # hcPairs <- hc(modelName = "E", data = data) } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) for (g in Glabels) { if (d > 1 || !is.null(hcPairs)) { z <- unmap(clss[, g]) } else { z <- unmap(qclass( data, as.numeric(g))) } for (modelName in modelNames[BIC[g,] == EMPTY]) { out <- me(modelName = modelName, data = data, z = z, prior = prior, control = control, warn = warn) BIC[g, modelName] <- 
bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = as.numeric(g), equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") } } } else { ###################################################### # initial hierarchical clustering phase on a subset ###################################################### if (is.logical(initialization$subset)) initialization$subset <- (1:n)[initialization$subset] if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[initialization$subset, ], modelName = mclust.options("hcModelName")) } else { hcPairs <- hc(data = data[initialization$subset,], modelName = "EII") } } else { hcPairs <- NULL # hcPairs <- hc(data = data[initialization$subset], # modelName = "E") } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) for (g in Glabels) { if (d > 1 || !is.null(hcPairs)) { z <- unmap(clss[, g]) } else { z <- unmap(qclass(data[initialization$subset], as.numeric(g))) } dimnames(z) <- list(as.character(initialization$subset), NULL) for (modelName in modelNames[!is.na(BIC[g,])]) { ms <- mstep(modelName = modelName, z = z, data = as.matrix(data)[initialization$subset, ], prior = prior, control = control, warn = warn) # # ctrl <- control # ctrl$itmax[1] <- 1 # ms <- me(modelName = modelName, data = as.matrix(data)[ # initialization$subset, ], z = z, prior = prior, control = ctrl) # es <- do.call("estep", c(list(data = data, warn = warn), ms)) out <- me(modelName = modelName, data = data, z = es$z, prior = prior, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = as.numeric(g), equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") } } } } else { ###################################################### # noise case ###################################################### if (!is.null(initialization$subset)) stop("subset option not implemented with noise") if 
(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) noise <- initialization$noise if (!is.logical(noise)) noise <- as.logical(match(1:n, noise, nomatch = 0)) if (!G[1]) { hood <- n * log(Vinv) BIC["0", ] <- 2 * hood - log(n) if (l == 1) { return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = list(hcPairs = hcPairs, subset = initialization$subset), warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[!noise,], modelName = mclust.options("hcModelName")) } else { hcPairs <- hc(data = data[!noise,], modelName = "EII") } } else { hcPairs <- NULL # hcPairs <- hc(data = data[!noise], modelName = "E") } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) z <- matrix(0, n, max(G) + 1) for (g in Glabels) { z[] <- 0 k <- as.numeric(g) if (d > 1 || !is.null(hcPairs)) { z[!noise, 1:k] <- unmap(clss[, g]) } else { z[!noise, 1:k] <- unmap(qclass(data[!noise])) } z[noise, k+1] <- 1 K <- 1:(k+1) for (modelName in modelNames[BIC[g,] == EMPTY]) { out <- me(modelName = modelName, data = data, z = z[, K], prior = prior, control = control, Vinv = Vinv, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = k, noise = TRUE, equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") } } } structure(BIC, G = Gout, modelNames = modelNames, prior = prior, control = control, initialization = list(hcPairs = hcPairs, subset = initialization$subset, noise = initialization$noise), Vinv = Vinv, warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC") } # EMclust <- function(...) 
.Defunct("mclustBIC", PACKAGE = "mclust") mclust/R/clustCombi.R0000644000175000017500000004607314053123511014350 0ustar nileshnileshclustCombi <- function(object = NULL, data = NULL, ...) { if(is.null(object) & is.null(data)) stop("An object or class 'Mclust' or data as matrix/data.frame must be provided!") if(is.null(object)) { object <- Mclust(data, ...) } else { if(!inherits(object, "Mclust")) stop("object not of class 'Mclust'") data <- object$data } combiRes <- combi(data, object) return(combiRes) } combMat <- function(K,l1,l2) { l=c(min(l1,l2), max(l1,l2)) if(any(length(l1) == 0, length(l2) == 0)){ l1 = numeric(0) l2 = l[2]} else { l1 = l[1] l2 = l[2]} M <- rbind(cbind(diag(l2-1), matrix(rep(0,(K-l2+1)*(l2-1)), nrow=l2-1, ncol=K-l2+1)), cbind(matrix(rep(0,l2*(K-l2)), nrow=K-l2, ncol=l2), diag(K-l2))) M[l1,l2] <- 1 return(M) } ## Define xlog to handle x*log(x) as x=0 xlog <- function(x) { xlog1d <- function (xi) if (xi == 0) 0 else (xi*log(xi)) if (is.null(dim(x))) { return(sapply(x,xlog1d)) } else { return(matrix(sapply(x,xlog1d),dim(x))) } } combi <- function(data, MclustOutput, n = nrow(data), d = ncol(data)) { combiM <- list() combiM[[MclustOutput$G]] <- diag(MclustOutput$G) tau <- list() tau[[MclustOutput$G]] = MclustOutput$z classif <- list() classif[[MclustOutput$G]] = map(tau[[MclustOutput$G]]) for (K in MclustOutput$G:2) { dEnt <- matrix(0,nrow=K-1, ncol=K) preCombiTau <- tau[[K]] for (l1 in 1:(K-1)) { for (l2 in (l1+1):K) { postCombiTau <- t(combMat(K,l1,l2) %*% t(preCombiTau)) dEnt[l1,l2] <- sum(xlog(postCombiTau[,l1])) - sum(xlog(preCombiTau[,l1])+xlog(preCombiTau[,l2])) } } l1=which(dEnt==max(dEnt),arr.ind=TRUE)[1] l2=which(dEnt==max(dEnt),arr.ind=TRUE)[2] combiM[[K-1]] <- combMat(K,l1,l2) tau[[K-1]] = t(combiM[[K-1]] %*% t(tau[[K]])) classif[[K-1]] = map(tau[[K-1]]) } output <- list(classification = classif, combiM = combiM, combiz = tau, MclustOutput = MclustOutput) class(output) <- "clustCombi" return(output) } plot.clustCombi <- function(x, 
what = c("classification", "entropy", "tree"), ...) { object <- x # Argh. Really want to use object anyway if(!inherits(object, "clustCombi")) stop("object not of class 'clustCombi'") data <- object$MclustOutput$data what <- match.arg(what, several.ok = TRUE) oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) plot.clustCombi.classification <- function(...) { # Sort z columns so that one of the two combined column is the last one at # each step (prevents the colors and symbols to be mixed as K -> K-1) curr <- 1:object$MclustOutput$G i <- numeric() j <- numeric() for(K in (object$MclustOutput$G):2) { l1 <- which(!object$combiM[[K-1]] %*% rep(1,K) == 1) l2 <- (object$combiM[[K-1]] %*% curr)[l1] - curr[l1] i <- c(curr[l1],i) j <- c(l2,j) curr <- object$combiM[[K-1]] %*% curr - l2*c(rep(0,(l1-1)),1,rep(0,(K-1-l1))) } permutMat <- function(j,K) { M <- diag(K) M[j,j] <- 0 M[K,K] <- 0 M[j,K] <- 1 M[K,j] <- 1 return(M) } combiM <- diag(object$MclustOutput$G) j <- c(1,j) i <- c(0,i) permutz <- object$MclustOutput$z[,j] par(ask=TRUE) for(K in object$MclustOutput$G:1) { curr_title <- if(K == object$MclustOutput$G) paste0("BIC solution (", as.character(K), " clusters)") else paste0("Combined solution with ", as.character(K), " clusters") if(ncol(as.matrix(data)) > 2) { par(oma = c(0,0,2,0), mar = { mar <- oldpar$mar; mar[3] <- 0.1; mar }) } else { par(mar = { mar <- oldpar$mar; mar[3] <- 2.1; mar }) } combiPlot(data = data, z = permutz, combiM = combiM, ...) if(ncol(as.matrix(data)) > 2) { title(curr_title, outer = TRUE, cex.main = 1) } else { title(curr_title, cex.main = 1) } combiM <- combMat(K,which(j==i[K]),K) %*% combiM } par(ask=FALSE) } if(interactive() & length(what) > 1) { title <- "Combined clusterings plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "classification") plot.clustCombi.classification(...) 
if(what[choice] == "entropy") entPlot(z = object$MclustOutput$z, combiM = object$combiM, ...) if(what[choice] == "tree") combiTree(object, ...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "classification")) plot.clustCombi.classification(...) if(any(what == "entropy")) entPlot(z = object$MclustOutput$z, combiM = object$combiM, ...) if(any(what == "tree")) combiTree(object, ...) } invisible() } combiPlot <- function(data, z, combiM, ...) { p <- ncol(as.matrix(data)) if (p > 2) { clPairs(data[,1:min(5,p)], classification = map(t(combiM %*% t(z))), ...) } else if (p == 2) { mclust2Dplot(data = data, parameters = NULL, classification = map(t(combiM %*% t(z))), what = "classification", ...) } else { mclust1Dplot(data = as.matrix(data), parameters = NULL, classification = map(t(combiM %*% t(z))), what = "classification", ...) } } entPlot <- function(z, combiM, abc = c("standard", "normalized"), reg = 2, ...) { oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) if(length(abc) > 1) par(ask=TRUE) ent <- numeric() Kmax <- ncol(z) z0 <- z for(K in Kmax:1) { z0 <- t(combiM[[K]] %*% t(z0)) ent[K] <- -sum(xlog(z0)) } if(any(abc == "normalized")) { mergedn <- numeric() z0 <- z for(K in (Kmax-1):1) { z0 <- t(combiM[[K+1]] %*% t(z0)) mergedn[K] = sum(sapply(map(z0), function(x) any(which(as.logical(combiM[[K]][rowSums(combiM[[K]])==2,]))==x))) } } if(Kmax == 2) reg <- NULL if(any(abc == "standard")) { par(mfrow=c(1,2), oma=c(0,0,3,0), mar = { mar <- oldpar$mar; mar[3] <- 0.1; mar }) plot(1:Kmax, ent, xlab = "Number of clusters", ylab = "Entropy", xaxt = "n", ...) 
axis(side = 1, at = 1:Kmax) if(any(reg == 2)) { pcwsreg <- pcws2_reg(1:Kmax,ent) lines(1:pcwsreg$c, pcwsreg$a1*(1:pcwsreg$c) + pcwsreg$b1, lty = 2, col = "red") lines(pcwsreg$c:Kmax, pcwsreg$a2*(pcwsreg$c:Kmax) + pcwsreg$b2, lty = 2, col = "red") } if(any(reg == 3)) { pcwsreg <- pcws3_reg(1:Kmax,ent) lines(1:pcwsreg$c1, pcwsreg$a1*(1:pcwsreg$c1) + pcwsreg$b1, lty = 2, col = "blue") lines(pcwsreg$c1:pcwsreg$c2, pcwsreg$a2*(pcwsreg$c1:pcwsreg$c2) + pcwsreg$b2, lty = 2, col = "blue") lines(pcwsreg$c2:Kmax, pcwsreg$a3*(pcwsreg$c2:Kmax) + pcwsreg$b3, lty = 2, col = "blue") } plot(1:(Kmax-1), ent[2:Kmax]-ent[1:(Kmax-1)], xlab = "Number of clusters", ylab = "Difference in entropy", xaxt = "n", ...) axis(side = 1, at = 1:(Kmax-1)) title("Entropy plot", outer=TRUE, cex.main = 1) } if(any(abc == "normalized")) { par(mfrow=c(1,2), oma=c(0,0,3,0), mar = { mar <- oldpar$mar; mar[3] <- 0.1; mar }) plot(cumsum(c(0,mergedn)), ent, xlab = "Cumul. count of merged obs.", ylab = "Entropy", ...) if(any(reg == 2)) { X <- cumsum(c(0,mergedn)) pcwsreg <- pcws2_reg(X,ent) lines(X[1:pcwsreg$c], pcwsreg$a1*(X[1:pcwsreg$c]) + pcwsreg$b1, lty = 2, col = "red") lines(X[pcwsreg$c:Kmax], pcwsreg$a2*(X[pcwsreg$c:Kmax]) + pcwsreg$b2, lty = 2, col = "red") } if(any(reg == 3)) { X <- cumsum(c(0,mergedn)) pcwsreg <- pcws3_reg(X,ent) lines(X[1:pcwsreg$c1], pcwsreg$a1*(X[1:pcwsreg$c1]) + pcwsreg$b1, lty = 2, col = "blue") lines(X[pcwsreg$c1:pcwsreg$c2], pcwsreg$a2*(X[pcwsreg$c1:pcwsreg$c2]) + pcwsreg$b2, lty = 2, col = "blue") lines(X[pcwsreg$c2:Kmax], pcwsreg$a3*(X[pcwsreg$c2:Kmax]) + pcwsreg$b3, lty = 2, col = "blue") } plot(1:(Kmax-1), (ent[2:Kmax]-ent[1:(Kmax-1)])/mergedn, xlab = "Number of clusters", ylab = "Normalized difference in entropy", xaxt = "n", ...) 
axis(side = 1, at = 1:(Kmax-1)) title("Normalized entropy plot", outer=TRUE, cex.main = 1) } invisible() } combiTree <- function(object, type = c("triangle", "rectangle"), yaxis = c("entropy", "step"), edgePar = list(col = "darkgray", lwd = 2), ...) { if(!inherits(object, "clustCombi")) stop("object not of class 'clustCombi'") yaxis <- match.arg(yaxis, eval(formals(combiTree)$yaxis), several.ok = FALSE) type <- match.arg(type, eval(formals(combiTree)$type), several.ok = FALSE) G <- object$MclustOutput$G combiM <- object$combiM # combiZ <- object$combiz # define merging pattern: # - negative numbers are leaves, # - positive are merged clusters (defined by row number in merge) curr <- 1:G merged <- -(1:G) merge <- matrix(NA, G-1, 2) for(k in 1:(G-1)) { Kp <- G - k + 1 l1 <- which(!combiM[[Kp-1]] %*% rep(1,Kp) == 1) l2 <- (combiM[[Kp-1]] %*% curr)[l1] - curr[l1] curr <- setdiff(curr, max(l1, l2)) merge[k,] <- merged[c(l1,l2)] merged[merged == merged[l1] | merged == merged[l2]] <- k } # order along the x-axis sel <- function(x) { if(x < 0) return(abs(x)) else return(c(sel(merge[x,1]), sel(merge[x,2]))) } ord <- abs(c(sel(merge[nrow(merge),1]), sel(merge[nrow(merge),2]))) if(yaxis == "step") { # step h <- 1:(G-1) ylab <- "Steps" } else { # entropy entropy <- sapply(rev(object$combiz), function(z) -sum(xlog(z))) # normalized negentropy h <- entropy; h <- 1 - (h - min(h))/(max(h)-min(h)); h <- h[-1] ylab <- "1 - normalised entropy" } # hclust object (see help(hclust)) hc <- list(merge = merge, # mergin matrix height = h, # define merge heights order = ord, # order of leaves labels = 1:G) # labels of leaves class(hc) <- "hclust" # make it an hclust object # plot(hc, hang = -1) # look at the result # convert to a dendrogram object dendro <- as.dendrogram(hc) plot(dendro, type = type, edgePar = edgePar, ylab = ylab, ...) 
invisible(dendro) } # pcws2_reg computes the piecewise linear regression -- with two pieces -- to (x,y), for any possible change point and chooses the one leading to the smallest least-square error. pcws2_reg <- function(x, y) { C <- length(x) ssBest = Inf for (c in 2:(C-1)) { x1 <- x[1:c] y1 <- y[1:c] x2 <- x[c:C] y2 <- y[c:C] a1 <- sum((x1-mean(x1))*(y1-mean(y1)))/sum((x1-mean(x1))^2) b1 <- -a1 * mean(x1) + mean(y1) a2 <- sum((x2-mean(x2))*(y2-mean(y2)))/sum((x2-mean(x2))^2) b2 <- -a2 * mean(x2) + mean(y2) ss <- sum((a1*x1+b1-y1)^2) + sum((a2*x2+b2-y2)^2) if (ss < ssBest) { ssBest <- ss cBest <- c a1Best <- a1 a2Best <- a2 b1Best <- b1 b2Best <- b2 } } return(list(c=cBest, a1=a1Best, b1=b1Best, a2=a2Best, b2=b2Best, residuals = c(a1*x1+b1-y1,a2*x2+b2-y2))) } # pcws3_reg computes the piecewise linear regression -- with three pieces -- to (x,y), for any possible change points and chooses the ones leading to the smallest least-square error. pcws3_reg <- function(x, y) { C <- length(x) ssBest = Inf for (c1 in 2:(C-2)) { for (c2 in (c1+1):(C-1)) { x1 <- x[1:c1] y1 <- y[1:c1] x2 <- x[c1:c2] y2 <- y[c1:c2] x3 <- x[c2:C] y3 <- y[c2:C] a1 <- sum((x1-mean(x1))*(y1-mean(y1)))/sum((x1-mean(x1))^2) b1 <- -a1 * mean(x1) + mean(y1) a2 <- sum((x2-mean(x2))*(y2-mean(y2)))/sum((x2-mean(x2))^2) b2 <- -a2 * mean(x2) + mean(y2) a3 <- sum((x3-mean(x3))*(y3-mean(y3)))/sum((x3-mean(x3))^2) b3 <- -a3 * mean(x3) + mean(y3) ss <- sum((a1*x1+b1-y1)^2) + sum((a2*x2+b2-y2)^2) + sum((a3*x3+b3-y3)^2) if (ss < ssBest) { ssBest <- ss c1Best <- c1 c2Best <- c2 a1Best <- a1 b1Best <- b1 a2Best <- a2 b2Best <- b2 a3Best <- a3 b3Best <- b3 } } } return(list(c1=c1Best, c2=c2Best, a1=a1Best, b1=b1Best, a2=a2Best, b2=b2Best, a3=a3Best, b3=b3Best, residuals = c(a1*x1+b1-y1,a2*x2+b2-y2,a3*x3+b3-y3))) } # print.clustCombi <- function(x, ...) # { # output <- x # Argh. 
Really want to use 'output' # cat("\n EM/BIC Solution\n") # cat(" --------------- \n\n") # cat("Number of components: ", as.character(output$MclustOutput$G), "\n", sep = "") # # cat("Model name: ", output$MclustOutput$parameters$var$modelName, "\n\n", sep="") # for (K in 1:output$MclustOutput$G) # { # cat("Component num.", as.character(K),": ", "\n", sep="") # cat(" proportion: ", sprintf(fmt = "%4.2f ", output$MclustOutput$parameters$pro[K]), "\n", sep="") # if (output$Mclust$d == 1) cat(" mean: ", sprintf(fmt = "%4.2f ", output$MclustOutput$parameters$mean[K]), "\n", sep="") else cat(" mean: ", sprintf(fmt = "%4.2f ", output$MclustOutput$parameters$mean[,K]), "\n", sep="") # } # # cat("\n Combining steps \n") # cat(" --------------- \n\n") # # cl = paste(rep(" ", max(output$MclustOutput$G-4,0)), "Classes labels after this step", rep(" ", max(output$MclustOutput$G-4,0)), sep="") # # if (output$MclustOutput$G>4) for (K in 5:output$MclustOutput$G) cl = paste(" ", cl, " ", sep="") # # cat(" Step | Classes combined at this step | Classes labels after this step", "\n", sep="") # cat("-------|-------------------------------|-------------------------------", "\n", sep="") # curr = 1:output$MclustOutput$G # # cat(" 0 | --- |", sprintf(fmt = "%2d ", curr), "\n", sep="") # # for (K in 1:(output$MclustOutput$G-1)) # { # Kp = output$MclustOutput$G - K + 1 # l1 = which(!output$combiM[[Kp-1]] %*% rep(1,Kp) == 1) # l2 = (output$combiM[[Kp-1]] %*% curr)[l1] - curr[l1] # # nc1 = floor((7-nchar(as.character(K)))/2) # nc2 = (7-nchar(as.character(K))) - nc1 # nc3 = floor((33-nchar(paste(as.character(c(l1)), " & ", as.character(l2))))/2) # nc4 = 33-nchar(paste(as.character(c(l1)), " & ", as.character(l2))) - nc3 # # curr <- output$combiM[[Kp-1]] %*% curr - l2*c(rep(0,(l1-1)),1,rep(0,(Kp-1-l1))) # # cat(rep(" ", nc1), as.character(K), rep(" ", nc2), "|", rep(" ", nc3), as.character(l1), " & ", as.character(l2), rep(" ", nc4), "|", sprintf(fmt = "%2d ", curr), "\n", sep="") # # } # # 
cat("\n Classification for K classes: output$classification[[K]]\n") # cat(" Combining matrix (K classes -> (K-1) classes): output$combiM[[K]]\n\n") # } print.clustCombi <- function(x, digits = getOption("digits"), ...) { cat("\'", class(x)[1], "\' object:\n", sep = "") cat(paste0(" Mclust model: (", x$MclustOutput$modelName, ",", x$MclustOutput$G, ")\n")) cat(" Available object components: ") cat(names(x), "\n") cat(" Combining matrix (K+1 classes -> K classes): $combiM[[K]]\n") cat(" Classification for K classes: $classification[[K]]\n") invisible() } summary.clustCombi <- function(object, ...) { title <- paste("Combining Gaussian mixture components for clustering") out <- with(object, list(title = title, MclustModelName = object$MclustOutput$modelName, MclustG = object$MclustOutput$G, combiM = object$combiM)) class(out) <- "summary.clustCombi" return(out) } print.summary.clustCombi <- function(x, digits = getOption("digits"), ...) { cat(rep("-", nchar(x$title)),"\n",sep="") cat(x$title, "\n") cat(rep("-", nchar(x$title)),"\n",sep="") # cat("\nMclust model name:", x$MclustModelName, "\n") cat("Number of components:", x$MclustG, "\n") # cat("\nCombining steps:\n\n") # cl <- paste(rep(" ", max(x$MclustG-4,0)), # "Class labels after this step", # rep(" ", max(x$MclustG-4,0)), sep="") # # if(x$MclustG>4) # for(K in 5:x$MclustG) # cl <- paste(" ", cl, " ", sep="") cat(" Step | Classes combined at this step | Class labels after this step", "\n", sep="") cat("-------|-------------------------------|-----------------------------", "\n", sep="") curr <- 1:x$MclustG cat(" 0 | --- | ", sprintf(fmt = "%d ", curr), "\n", sep="") for(K in 1:(x$MclustG-1)) { Kp = x$MclustG - K + 1 l1 = which(!x$combiM[[Kp-1]] %*% rep(1,Kp) == 1) l2 = (x$combiM[[Kp-1]] %*% curr)[l1] - curr[l1] nc1 = floor((7-nchar(as.character(K)))/2) nc2 = (7-nchar(as.character(K))) - nc1 nc3 = floor((33-nchar(paste(as.character(c(l1)), " & ", as.character(l2))))/2) nc4 = 33-nchar(paste(as.character(c(l1)), " & 
", as.character(l2))) - nc3 curr <- x$combiM[[Kp-1]] %*% curr - l2*c(rep(0,(l1-1)),1,rep(0,(Kp-1-l1))) cat(rep(" ", nc1), as.character(K), rep(" ", nc2), "|", rep(" ", nc3), as.character(l1), " & ", as.character(l2), rep(" ", nc4), "| ", sprintf(fmt = "%d ", curr), "\n", sep="") } invisible() } clustCombiOptim <- function(object, reg = 2, plot = FALSE, ...) { # Return the optimal number of clusters suggested by the method based on the # entropy and discussed in reference given in help(clustCombi). # # object = "clustCombi" object # reg = see help(entPlot) z <- object$MclustOutput$z combiM <- object$combiM ent <- rep(as.double(NA, nrow(z))) Kmax <- ncol(z) z0 <- z for(K in Kmax:1) { z0 <- t(combiM[[K]] %*% t(z0)) ent[K] <- -sum(xlog(z0)) } if(Kmax == 2) { # reg <- NULL # in the original code # my modification to get however a result reg <- 1 pcwsreg <- list(K = Kmax) } if(reg == 2) { pcwsreg <- pcws2_reg(1:Kmax, ent) } if(reg == 3) { pcwsreg <- pcws3_reg(1:Kmax, ent) } if(plot) { plot(1:Kmax, ent, xlab = "Number of clusters", ylab = "Entropy", panel.first = grid(), xaxt = "n", ...) 
axis(side = 1, at = 1:Kmax) if(reg == 2) { lines(1:pcwsreg$c, pcwsreg$a1 * (1:pcwsreg$c) + pcwsreg$b1, lty = 2, col = "red") lines(pcwsreg$c:Kmax, pcwsreg$a2 * (pcwsreg$c:Kmax) + pcwsreg$b2, lty = 2, col = "red") } if(reg == 3) { lines(1:pcwsreg$c1, pcwsreg$a1 * (1:pcwsreg$c1) + pcwsreg$b1, lty = 2, col = "blue") lines(pcwsreg$c1:pcwsreg$c2, pcwsreg$a2 * (pcwsreg$c1:pcwsreg$c2) + pcwsreg$b2, lty = 2, col = "blue") lines(pcwsreg$c2:Kmax, pcwsreg$a3 * (pcwsreg$c2:Kmax) + pcwsreg$b3, lty = 2, col = "blue") } } K <- pcwsreg[[1]] z0 <- z for(K in Kmax:K) { z0 <- t(combiM[[K]] %*% t(z0)) } out <- list(numClusters.combi = K, z.combi = z0, cluster.combi = map(z0)) return(out) } mclust/inst/0000755000175000017500000000000014157117042012672 5ustar nileshnileshmclust/inst/CITATION0000644000175000017500000000156214051467050014033 0ustar nileshnileshcitHeader("To cite 'mclust' R package in publications, please use:") citEntry(entry = "Article", title = "{mclust} 5: clustering, classification and density estimation using {G}aussian finite mixture models", author = personList(person(given="Luca", family="Scrucca"), person(given="Michael", family="Fop"), person(given=c("T.", "Brendan"), family="Murphy"), person(given=c("Adrian", "E."), family="Raftery")), journal = "The {R} Journal", year = "2016", volume = "8", number = "1", pages = "289--317", url="https://doi.org/10.32614/RJ-2016-021", # textVersion = paste("Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016)", "mclust 5: clustering, classification and density estimation using Gaussian finite mixture models", "The R Journal", "8/1, pp. 289-317")) mclust/inst/doc/0000755000175000017500000000000014157117041013436 5ustar nileshnileshmclust/inst/doc/mclust.html0000644000175000017500001335155414157117041015654 0ustar nileshnilesh A quick tour of mclust

A quick tour of mclust

Luca Scrucca

17 Dec 2021

Introduction

mclust is a contributed R package for model-based clustering, classification, and density estimation based on finite normal mixture modelling. It provides functions for parameter estimation via the EM algorithm for normal mixture models with a variety of covariance structures, and functions for simulation from these models. Also included are functions that combine model-based hierarchical clustering, EM for mixture estimation and the Bayesian Information Criterion (BIC) in comprehensive strategies for clustering, density estimation and discriminant analysis. Additional functionalities are available for displaying and visualizing fitted models along with clustering, classification, and density estimation results.

This document gives a quick tour of mclust (version 5.4.9) functionalities. It was written in R Markdown, using the knitr package for production. See help(package="mclust") for further details and references provided by citation("mclust").

library(mclust)
##     __  ___________    __  _____________
##    /  |/  / ____/ /   / / / / ___/_  __/
##   / /|_/ / /   / /   / / / /\__ \ / /   
##  / /  / / /___/ /___/ /_/ /___/ // /    
## /_/  /_/\____/_____/\____//____//_/    version 5.4.9
## Type 'citation("mclust")' for citing this R package in publications.

Clustering

data(diabetes)
class <- diabetes$class
table(class)
## class
## Chemical   Normal    Overt 
##       36       76       33
X <- diabetes[,-1]
head(X)
##   glucose insulin sspg
## 1      80     356  124
## 2      97     289  117
## 3     105     319  143
## 4      90     356  199
## 5      90     323  240
## 6      86     381  157
clPairs(X, class)


BIC <- mclustBIC(X)
plot(BIC)

summary(BIC)
## Best BIC values:
##              VVV,3       VVV,4       EVE,6
## BIC      -4751.316 -4784.32213 -4785.24591
## BIC diff     0.000   -33.00573   -33.92951

mod1 <- Mclust(X, x = BIC)
summary(mod1, parameters = TRUE)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VVV (ellipsoidal, varying volume, shape, and orientation) model with 3
## components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -2303.496 145 29 -4751.316 -4770.169
## 
## Clustering table:
##  1  2  3 
## 81 36 28 
## 
## Mixing probabilities:
##         1         2         3 
## 0.5368974 0.2650129 0.1980897 
## 
## Means:
##              [,1]     [,2]       [,3]
## glucose  90.96239 104.5335  229.42136
## insulin 357.79083 494.8259 1098.25990
## sspg    163.74858 309.5583   81.60001
## 
## Variances:
## [,,1]
##          glucose    insulin       sspg
## glucose 57.18044   75.83206   14.73199
## insulin 75.83206 2101.76553  322.82294
## sspg    14.73199  322.82294 2416.99074
## [,,2]
##           glucose   insulin       sspg
## glucose  185.0290  1282.340  -509.7313
## insulin 1282.3398 14039.283 -2559.0251
## sspg    -509.7313 -2559.025 23835.7278
## [,,3]
##           glucose   insulin       sspg
## glucose  5529.250  20389.09  -2486.208
## insulin 20389.088  83132.48 -10393.004
## sspg    -2486.208 -10393.00   2217.533

plot(mod1, what = "classification")

table(class, mod1$classification)
##           
## class       1  2  3
##   Chemical  9 26  1
##   Normal   72  4  0
##   Overt     0  6 27

plot(mod1, what = "uncertainty")


ICL <- mclustICL(X)
summary(ICL)
## Best ICL values:
##              VVV,3       EVE,6       EVE,7
## ICL      -4770.169 -4797.38232 -4797.50566
## ICL diff     0.000   -27.21342   -27.33677
plot(ICL)


LRT <- mclustBootstrapLRT(X, modelName = "VVV")
LRT
## ------------------------------------------------------------- 
## Bootstrap sequential LRT for the number of mixture components 
## ------------------------------------------------------------- 
## Model        = VVV 
## Replications = 999 
##               LRTS bootstrap p-value
## 1 vs 2   361.16739             0.001
## 2 vs 3   123.49685             0.001
## 3 vs 4    16.76161             0.498

Initialisation

EM algorithm is used by mclust for maximum likelihood estimation. Initialisation of EM is performed using the partitions obtained from agglomerative hierarchical clustering. For details see help(mclustBIC) or help(Mclust), and help(hc).

(hc1 <- hc(X, modelName = "VVV", use = "SVD"))
## Call:
## hc(data = X, modelName = "VVV", use = "SVD") 
## 
## Model-Based Agglomerative Hierarchical Clustering 
## Model name        = VVV 
## Use               = SVD 
## Number of objects = 145
BIC1 <- mclustBIC(X, initialization = list(hcPairs = hc1)) # default 
summary(BIC1)
## Best BIC values:
##              VVV,3       VVV,4       EVE,6
## BIC      -4751.316 -4784.32213 -4785.24591
## BIC diff     0.000   -33.00573   -33.92951

(hc2 <- hc(X, modelName = "VVV", use = "VARS"))
## Call:
## hc(data = X, modelName = "VVV", use = "VARS") 
## 
## Model-Based Agglomerative Hierarchical Clustering 
## Model name        = VVV 
## Use               = VARS 
## Number of objects = 145
BIC2 <- mclustBIC(X, initialization = list(hcPairs = hc2))
summary(BIC2)
## Best BIC values:
##              VVV,3       VVE,3       EVE,4
## BIC      -4760.091 -4775.53693 -4793.26143
## BIC diff     0.000   -15.44628   -33.17079

(hc3 <- hc(X, modelName = "EEE", use = "SVD"))
## Call:
## hc(data = X, modelName = "EEE", use = "SVD") 
## 
## Model-Based Agglomerative Hierarchical Clustering 
## Model name        = EEE 
## Use               = SVD 
## Number of objects = 145
BIC3 <- mclustBIC(X, initialization = list(hcPairs = hc3))
summary(BIC3)
## Best BIC values:
##              VVV,3        VVE,4       VVE,3
## BIC      -4751.354 -4757.091572 -4775.69587
## BIC diff     0.000    -5.737822   -24.34212

Update BIC by merging the best results:

BIC <- mclustBICupdate(BIC1, BIC2, BIC3)
summary(BIC)
## Best BIC values:
##              VVV,3        VVE,4       VVE,3
## BIC      -4751.316 -4757.091572 -4775.53693
## BIC diff     0.000    -5.775172   -24.22053
plot(BIC)

Univariate fit using random starting points obtained by creating random agglomerations (see help(hcRandomPairs)) and merging best results:

data(galaxies, package = "MASS") 
galaxies <- galaxies / 1000
BIC <- NULL
for(j in 1:20)
{
  rBIC <- mclustBIC(galaxies, verbose = FALSE,
                    initialization = list(hcPairs = hcRandomPairs(galaxies)))
  BIC <- mclustBICupdate(BIC, rBIC)
}
summary(BIC)
## Best BIC values:
##                V,3         V,4        V,5
## BIC      -441.6122 -443.399746 -446.34966
## BIC diff    0.0000   -1.787536   -4.73745
plot(BIC)

mod <- Mclust(galaxies, x = BIC)
summary(mod)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust V (univariate, unequal variance) model with 3 components: 
## 
##  log-likelihood  n df       BIC       ICL
##       -203.1792 82  8 -441.6122 -441.6126
## 
## Clustering table:
##  1  2  3 
##  3  7 72

Classification

EDDA

data(iris)
class <- iris$Species
table(class)
## class
##     setosa versicolor  virginica 
##         50         50         50
X <- iris[,1:4]
head(X)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4
mod2 <- MclustDA(X, class, modelType = "EDDA")
summary(mod2)
## ------------------------------------------------ 
## Gaussian finite mixture model for classification 
## ------------------------------------------------ 
## 
## EDDA model summary: 
## 
##  log-likelihood   n df       BIC
##       -187.7097 150 36 -555.8024
##             
## Classes       n     % Model G
##   setosa     50 33.33   VEV 1
##   versicolor 50 33.33   VEV 1
##   virginica  50 33.33   VEV 1
## 
## Training confusion matrix:
##             Predicted
## Class        setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         47         3
##   virginica       0          0        50
## Classification error = 0.02 
## Brier score          = 0.0127
plot(mod2, what = "scatterplot")

plot(mod2, what = "classification")

MclustDA

data(banknote)
class <- banknote$Status
table(class)
## class
## counterfeit     genuine 
##         100         100
X <- banknote[,-1]
head(X)
##   Length  Left Right Bottom  Top Diagonal
## 1  214.8 131.0 131.1    9.0  9.7    141.0
## 2  214.6 129.7 129.7    8.1  9.5    141.7
## 3  214.8 129.7 129.7    8.7  9.6    142.2
## 4  214.8 129.7 129.6    7.5 10.4    142.0
## 5  215.0 129.6 129.7   10.4  7.7    141.8
## 6  215.7 130.8 130.5    9.0 10.1    141.4
mod3 <- MclustDA(X, class)
summary(mod3)
## ------------------------------------------------ 
## Gaussian finite mixture model for classification 
## ------------------------------------------------ 
## 
## MclustDA model summary: 
## 
##  log-likelihood   n df       BIC
##       -646.0801 200 66 -1641.849
##              
## Classes         n  % Model G
##   counterfeit 100 50   EVE 2
##   genuine     100 50   XXX 1
## 
## Training confusion matrix:
##              Predicted
## Class         counterfeit genuine
##   counterfeit         100       0
##   genuine               0     100
## Classification error = 0 
## Brier score          = 0
plot(mod3, what = "scatterplot")

plot(mod3, what = "classification")

Cross-validation error

cv <- cvMclustDA(mod2, nfold = 10)
str(cv)
## List of 6
##  $ classification: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ z             : num [1:150, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:3] "setosa" "versicolor" "virginica"
##  $ ce            : num 0.0267
##  $ se.ce         : num 0.0109
##  $ brier         : num 0.0208
##  $ se.brier      : num 0.00738
unlist(cv[3:6])
##          ce       se.ce       brier    se.brier 
## 0.026666667 0.010886621 0.020795887 0.007383247
cv <- cvMclustDA(mod3, nfold = 10)
str(cv)
## List of 6
##  $ classification: Factor w/ 2 levels "counterfeit",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ z             : num [1:200, 1:2] 1.56e-06 3.50e-19 5.41e-28 3.33e-20 2.42e-29 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:2] "counterfeit" "genuine"
##  $ ce            : num 0.005
##  $ se.ce         : num 0.005
##  $ brier         : num 0.00514
##  $ se.brier      : num 0.00498
unlist(cv[3:6])
##          ce       se.ce       brier    se.brier 
## 0.005000000 0.005000000 0.005135796 0.004980123

Density estimation

Univariate

data(acidity)
mod4 <- densityMclust(acidity)

summary(mod4)
## ------------------------------------------------------- 
## Density estimation via Gaussian finite mixture modeling 
## ------------------------------------------------------- 
## 
## Mclust E (univariate, equal variance) model with 2 components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -185.9493 155  4 -392.0723 -398.5554
plot(mod4, what = "BIC")

plot(mod4, what = "density", data = acidity, breaks = 15)

plot(mod4, what = "diagnostic", type = "cdf")

plot(mod4, what = "diagnostic", type = "qq")

Multivariate

data(faithful)
mod5 <- densityMclust(faithful)

summary(mod5)
## ------------------------------------------------------- 
## Density estimation via Gaussian finite mixture modeling 
## ------------------------------------------------------- 
## 
## Mclust EEE (ellipsoidal, equal volume, shape and orientation) model with 3
## components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -1126.326 272 11 -2314.316 -2357.824
plot(mod5, what = "BIC")

plot(mod5, what = "density", type = "hdr", data = faithful, points.cex = 0.5)

plot(mod5, what = "density", type = "persp")

Bootstrap inference

boot1 <- MclustBootstrap(mod1, nboot = 999, type = "bs")
summary(boot1, what = "se")
## ---------------------------------------------------------- 
## Resampling standard errors 
## ---------------------------------------------------------- 
## Model                      = VVV 
## Num. of mixture components = 3 
## Replications               = 999 
## Type                       = nonparametric bootstrap 
## 
## Mixing probabilities:
##          1          2          3 
## 0.05185780 0.05058160 0.03559685 
## 
## Means:
##                1         2         3
## glucose 1.042239  3.444948 16.340816
## insulin 7.554105 29.047203 63.483315
## sspg    7.669033 31.684647  9.926121
## 
## Variances:
## [,,1]
##          glucose   insulin      sspg
## glucose 10.78177  51.28084  51.61617
## insulin 51.28084 529.62298 416.38176
## sspg    51.61617 416.38176 623.81098
## [,,2]
##           glucose   insulin      sspg
## glucose  65.66172  616.6785  442.0993
## insulin 616.67852 7279.0671 3240.3558
## sspg    442.09927 3240.3558 7070.4152
## [,,3]
##           glucose   insulin      sspg
## glucose 1045.6542  4178.685  667.2709
## insulin 4178.6846 18873.253 2495.0278
## sspg     667.2709  2495.028  506.8173
summary(boot1, what = "ci")
## ---------------------------------------------------------- 
## Resampling confidence intervals 
## ---------------------------------------------------------- 
## Model                      = VVV 
## Num. of mixture components = 3 
## Replications               = 999 
## Type                       = nonparametric bootstrap 
## Confidence level           = 0.95 
## 
## Mixing probabilities:
##               1         2         3
## 2.5%  0.4490043 0.1510533 0.1324862
## 97.5% 0.6518326 0.3548749 0.2688038
## 
## Means:
## [,,1]
##        glucose  insulin     sspg
## 2.5%  89.13950 344.9890 150.8405
## 97.5% 93.16603 374.7221 181.8322
## [,,2]
##         glucose  insulin     sspg
## 2.5%   98.82567 447.4121 257.9011
## 97.5% 112.28459 561.3273 374.6194
## [,,3]
##        glucose   insulin      sspg
## 2.5%  198.5986  969.6231  63.22103
## 97.5% 263.2932 1226.2654 101.09078
## 
## Variances:
## [,,1]
##        glucose  insulin     sspg
## 2.5%  38.65508 1234.198 1514.416
## 97.5% 79.43401 3287.722 4146.024
## [,,2]
##         glucose   insulin     sspg
## 2.5%   88.35268  3514.662 12583.92
## 97.5% 358.15175 31416.557 39228.47
## [,,3]
##        glucose   insulin     sspg
## 2.5%  3377.773  47477.74 1317.041
## 97.5% 7379.344 120297.75 3229.747
plot(boot1, what = "pro")
plot(boot1, what = "mean")

boot4 <- MclustBootstrap(mod4, nboot = 999, type = "bs")
summary(boot4, what = "se")
## ---------------------------------------------------------- 
## Resampling standard errors 
## ---------------------------------------------------------- 
## Model                      = E 
## Num. of mixture components = 2 
## Replications               = 999 
## Type                       = nonparametric bootstrap 
## 
## Mixing probabilities:
##          1          2 
## 0.04130937 0.04130937 
## 
## Means:
##          1          2 
## 0.04669993 0.06719883 
## 
## Variances:
##          1          2 
## 0.02376885 0.02376885
summary(boot4, what = "ci")
## ---------------------------------------------------------- 
## Resampling confidence intervals 
## ---------------------------------------------------------- 
## Model                      = E 
## Num. of mixture components = 2 
## Replications               = 999 
## Type                       = nonparametric bootstrap 
## Confidence level           = 0.95 
## 
## Mixing probabilities:
##               1         2
## 2.5%  0.5364895 0.3004131
## 97.5% 0.6995869 0.4635105
## 
## Means:
##              1        2
## 2.5%  4.279055 6.184439
## 97.5% 4.461108 6.449465
## 
## Variances:
##               1         2
## 2.5%  0.1395796 0.1395796
## 97.5% 0.2317769 0.2317769
plot(boot4, what = "pro")
plot(boot4, what = "mean")

Dimension reduction

Clustering

mod1dr <- MclustDR(mod1)
summary(mod1dr)
## ----------------------------------------------------------------- 
## Dimension reduction for model-based clustering and classification 
## ----------------------------------------------------------------- 
## 
## Mixture model type: Mclust (VVV, 3) 
##         
## Clusters  n
##        1 81
##        2 36
##        3 28
## 
## Estimated basis vectors: 
##              Dir1     Dir2
## glucose  0.764699  0.86359
## insulin -0.643961 -0.22219
## sspg     0.023438 -0.45260
## 
##                Dir1      Dir2
## Eigenvalues  1.2629   0.35218
## Cum. %      78.1939 100.00000
plot(mod1dr, what = "pairs")

plot(mod1dr, what = "boundaries", ngrid = 200)


mod1dr <- MclustDR(mod1, lambda = 1)
summary(mod1dr)
## ----------------------------------------------------------------- 
## Dimension reduction for model-based clustering and classification 
## ----------------------------------------------------------------- 
## 
## Mixture model type: Mclust (VVV, 3) 
##         
## Clusters  n
##        1 81
##        2 36
##        3 28
## 
## Estimated basis vectors: 
##              Dir1     Dir2
## glucose  0.764699  0.86359
## insulin -0.643961 -0.22219
## sspg     0.023438 -0.45260
## 
##                Dir1      Dir2
## Eigenvalues  1.2629   0.35218
## Cum. %      78.1939 100.00000
plot(mod1dr, what = "scatterplot")

plot(mod1dr, what = "boundaries", ngrid = 200)

Classification

mod2dr <- MclustDR(mod2)
summary(mod2dr)
## ----------------------------------------------------------------- 
## Dimension reduction for model-based clustering and classification 
## ----------------------------------------------------------------- 
## 
## Mixture model type: EDDA 
##             
## Classes       n Model G
##   setosa     50   VEV 1
##   versicolor 50   VEV 1
##   virginica  50   VEV 1
## 
## Estimated basis vectors: 
##                  Dir1      Dir2
## Sepal.Length  0.20874 -0.006532
## Sepal.Width   0.38620 -0.586611
## Petal.Length -0.55401  0.252562
## Petal.Width  -0.70735 -0.769453
## 
##                Dir1       Dir2
## Eigenvalues  1.8813   0.098592
## Cum. %      95.0204 100.000000
plot(mod2dr, what = "scatterplot")

plot(mod2dr, what = "boundaries", ngrid = 200)


mod3dr <- MclustDR(mod3)
summary(mod3dr)
## ----------------------------------------------------------------- 
## Dimension reduction for model-based clustering and classification 
## ----------------------------------------------------------------- 
## 
## Mixture model type: MclustDA 
##              
## Classes         n Model G
##   counterfeit 100   EVE 2
##   genuine     100   XXX 1
## 
## Estimated basis vectors: 
##              Dir1     Dir2
## Length   -0.07016 -0.25690
## Left     -0.36888 -0.19963
## Right     0.29525 -0.10111
## Bottom    0.54683  0.46254
## Top       0.55720  0.41370
## Diagonal -0.40290  0.70628
## 
##                Dir1     Dir2
## Eigenvalues  1.7188   1.0607
## Cum. %      61.8373 100.0000
plot(mod3dr, what = "scatterplot")

plot(mod3dr, what = "boundaries", ngrid = 200)


Using colorblind-friendly palettes

Most of the graphs produced by mclust use colors that by default are defined in the following options:

mclust.options("bicPlotColors")
##       EII       VII       EEI       EVI       VEI       VVI       EEE       VEE 
##    "gray"   "black" "#218B21" "#41884F" "#508476" "#58819C" "#597DC3" "#5178EA" 
##       EVE       VVE       EEV       VEV       EVV       VVV         E         V 
## "#716EE7" "#9B60B8" "#B2508B" "#C03F60" "#C82A36" "#CC0000"    "gray"   "black"
mclust.options("classPlotColors")
##  [1] "dodgerblue2"    "red3"           "green3"         "slateblue"     
##  [5] "darkorange"     "skyblue1"       "violetred4"     "forestgreen"   
##  [9] "steelblue4"     "slategrey"      "brown"          "black"         
## [13] "darkseagreen"   "darkgoldenrod3" "olivedrab"      "royalblue"     
## [17] "tomato4"        "cyan2"          "springgreen2"

The first option controls colors used for plotting BIC, ICL, etc. curves, whereas the second option is used to assign colors for indicating clusters or classes when plotting data.

Starting with R version 4.0, the function can be used for retrieving colors from some pre-defined palettes. For instance

palette.colors(palette = "Okabe-Ito")

returns a color-blind-friendly palette for individuals suffering from protanopia or deuteranopia, the two most common forms of inherited color blindness. For earlier versions of R such palette can be defined as:

cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", 
"#D55E00", "#CC79A7", "#999999")

and then assigned to the mclust options as follows:

bicPlotColors <- mclust.options("bicPlotColors")
bicPlotColors[1:14] <- c(cbPalette, cbPalette[1:5])
mclust.options("bicPlotColors" = bicPlotColors)
mclust.options("classPlotColors" = cbPalette[-1])

clPairs(iris[,-5], iris$Species)

mod <- Mclust(iris[,-5])
plot(mod, what = "BIC")

plot(mod, what = "classification")

If needed, users can easily define their own palettes following the same procedure outlined above.



References

Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, The R Journal, 8/1, pp. 205-233. https://journal.r-project.org/archive/2016/RJ-2016-021/RJ-2016-021.pdf

Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, Journal of the American Statistical Association, 97/458, pp. 611-631.

Fraley C., Raftery A. E., Murphy T. B. and Scrucca L. (2012) mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. Technical Report No. 597, Department of Statistics, University of Washington.


sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] mclust_5.4.9 knitr_1.36  
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.28   R6_2.5.1        jsonlite_1.7.2  magrittr_2.0.1 
##  [5] evaluate_0.14   highr_0.9       rlang_0.4.12    stringi_1.7.5  
##  [9] jquerylib_0.1.4 bslib_0.3.1     rmarkdown_2.11  tools_4.1.0    
## [13] stringr_1.4.0   xfun_0.28       yaml_2.2.1      fastmap_1.1.0  
## [17] compiler_4.1.0  htmltools_0.5.2 sass_0.4.0
mclust/inst/doc/mclust.Rmd0000644000175000017500000001777714141212044015424 0ustar nileshnilesh--- title: "A quick tour of mclust" author: "Luca Scrucca" date: "`r format(Sys.time(), '%d %b %Y')`" output: rmarkdown::html_vignette: toc: true number_sections: false css: "vignette.css" vignette: > %\VignetteIndexEntry{A quick tour of mclust} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} library(knitr) opts_chunk$set(fig.align = "center", out.width = "90%", fig.width = 6, fig.height = 5, dev.args = list(pointsize=10), par = TRUE, # needed for setting hook collapse = TRUE, # collapse input & output code in chunks warning = FALSE) knit_hooks$set(par = function(before, options, envir) { if(before && options$fig.show != "none") par(family = "sans", mar=c(4.1,4.1,1.1,1.1), mgp=c(3,1,0), tcl=-0.5) }) set.seed(1) # for exact reproducibility ``` # Introduction **mclust** is a contributed R package for model-based clustering, classification, and density estimation based on finite normal mixture modelling. It provides functions for parameter estimation via the EM algorithm for normal mixture models with a variety of covariance structures, and functions for simulation from these models. Also included are functions that combine model-based hierarchical clustering, EM for mixture estimation and the Bayesian Information Criterion (BIC) in comprehensive strategies for clustering, density estimation and discriminant analysis. Additional functionalities are available for displaying and visualizing fitted models along with clustering, classification, and density estimation results. This document gives a quick tour of **mclust** (version `r packageVersion("mclust")`) functionalities. It was written in R Markdown, using the [knitr](https://cran.r-project.org/package=knitr) package for production. See `help(package="mclust")` for further details and references provided by `citation("mclust")`. 
```{r, message = FALSE, echo=-2} library(mclust) cat(mclust:::mclustStartupMessage(), sep="") ``` # Clustering ```{r} data(diabetes) class <- diabetes$class table(class) X <- diabetes[,-1] head(X) clPairs(X, class) BIC <- mclustBIC(X) plot(BIC) summary(BIC) mod1 <- Mclust(X, x = BIC) summary(mod1, parameters = TRUE) plot(mod1, what = "classification") table(class, mod1$classification) plot(mod1, what = "uncertainty") ICL <- mclustICL(X) summary(ICL) plot(ICL) LRT <- mclustBootstrapLRT(X, modelName = "VVV") LRT ``` ## Initialisation EM algorithm is used by **mclust** for maximum likelihood estimation. Initialisation of EM is performed using the partitions obtained from agglomerative hierarchical clustering. For details see `help(mclustBIC)` or `help(Mclust)`, and `help(hc)`. ```{r} (hc1 <- hc(X, modelName = "VVV", use = "SVD")) BIC1 <- mclustBIC(X, initialization = list(hcPairs = hc1)) # default summary(BIC1) (hc2 <- hc(X, modelName = "VVV", use = "VARS")) BIC2 <- mclustBIC(X, initialization = list(hcPairs = hc2)) summary(BIC2) (hc3 <- hc(X, modelName = "EEE", use = "SVD")) BIC3 <- mclustBIC(X, initialization = list(hcPairs = hc3)) summary(BIC3) ``` Update BIC by merging the best results: ```{r} BIC <- mclustBICupdate(BIC1, BIC2, BIC3) summary(BIC) plot(BIC) ``` Univariate fit using random starting points obtained by creating random agglomerations (see `help(hcRandomPairs)`) and merging best results: ```{r, echo=-1} set.seed(20181116) data(galaxies, package = "MASS") galaxies <- galaxies / 1000 BIC <- NULL for(j in 1:20) { rBIC <- mclustBIC(galaxies, verbose = FALSE, initialization = list(hcPairs = hcRandomPairs(galaxies))) BIC <- mclustBICupdate(BIC, rBIC) } summary(BIC) plot(BIC) mod <- Mclust(galaxies, x = BIC) summary(mod) ``` # Classification ## EDDA ```{r} data(iris) class <- iris$Species table(class) X <- iris[,1:4] head(X) mod2 <- MclustDA(X, class, modelType = "EDDA") summary(mod2) plot(mod2, what = "scatterplot") plot(mod2, what = "classification") ``` ## 
MclustDA ```{r} data(banknote) class <- banknote$Status table(class) X <- banknote[,-1] head(X) mod3 <- MclustDA(X, class) summary(mod3) plot(mod3, what = "scatterplot") plot(mod3, what = "classification") ``` ## Cross-validation error ```{r} cv <- cvMclustDA(mod2, nfold = 10) str(cv) unlist(cv[3:6]) cv <- cvMclustDA(mod3, nfold = 10) str(cv) unlist(cv[3:6]) ``` # Density estimation ## Univariate ```{r} data(acidity) mod4 <- densityMclust(acidity) summary(mod4) plot(mod4, what = "BIC") plot(mod4, what = "density", data = acidity, breaks = 15) plot(mod4, what = "diagnostic", type = "cdf") plot(mod4, what = "diagnostic", type = "qq") ``` ## Multivariate ```{r} data(faithful) mod5 <- densityMclust(faithful) summary(mod5) plot(mod5, what = "BIC") plot(mod5, what = "density", type = "hdr", data = faithful, points.cex = 0.5) plot(mod5, what = "density", type = "persp") ``` # Bootstrap inference ```{r} boot1 <- MclustBootstrap(mod1, nboot = 999, type = "bs") summary(boot1, what = "se") summary(boot1, what = "ci") ``` ```{r, echo=-1, fig.width=6, fig.height=7} par(mfrow=c(4,3)) plot(boot1, what = "pro") plot(boot1, what = "mean") ``` ```{r} boot4 <- MclustBootstrap(mod4, nboot = 999, type = "bs") summary(boot4, what = "se") summary(boot4, what = "ci") ``` ```{r, echo=-1} par(mfrow=c(2,2)) plot(boot4, what = "pro") plot(boot4, what = "mean") ``` # Dimension reduction ## Clustering ```{r} mod1dr <- MclustDR(mod1) summary(mod1dr) plot(mod1dr, what = "pairs") plot(mod1dr, what = "boundaries", ngrid = 200) mod1dr <- MclustDR(mod1, lambda = 1) summary(mod1dr) plot(mod1dr, what = "scatterplot") plot(mod1dr, what = "boundaries", ngrid = 200) ``` ## Classification ```{r} mod2dr <- MclustDR(mod2) summary(mod2dr) plot(mod2dr, what = "scatterplot") plot(mod2dr, what = "boundaries", ngrid = 200) mod3dr <- MclustDR(mod3) summary(mod3dr) plot(mod3dr, what = "scatterplot") plot(mod3dr, what = "boundaries", ngrid = 200) ```
# Using colorblind-friendly palettes Most of the graphs produced by **mclust** use colors that by default are defined in the following options: ```{r} mclust.options("bicPlotColors") mclust.options("classPlotColors") ``` The first option controls colors used for plotting BIC, ICL, etc. curves, whereas the second option is used to assign colors for indicating clusters or classes when plotting data. Starting with R version 4.0, the function \code{palette.colors()} can be used for retrieving colors from some pre-defined palettes. For instance ```{r, eval=FALSE} palette.colors(palette = "Okabe-Ito") ``` returns a color-blind-friendly palette for individuals suffering from protanopia or deuteranopia, the two most common forms of inherited color blindness. For earlier versions of R such palette can be defined as: ```{r} cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999") ``` and then assigned to the **mclust** options as follows: ```{r} bicPlotColors <- mclust.options("bicPlotColors") bicPlotColors[1:14] <- c(cbPalette, cbPalette[1:5]) mclust.options("bicPlotColors" = bicPlotColors) mclust.options("classPlotColors" = cbPalette[-1]) clPairs(iris[,-5], iris$Species) mod <- Mclust(iris[,-5]) plot(mod, what = "BIC") plot(mod, what = "classification") ``` If needed, users can easily define their own palettes following the same procedure outlined above.

# References Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, *The R Journal*, 8/1, pp. 205-233. https://journal.r-project.org/archive/2016/RJ-2016-021/RJ-2016-021.pdf Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, *Journal of the American Statistical Association*, 97/458, pp. 611-631. Fraley C., Raftery A. E., Murphy T. B. and Scrucca L. (2012) mclust Version 4 for R: Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation. *Technical Report* No. 597, Department of Statistics, University of Washington. ---- ```{r} sessionInfo() ```mclust/inst/doc/mclust.R0000644000175000017500000001353214157117040015073 0ustar nileshnilesh## ----setup, include=FALSE----------------------------------------------------- library(knitr) opts_chunk$set(fig.align = "center", out.width = "90%", fig.width = 6, fig.height = 5, dev.args = list(pointsize=10), par = TRUE, # needed for setting hook collapse = TRUE, # collapse input & output code in chunks warning = FALSE) knit_hooks$set(par = function(before, options, envir) { if(before && options$fig.show != "none") par(family = "sans", mar=c(4.1,4.1,1.1,1.1), mgp=c(3,1,0), tcl=-0.5) }) set.seed(1) # for exact reproducibility ## ---- message = FALSE, echo=-2------------------------------------------------ library(mclust) cat(mclust:::mclustStartupMessage(), sep="") ## ----------------------------------------------------------------------------- data(diabetes) class <- diabetes$class table(class) X <- diabetes[,-1] head(X) clPairs(X, class) BIC <- mclustBIC(X) plot(BIC) summary(BIC) mod1 <- Mclust(X, x = BIC) summary(mod1, parameters = TRUE) plot(mod1, what = "classification") table(class, mod1$classification) plot(mod1, what = "uncertainty") ICL <- mclustICL(X) summary(ICL) plot(ICL) LRT <- mclustBootstrapLRT(X, modelName = "VVV") LRT ## 
----------------------------------------------------------------------------- (hc1 <- hc(X, modelName = "VVV", use = "SVD")) BIC1 <- mclustBIC(X, initialization = list(hcPairs = hc1)) # default summary(BIC1) (hc2 <- hc(X, modelName = "VVV", use = "VARS")) BIC2 <- mclustBIC(X, initialization = list(hcPairs = hc2)) summary(BIC2) (hc3 <- hc(X, modelName = "EEE", use = "SVD")) BIC3 <- mclustBIC(X, initialization = list(hcPairs = hc3)) summary(BIC3) ## ----------------------------------------------------------------------------- BIC <- mclustBICupdate(BIC1, BIC2, BIC3) summary(BIC) plot(BIC) ## ---- echo=-1----------------------------------------------------------------- set.seed(20181116) data(galaxies, package = "MASS") galaxies <- galaxies / 1000 BIC <- NULL for(j in 1:20) { rBIC <- mclustBIC(galaxies, verbose = FALSE, initialization = list(hcPairs = hcRandomPairs(galaxies))) BIC <- mclustBICupdate(BIC, rBIC) } summary(BIC) plot(BIC) mod <- Mclust(galaxies, x = BIC) summary(mod) ## ----------------------------------------------------------------------------- data(iris) class <- iris$Species table(class) X <- iris[,1:4] head(X) mod2 <- MclustDA(X, class, modelType = "EDDA") summary(mod2) plot(mod2, what = "scatterplot") plot(mod2, what = "classification") ## ----------------------------------------------------------------------------- data(banknote) class <- banknote$Status table(class) X <- banknote[,-1] head(X) mod3 <- MclustDA(X, class) summary(mod3) plot(mod3, what = "scatterplot") plot(mod3, what = "classification") ## ----------------------------------------------------------------------------- cv <- cvMclustDA(mod2, nfold = 10) str(cv) unlist(cv[3:6]) cv <- cvMclustDA(mod3, nfold = 10) str(cv) unlist(cv[3:6]) ## ----------------------------------------------------------------------------- data(acidity) mod4 <- densityMclust(acidity) summary(mod4) plot(mod4, what = "BIC") plot(mod4, what = "density", data = acidity, breaks = 15) plot(mod4, what = "diagnostic", 
type = "cdf") plot(mod4, what = "diagnostic", type = "qq") ## ----------------------------------------------------------------------------- data(faithful) mod5 <- densityMclust(faithful) summary(mod5) plot(mod5, what = "BIC") plot(mod5, what = "density", type = "hdr", data = faithful, points.cex = 0.5) plot(mod5, what = "density", type = "persp") ## ----------------------------------------------------------------------------- boot1 <- MclustBootstrap(mod1, nboot = 999, type = "bs") summary(boot1, what = "se") summary(boot1, what = "ci") ## ---- echo=-1, fig.width=6, fig.height=7-------------------------------------- par(mfrow=c(4,3)) plot(boot1, what = "pro") plot(boot1, what = "mean") ## ----------------------------------------------------------------------------- boot4 <- MclustBootstrap(mod4, nboot = 999, type = "bs") summary(boot4, what = "se") summary(boot4, what = "ci") ## ---- echo=-1----------------------------------------------------------------- par(mfrow=c(2,2)) plot(boot4, what = "pro") plot(boot4, what = "mean") ## ----------------------------------------------------------------------------- mod1dr <- MclustDR(mod1) summary(mod1dr) plot(mod1dr, what = "pairs") plot(mod1dr, what = "boundaries", ngrid = 200) mod1dr <- MclustDR(mod1, lambda = 1) summary(mod1dr) plot(mod1dr, what = "scatterplot") plot(mod1dr, what = "boundaries", ngrid = 200) ## ----------------------------------------------------------------------------- mod2dr <- MclustDR(mod2) summary(mod2dr) plot(mod2dr, what = "scatterplot") plot(mod2dr, what = "boundaries", ngrid = 200) mod3dr <- MclustDR(mod3) summary(mod3dr) plot(mod3dr, what = "scatterplot") plot(mod3dr, what = "boundaries", ngrid = 200) ## ----------------------------------------------------------------------------- mclust.options("bicPlotColors") mclust.options("classPlotColors") ## ---- eval=FALSE-------------------------------------------------------------- # palette.colors(palette = "Okabe-Ito") ## 
----------------------------------------------------------------------------- cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999") ## ----------------------------------------------------------------------------- bicPlotColors <- mclust.options("bicPlotColors") bicPlotColors[1:14] <- c(cbPalette, cbPalette[1:5]) mclust.options("bicPlotColors" = bicPlotColors) mclust.options("classPlotColors" = cbPalette[-1]) clPairs(iris[,-5], iris$Species) mod <- Mclust(iris[,-5]) plot(mod, what = "BIC") plot(mod, what = "classification") ## ----------------------------------------------------------------------------- sessionInfo() mclust/data/0000755000175000017500000000000014157117042012626 5ustar nileshnileshmclust/data/wreath.rda0000644000175000017500000003623714157117042014623 0ustar nileshnilesh]i8_myy`*UQI2&hH)(I27L 4*B"!!9y﵎cžuy{JyJ  !( " ""Kro($8o;&TYa4LRB ƥ5Y@r z^Xlhs3~Jעs a..҄ k,m,7o/Fxhe ߿ՠwi]]62&\d2!%Klۅ"*[ji@\[{;aHц_+Paa^#xJ]F(Dx=R>o|zf1Ps6o>,˩{}Prgv yT|:#篢ַ]@dCMT}^mݱڧQQ36Y&&bP3dVn?~u?PMGr7] i zS"-I;Y\Eo'AEVԩ׿c;v`yEm,W ϑ#RID_aQ9׉&F53>Q(*=qCv¹dA`lߩ?'܀O29 |'ms hc!!?*P:XWn4xzsK>s<`9f. IvWsu^K$'17l5†ο*=8IˉJ7m@jɲE>#Ó;BDϺ4YA<BМA֩'̀poت U̅a^s*MŴ֚Y0n2g$Zfkoj 5vZ+SF ouG斳6+s _|iR#)[ P[=&#j۵^@7q;@Cl_W23-ׁOO>tT.{9ݗK6!Ψo~. b΢-~WrfCJvEr 赗uk; bQ@吚J4LDGYy}),z oje\FgLȮpR%'h*֔_kȝOX9炂"B1S FCɑߥQcb-guIRأү@pçl?֩C(,Rg;K`gP=$.lvCŻKo>MrvB:,z:I  `#*lv'sc}]~QM- y +U}aڒUur[[-^CUϽ|yk;{98DCEڛhy&_j}iX! Qm _WO\=Ѓ%!rfw<װ&Ok=A}4rNU[n5ɝ_R0"EqQՖ~OvKLT}99yT(Euv PUxa8B;eiol(Z_'""K9\q/.8i{=Ȏuy$d_"N +!7QOu(v /R=n UJbSU@ydgpO;ҧ~h0k#U^ͫ޸H`dsErV:\ID| t)'Ebk~'=Q&};U6%z‚7TJz[ǦQR*LէᲯqvf2x>*AwhN?QC8*%+-^EE(W}4w'&{Qj9j˜9&A.F$Wrkx\XȄh5APF@'$K< g(h(U}eNT$^P:+1* &vQ_Q)kS5 @rl,~%%Ȩ_F&[Lb (&I@}8n{.yЯ{!P-jţ!s0 u qR8!C`BջWJBƮM=m/6 O Lnxs,]0|bK e2F*o?̓IsoWZUlA%4~P S7-r-@!uݙmι@-)LƅσPG(/ov#<.'B=9*6PsSQ´4x$G6qPϿު2d))o&2op +ֺk wJIV8{($>|o1Ѩ$aH ncQP@k~(b@Vg~OMU.GK2 (WgȀImmKXK=&Bʶ͖ZěrJZ̍n0b&y]$ԤFoGMѩQ4>n0PHm3)SKaE "Lh{sm6P)'Mޡ'XLcŬ+3-~? 
rZt4; MhuIO8s6(^2*`~^]69y7:iw|[ShCzYǾؾ:MsAVݹ{wYC~ EvQ9QFUL2z ~ݔ'Eٗo5\ё=֖?V.JrYn*+zǣHk#NhЛ?~J;42,/3H>`jnGQ2uNy6/k!'ʒǏh㴪w$zjR OWE߅H(*34"Roрf9eRD9~wt7Z(:^ S$ 0I[T]&sƾK{4Ħ6Ndwe(hۦ.}%iXM4צf %\ (Y3(1D>ܳ[zJ/+B냴KFh|$B**JO,.S\A!o&o ~t*تkGP9X[v0ɜ*M\! $v=/ZGk <;tP4J?^Tҟ(7lP@r/|E7.^̦\[JjQ-a,/A'PR軣AFE+x\P׏(.n%.a!$[%-3QGu W~xa =(xW%͹ $oCL/g307PdDbN0ϣX7>qM._K9Ť>POQ2G m)] #:둑erG?7Z/GZW vu/HkIx?5e^ym+R- =A3"K˪)Z:#$ o >F:X\B\;EaBQ&y Kd@T@37VݕoIYk:%;3R"ڀ:`zE⽪Qp{fJ2fڀb?0s Yo)MݸC;oNkp?cX.O|ejA4-}uD#'"'t?Ԡ;x ?] TYǒYD'vI7-` Ŏc|}:Э#peDg㊇ hOEP[Y3:s@mHCw&@ZVAehl#?z7u$g*m;A=Pq_{J y~EӢK|PCDq+Zti!uϙ_k[e A:2jВ.LΧ %U!ppg.H6=U ubx*_}>HyNnJvP]ɣ,dED׏fQyn;TrO7O;?a d\i ‡@9[bɜ(27=P54yGE߇:,BLf}Ȱ/Gz˕쌁NSYnx)plUm:*Z]-Up1߃%r.(:kR4"Sz@DVA9mtLP`ݗBU[Kď';ԠUh$?.)˙VF!KѨ;"/) mi9VAI $6; 'MлȺت=1:'d%I:0GW7dih D^BC\w DB:Ĺ~χS֯9ӉVuiɿZKWޱ\@ROix>5 @y(7d `Gaq2H0*{EVoZv@ZTl[ RkFNsM.47/=qk8 kg^#Ʀ^+Go*/@IuyYg,"]I~wJi^=:#idw1ur&(Vj.@-HίtjH-'.2-TP6_dϔnFo:3@}UP-sXpW/8Sj]ڍrC󑘋@^"1P^ 6AUyTr3y+P]-̑qf_,7ԂϟyCP`Xvek(*X^A֡3Gn`]POl%Oк@۱-w}9HjX6FB٢ZJ\!^(GT{:vnYRSqo}!  
jk*U/6BW9:@,ZdKҁU2+  ^rK`/( k>S+nxŘ&ʽthFǗ)Jp\ ԉ Bݫ{/"˶iǧ/ܿ]oilhAAQuv_ e#wANgPW˶(U`5v?uC~(h7/ISQB ]I^[XYO*͛@\Vwh94HsQtzO zE1^B-qO-OzLb;S{ArwDo4U=}@kй~AAį[qLf쩃7A^-\&]F=sKp NȣqC.Kh&u{ ̸Ui,GxqۮRrRȤxǝA.TssBPފ+DZh\]dIᮀ}bʼo+"ji y?QzߣxKO@Nfk&m lEwl`"]P2YԼuNjIj%(guCyWH_SDBι0Iܭ]n0 -Jզ@z -D~Ԩ.h8y?FUvc%}'A]p,Om| nI&5P'"ƌB2^U9{W©ҝ@͡'u*;}!JkW#oR^:oR_F Es ~|9JSQ1SuZkFPGWа'1~7 1uljh@uGlAw@S3A %GRmOs{bvhD{ϴx4%|^W:Q+@֫9xb1W=nl[Iǫ@Ghj@9S_N\#FMrgEbFǫn!P Wnr MWJuAijgXi*ŴV-F;q0<~ #(QO%gOڃ&_* C`O$ڔsKgh dIu?(7L?9J;v>}K?@0 yb3h?wU.yS{SPz2)9Z@}n2קtP?zPg"f$?^>ps`V3ݷoR5Tj`7)'~3q!ǧ{y[>+ iRCb^\q lrZஏX)@BJ'_ͽh]7_o#]1HgёMsTV0=dZ*\*mrp=ƾݯQׄ P>Tg5hlm4-mqARqjOGP7}UW eמAaoа\*~1>-~*+3\"4揺Nͻ|`+yn5wVj^_*CZ薢kR·-t0]*=s< F.Y McH& FsWA$}5sYrs^Q{0vo/ͯGXo4gmzRj%>(4Ћ:#qrR*^ނzf>PzO?X}^RQf9o{f˛3x0}\MR|N/%޾9KbF|BBnB#/3_A*QwDj ..}rE9KNh7 ;1La ʬ/d;] Jb/IǷ EOfVhw{ Z_tЀ~y2>4}Kus*xPMsެezho+P,63$;ӯ5WK@j{nQGMa2N~{1 ٣} 輘MG[FN΃ȉ-ۀG9(8{ eBSFo䣲צVoT95e1t]bZ n s@3k{zg Wo} Oke]Ԇ|5,n2j7J0p(|x nv׸RɽvsߥHp {% 5)2ʔ`g [pȾv^.?n}n93*hxGNsGios$V "mգ/4zlC޸+Am?՛Թ:,#!/M^P[U!gF 2 1x _W* qVQAĉ}-ME9Z#Ǐ;]2s?)yɨ01}z!Wp2KMrw+g󆺸}A㫚67j^8X)oWDzՐs[\k7ʻ|n>cNK``Z[dׄϠ',ԜTr[Q~A8ؠU|~YF68еKU{pLh{js_wܼ,@vuӨij(Ϟ<_3 lap4F^l1 :<_޴٠]<ӟm2mO +,_iNv,E,G>6G)mMG}MAi˽W+Zv؝ORI: ݯ'>y/)Z, HL* oE{/p*t^-BUty{P554˳!s ҺD-f0j*M Uq9" t()d`u Mܜ"f]CzZI?VEz֊wy8ز~'P~oɘEpe+QN5eo 7fĥyѠȟ@9Cُޜ~dcY'Fg&0>9&?!I`i "VA&r]|״} Q$*<덄#a=p+pFJ[@q F4YJ6sbyצFD`_ɏ:^*/BBKw7֎{5噌 5IжW<|1-o~-" dDNnj`쫬\ 0ی~t2DH"o"qI~&gQgۺ-Jm@lCWwп%;QWr(nHYE>Ԉ̒ Yw V%Hm/]xgV݄1T/or; X#.ǐK0]4Ԕ;j堭v~G% _lkEwɣ;cW#>ðmޟ^A__ _i,Ua:R.gtr#9*q'vlٿksħ\`n=П}ŏ@Ӣa >78QpL);X`p|ˈ&e=X:074~[ZO#/5c.i3@K&H]3ـb<[ L,?8u9^u鹔9%qWP% 8HehQhޞy-@'r~ihcP~yhKWt P\@H3^-ioҮZVePJZyZ!JAQ=}t34E^-翖9d<8;:qo /HydB^a`,8R̝7qUq[D dKx.EzXԢnaPQս6 OY޼ oGPTF4^u:MvX"Ndjj  6fGlQ/?5%#U,U`>LI."#"]o#z|X=P -+Sm ;D&]][ dy-!zeBHYF۱H)Y$4Y^#Hi\)ԓ@y[P`7|&>^5=5gN wT̤d4k#r;}$MTv{! 
jԙ7}A|a|f7"yV}=* 4Ң #r%mQRr:̦;؀~Vu>峾^EDK+xV, F`p'IwN#cR]:*goZgYN XJZiJtC K'FCA@8(~2}>*O[ `T/\4Nm@pad* mlo>[ p5"R]QS,RvIyR&'eLUP)1 G9@)nGH EۡH^gg ,>*= Q3]GZ:6А8}ٸ5we0'DQ{ӂ`8-$loǕG{^5ssZ*lq<>I5Ep0uio4Y8tv7{<m9E7rPjr_0>~z3ɰSY FWzn od?[$[BB.0)#t?|?mclust/data/thyroid.rda0000644000175000017500000000377114157117042015010 0ustar nileshnileshZMOW~a:ꀊBQDjx0 1VQ*&( 6o+CWIrѸ#1!B0 <ǜgM7Mڅ$Ù{~d/[ ux}_]K_.?}#fc,ѿ||S|lyvn-oU7~O5,7VCidvA4<-RI~{b>:,p:ЇhO=)t*~I҂o/t&O ۠&?vS=qSƅroFg}LLJ}{B␑2^DBh 4?x}WMό~ۛ{|LJɰ6^wiݴQ1%pzt=bo|O ~&G?xed]MH~M^AʹS?V7$n#-A^yR7 6mwPi x9ԡUe?_wϽu*7#lǗr(8ʺH ~=;g8z 'AO< {1NЯ`O_=M=l,wmIhwFdi_?RYPꡟwK>ƭ_-vMQ=%$v[ZA?x`>S~IS #!0ax`6/U;0~Ї~q:ijm)lB)gZk/vY}Q̯[+~0UmM}Y{GD?'E_6[^yGW\v~,qWSwZΑ5|A5ljY|EyW_U<$k(;X1􈯟IN$x|s&97$Jܸd=׭G,.aO\pǫA*|'~%ߜV<{-m_[2o[e_37ks=~Su)sMꖬgkhC;߉ňzU8R/qPyXW Նߐ:[ȟڣIȾ+田Qa_CiTإuΨCߦM)C _VoYvf#ƫyr]+=zo\m/ޢyXB>}&T97:swBU s$0{o؆;V`a[ F[O{.8r?>w>mɷ?^!:k@є[ݏocP,oj"E1O翷1W8no=1_x_C.7w7ևPۼGd]>5w} G?wz(n=l}E=|b߆-6~&YGg$-s/rOJ*Ō_iJwceݿ&mclust/data/chevron.rda0000644000175000017500000003733614157117042014776 0ustar nileshnilesheXT1@10Dٽb``;;Pncح|fy\s]uޞ-c23337(goyé?,, Jm;ߧg3 6ܢVn1,߾[_Y$co-e =zv~Vmmc8eTC& qϞff>]k8FW(5Cu- ǘK4snH5#[0 2s- !ycW?e8JEC{4w.ps}45_cTP=ty Xj{{(eV7Qm57-+cc#kNl:7+9a8j+RpL~#(ϛ΃UӼ:5Bᆨ}QiI y-a8lv ;^1}ϳooו[`][lBr rExu:T BcunG?}|I}vnBwa๥Ia,~[ G1G,>^QoJ6E1cQ)V&z/0oE\ZYVRv.tg(yy,^ F; ?W4= In̦zo`9rQZ A߶dGeQoeU{:)YeBW^nzhw得_ {Tn3hs`/\kf<$X_C?F}nQsc1ih_m<|ӝۚhFb1G?iw(oԐ[`IpZ 32rpg#}c- j1}D 1|7Ͽtu; NWtPVz9xUL.qۈhy7u8'#,\}n\?<pwc/x~gw_Vzrmq+/vkc11/4rAW(/i[)r '۵k;pWǵg-S]rw8z2z֩ ϋ/q}3w*Mp.W>r\ߠp9Cbi7!#jP:w }ql=Uw*_?&yڽBC).nBLKp Pۆ@^[wu _p;TJă_!okw684Ɓ uĐfq{c}q_7(^/;`? mg[`'moLh8&~4dA󏌅}'oNY((LqB]ND} F'DVz5gw 0\^kP8gz{ޯ A3gxp")|‹gaEt1Iu~IOVi aX"~Hcޱ:ڹ\(௽Q.[!;mBčri~}V8\4<*/TC"⾨oC-]Ӆs~{FKKYHٓ 'b&GF6p(馟w vQ,mg}w{2y39c{E3 kN~%}/õq*U⑮m!7@V8K_~i Q]~Jh~r徆c>^ܞ҆!~;? 
} :MB"UhGC WY^7vElf0s*Ak/˟އX{w5ep1iҎ?r|yC^/  Ƶswp=BX|sõc,%s*uTķЎ#F{y8N1spr3Y~3J8fxLYmVXJ{u'mYr g.:?|Q,c RZg2wӛ'@ܢIxhq]~̞ =<_va։*祏v ~Ϧloh8V>*}_u@f1~50ު5κw޳྘/8, >_iqK}ǚ|r[ {M=~Mqמ:~2YuF-5Wy7e2kAw=ῢ A{zT+9Y׶sseG{9x,iKܠm[+@}7?(=g6ٽ qOܡA/=<{Vehga^픘9Ah*-D=?-[O#aF}̺_~#-vbW ~(l(ăZ V?TΨO#J1O[rCAw1)ہw591~u^>q'0_Jk:+#%]_'}9uąyO٨]Aʸb7|qtU&/iCuFvkx.-!K1GdyI₊aӏx`{vFA_y=\=_l҆]gvG,D9bF+iGɭ }8˫x秿} @΍uG|h?o3rt6砗+l x4 i+n3*xM( cx8t+Ebyg5õ"-Pw~0G}@#Ӝ'C7}+hӞ| M2}&}Kʝmn9<yj ®kZrJ>̺ ȱSj*=C?ثFx5'Ґ |/qkfc|xqnk?E%B{z71O,}/ R7hDAr#yKɨcgOvG2nE4}O.盈;- ?V[kܧ[Ԓ8jݺ՟>xg#~|"Mx6☄]# Aü7Ρ t헑ԟ0 zǯ;ɾ]Я:GEߣNϐ|//K~ M>Of#?vSa0?7x+l/n1g\8r{^_ffg`^A;{+GOS]i7-VN\ N{NEk}y/Sf&gA{or^^̄b;e8=%yhCʃGd>K;- \l]{_3AxkO0s Y {x㰎/YLw?p-MZ` 1LS8>tD.U7Ċϗ} :)? 1?i#~8RnXڇFtBm=yŷ7+x![نAJ!ao_Vm n[`K T:Ggק .7VuaQ/++'J":lg?X UĐjOsuuX仓6X1\pElVyhnwgEPē͂PQw|E?+D4ibW8Z!vXzdgg6'[48. guA K홻(r¹?Sk]ܳD.ss㽆jz,H溸Y;kvA{x *pDL7ƔB~@541޺/G i5%PqB3iUnJ>qha_# ߻Vh4/S*W9gȶ hwˤ{E+!TZ;s 31 $:;3-kaˢP}{|S@<[>a1!aQc ] <;u;]rLqpeĭ!Zs"Uo 3ֳy雺wǸiZ)ӐEG?#>pZ|=xn)w)O'|uE~ΝGQ9"pۢ0 yz.&<4գK_?p7f)e߾&K_(1s (pOߥZנɇN(Ey!>Vu 2䵲b+Ir@TPGhGSWOĜgW-jDi2tWWX@OiX+3;#|K`~ٽH냖w#1[ {䒎qgTP82{]fql|G^ u)אUm^yA1jkXQ{k_J:r/8-[ M ͑x0J\1*pye&qK4 Rjk[A?Wmԡ;v#w Y'. )esV̎P9J޲ݑQ/;0.''Vxǟ~WiW4 yK,ˁF^ 8H'ugh(I*F~!a BYc"5FV_6{S߃]xx2^]ǷzJ2v-}^8ͷR%?҉=*-.؇fȨ戓j`|Kؖhe;|i9z.; 'm?i8]\ag¨}G!s{5_l\<_C/vӈKJ uwN SygS-DQ NA%=*uz 1S~V~ jY0^2n0WcG^/4gNԌ^eS =A>:14jwyEG{?yqr5"_ \O]L{`/)B.Ƽ]pt}2O}nƛu'ka6լwBy 2?1V?DZx9\HS3B狧*u!ǿx3$s3YGa'AӼ(x? 
HzEey^l£LCv^mf[fJJ|&/W gVc:"aMZ,r/c+f:]]:I3~69_>Z\48^dږGYoؚ϶]2Ӕx__9Kz?xޟQJK/"~ B\]~wC-\{~~!$"?qӣ{=?1a dQ _}s 0 JM6o 0;}ڸ6X7~dz6s͓Uxbně>LMW_ 7w~s_WCJMA\_D?1wf[mgOqЭ{UJaaBv6߇MKУCJN2ӷ7vfв>Q\p>-+ߞxӫZ9ߪ:F'س{g K|'t Tg9gUyMG萷݈ypg`!C|m Gzwyf;9z|u_eg[϶b=QUˤ g| y<8Ԩ-!7mlwfG͹*8ty]oS82 Q-'?&w|p}qi/nE\=\ii:ֻ͋a-gPϐшV7x7d4~v[`OzE<W(ab{D׿?A7]t8wPJ-]wۧ~a<^sUXwZ9GtFcWb<6O7関[@w߰] "'FMoj%98(m/AWk9܄]=iM2s Z~0ڥA{w-l`o\}JhaX op8rS5]qOxfתv^5WQ!";߱:\~MW+9H[~`uWsm0 "sZN=vV!,2a9vqд7A9\reSvcԏkq`nwhV&}jp*kx+ƑϬfׇ]W(*~Ȗ+:];L8U}| vߠN>}@g۟}lW,مCwgO>E>J5+ E1|j*%od|gҳ%'SWg'c=b]4aͲ%h3 v[{g7:է_ȋj]ahNzX7w߉x)׊nn'E9V|1Jcc}2|oe-~̩޾|}dvyu>O UXAot#}a6^wgag]knLuqwtxB!ެ<œ\?+{e==vyck)=Pۦ9-`ߋQ_J>d@Wb>I"mA)K}u0u jMwsoP֋f`]Rr>֭.c=${^WWLF,ty4Wƺqܵzeo܆uo'-Юn{{>wJ-cdN++2{yT'Cע jWcvV|'q_ƒ|ѦeIs}eOn6ޏH?*>]50h:9RK}iXa]R77eNzqI%ӐQ Y9ߓpyy]#^܀p{]z1=kCwy$~vȼiacgܹ:؃}S' @~Ii~7#a԰8 Џ~鹷1Jb/E^=6tde ȳlί ;Y*Mü2#[ /"owh)/ۈGswsvZiPIS?v--Wufx'[_ vň1Yj#c}}\iWk`OR:\y\#>voW?sJt=\m|$3a=;9tOʾq%My[m#~J_veݺ}e pgbuÔ.#ѿajt.:>Lj {Wjs(eMXTyχq Fk`#.n-_Gg*㠭\5k`ЭY/e}SY@0\SivD{;K07M>a>r:/;r.AZr2r$|;$Ml!Ho[[~?oeя#PS!_).>ΆءN\݃}Q J_buӬw7 }1E>%b~X7}:eVM!))ImSƭ/Z9;XYXnSWwij1:},6G<ꁚ_B_6UxQ=oizx͠n_!^^))ʪLF7T)g* C=G<95߫_4ypjb'aSXjuPϣօ6 pnr#C6:.~;eۘwֶnMK-/M|u_9~yl(%oF\s!ʕuJ}j )`[xugS#Z+qeWNsٯ2^zy*C#^OеL )dŎ&FNƾQՆ_=$揙٪ǽ_S'ǟTnUY:k .nnq 3^u!NW'M T絕cq>`w' {݊1m\ء5 wicO8aߋw%&cCwCY|Su\'Vc6"᷶{~1k]Þ_zN7LS#\%I|,֣-d>_cF5C~ֳ(oW8yq>c-WւϧN#ѮkC^c?u#p(wo8:;nW6ü"]-yb9ߑ9A''YLV{>ސݒVhAb71Ye =beסkFSs*%¼'{-eؕ>aԂԋ|QT[275/長ap$&Ow u/ßȭEy5\>c]FkǞm;qAMU F~7=;7f}!5>MRZhg{_,V,zor <5sbrf'R>¾mܗSd(*y;b5`ao8^"뻡7op>hecuG?úR]7iKۜIfJ\ay0 uz3aҨNMW>=5~̈༓uצ_;¿ţ`6\ xuvw3 7G\Rm]>{Eo_}a[@mw"c̈́{aikZIǕU}W<= KK}u$;(þ}X} '(D׼ 'ٹƛݢOXug⒵?뾆y+bc/D =}'M6aKݰ_ɀ:W(ڵ^0[ ?Kn1gvj{( vl)ɻfأ~urTYy v^xRrTy9[ oڃ[3f]Q/.cz*y5=+>ƣ_o͊UJ[bD<P[/֬mT>8I}JySw{۬Oeޓ >'W}z^+9vT\YG <',ǼH= (vo0qHSS{\~#^D0μ7@}MGj?c܄8a<ϮzzBw;M)xI_ϴ?#{TE Z9-ީDDDDsLe>n ͻ77Fd{?ON?{pﷆ2ޣ_;_|g}rg=Ͻ;ś=:|縷?7_|4s7| ? ?=s2?R>>>okٿa/`Oχßy6Ӄgko}9ig:}6peXs.~Nݎ`94|{/lAs?}g~1?~nmʸ׭W::R+ǯ꿭9®;87|q_ٿ|9|9{n<ϹߧצnZ. 
lsu?~޻bWú Lz\_\7vm p_~ +⁼a}w?~k'+~ t{O?OM=ss؟ӱ.y0q^ێ5l{/^}7yuuEǝ/wΨ󽕎.scxQa {y˰[ޟ1ΰngFy?d>?7M{̼۳}n+xz|_{:^_tp{v||xv=z qr'M|{p <9<ߥy_'t|lXq:Srן?u~aG~˛j8C=>?\u g?x8l%sOx}ϵ1xqC{וzmk'r'zV5=[{'^:T\t{>|aK}ݭ7=G+}G<?ךּn'+#xpOG=?w9ޛ}?o%_/:Vx '/X?/Vw;x?QktǻoQ{w;!}^z|NĽ^{gk>xNzë<8ߚ}ێ:PgZLG h'=oznIp|y-|z|'ˬ۽6Cb_<|QckQ;~U?,ǎቫljS78sKv;/<!koWZpۃzΤ/GoWeυ_z\ԑ~K>犇⮸dX {==9n{d#GK9{l (#yIތzu=uB])N[wv2gGAثzol?x%wy84q>a{g۾x^oT/&=.=Gx$<|X_O{^_׸@n"_Nn]gzӕ߷']y/ПEoFO{u>Ǘ~aE2|؟WGwv{/Fa䗁[}4z Y<ǽ7z\ٯa;Ki8O?peSڨy}H?y:'f_ӯ~اg Q8<u,t?uw={?z=7Rw?x9F={ω'n0|mGu.{)g<-cte}nG]0?~n4x?wF}pӋ_3\:7_F]O'ᯏg],7  ލ=ËOv/ͨ;肯[ԇ=nMw8A8f;Pt)&yQu4[+<19~6?*܋J<OէoOϗc=Vhz~s5WuVמZO/QG+u8x3]|X{t>}O:o$NַR}yGv5zyp##mFpޭu?RЃ_Qz{uhؤst_x nէ\˄Wn$|C/^[zݜv>E]'N Ի=z[7QzyC&]1^+T໥x=NWDuzfoSe,8=<(!G}G/'򟇁_6auƝ~k ~Apng^<#l>J=nO#,z<:D^'Ɓ{:ng^_qߦoKɬa^L?|3yKQ{Aungg)T~ILq@=>g#sk/T7LwƟa >#~y }tໟN'(8;KēWtEq_G|??܏x|?lE}<|D/EOzJ_ _(z/o+]p~.uAÃнi$~?|#簏~eޛf:8EZt-,t#r,}~xAW˿dzU%ǞC=d?/:n8yayϨ? k+'|u:0+<{,8ސwk/=g~С DR:`: ˷7BPW^w*p=>#vW:ܼ?5|L/gN&y *pW'z] /9q !{ ~n?ygӨanO;Hߌނ'k(=_8/u~) G'oy뤾K^gdxskA=q_'y@Џy9X*5%?==P#Yn <$~fm;Gޫ)]wgͪ[K+֙?U~ rS. ĹQ^ |2QW]s ȹ8>xMux`}ZṪ&s/u>F?5_iם ;Vr7{: Ov~:xS6kKWÿ^{QQ<=NOKgk{{mOfN{A y_9qӞ _o:?\}އ]Ǿ M^3vo}ްO;l~ p+,O;$mGw f;g+' ?nS<>~+<+VRwm~S;/Yz9pI'5f5g+t zQ8Wo?8uy_͑ >s< \:gjވN&?y{QCA᛽^7rqx8zb_xΟ_)z,tzQǣ8/HGb;6TIQϟН$:jy:ECzQTxz0ֹ0:\5?W9f۩y#h\ͳ}VO|~Pq y~})~8u{ Q|/Łǁߋ:}ȇ;+F93~qo5ux:'8Gu?zs>ߦ^n:$h-5^ۨxNzr- ǟ++5wwy_c4yr's puܟMz=>xvįQ/G|o]tL}2s WQxvHy<EΫ8 78 k;aEa|v9-{yv}VaSy~~3Z^o}ctѿ_|ޤUv|*k_2*>e3#pWB pz/X'<#zܕ)w,Xnwx߷η꿀 ]n{/u~p:|vot;w5ay3?NQZZF~K|O2ouvp#6ܗBw{C}~sςM,nӨP/O C=~?|T震O~?_E|AOG]aνo"^=i? >r/n<ݬ#un藣7>#]7ϋߨ[fDa>uuz>k~RvCÙOz,_Tpλ<\6= ^9 F|zx5{WӍMw.! xzޤEX.E硛D>Iy=僋iϡϹX~sxG\r ~>ogLJݡAܪsp+/;c]@sg5x '~v~cН>KiQJtɭ=/}*U7,W?i~rzFk j5j5VoNzlgxG3:wȯ􉕮x)xvgսTBTWU^R^5h ǿ~]tJfgpją׳gCVoyU5gx7"Np^ąn]"n8:?}> s%} z?yK>3S\? =>֟+a/uIt'yyTx_ ,**[ytt^Go+ \Q +8^9 > ~|[8Нᦺ-ަcy/OEO?{Q+=d'?^ T\3Tq|{i֛V ׹OWwi]Ègz0^eՁZޜ^-_^E3}ѫX+}eO_ |ΥE+>q'[Ȭz ֺռq3SMw>k~#_f~>8߳_S_<]?3OX {}F}}+xwogO\#ny-{δ^%!e}8꘺/ڿs5u}#ԟ_7`]ٗiDZC;hG㮟/W_g8? 
~#>^ή9|oz4%?uO~vX+ QOTYϣ~_,!qU7=>dS8,}hquַm^ùC'sKs():ݔ^}:UCU7:\9s]܏C z/tpf.Uw ۫<'~xkD}̿I yMf=S:G|:Gsz˿W>[<e69kWK~꼝_+/O_<1yO}f %⏺7wg{xqn~EStf~G uء;Q*ﳇ \~È-V?ǜގQRxs;p:J?.яž&)}E9 Kz|(u|Vu?ClնC׭F]5 Njφ{4ֳx>|q~yWԽ]g󺃽Uz8F]{\:P|w?KCx ova:i>Wݴ:ԗțZ/5wY\\*υB?q/~I8L)~q oao/e=?wAop 'qG?w2_E—}x\W ?B>no~3·pz;^dߺ|Χպ?>7ux yA"ϚoZ)c>G_z;:|d?cxo/'̅9ΟC|o囍Л@~0Y~[ Qީ{/:oAW]v~͞O~E.^߼[?h@[u u{ץnO)8>uJ^}vn ۧU=|~yysU˽^|?/9pO>z_:KKWN܎zTsA|/Rۮ{B珿0OtiGs3GBw }ܿ{`=1;cǃO Y>TYWg: G쓾=2WÞԟu.~[ϽZ%os[B~Ͻ^VǝO>{^$n#ˉ^pVMy:G'9wu/ͼۏzoSS/ӎ/Ow$l X>Խw_e}pկr,}˺f]y_Y+9mq.{?:2Qu/B/.tdxw4IN:O&~Cu/^ 8(CB+zBzMwN/<~Zu78VË[ݎjaǙYGQw*'(ΗW$RǑU_8BI_|0|bކ|ثϯ.A*o#)> Oiwu~*/88?yMF^v+s3P&WW%?q9J-@g\@/w<~OzX. ?z|xX}3OqI{O.u6 Nrpn7dtF8Bx#ֹiO ,:?]7Ӯֹî/+$g>&3O">5aǃ[k^%gW࠺}9gszxv7^zvYM;iU'?A;䓚c;RB.KfW#2C_/|nQ*F8[eԕmE"椙d|徿pcx怬s@xWAWu|u|ÈgdQ_w;F|sW?A/~?+~K7T=%< 97<9y+Qv2X?u~ta.F:<::<Z[菫y?~/ =Rw#xZ,NG/A?ԍxyI]yV?yw|,껧oztւupBՇggS5:g#_;Ǘz/_xJx}|xtq}3Wws^?'9^^ByT| <9~^H<_?]|,pVH\}:Bxunzs8 U57{v]GgG4S[89|{q/5q#gBTYS?^wCR|{P'Β-eD_jnEף.Kݏ{+qE#Q!^n1OYVSwq""8G*]e7ysޣ:~/8NsJP~qh{:ч,/c<{YVz1˽N)ԼI}cl~Oy{:p]8VRp~Zߋ8*._O{:Fswev8L>_b:7ӳGwg_Ϟu;{>z2]Gf^G<;}zU Wbbp ;:]yIVb1֓rlvߎ:n8|{]Oпw^27^}Wv] /]w q=G /'Ww_'D2WAL<;z'@v /.T%^TO"gL?:_.Vs>w{8z}A=$?CWwC);C0x"?Vx(t>x gω}#EGg/{|{|.f=qצ{Y΁,4:Ӥ뷕Rn'G~|-㉗uΤM醋WO[j;p Vz\pA{ߕ>p=խ΁T}|i{gv[\1x jxQ_[~̹[čC+4tv{|yD(GgEg]kXKǛkocAg<z x#m?з}mu޺G-<*o׽4ͩUNEGՑ^F&.^ }Ծ s@TPx8k~wGͼ\jFs:/!˓Лqوǧ7U#uâKk=x1'>UG}q# ?oCzn?;ӏ0qЗ9֕]åO^ɯ"CoM"nj'oMS=ʫ¾˟C/B?8z<}:~_zZ]V^xߺ{a2qO<+˛ŇO:'OM> R |Z7F>A-섮yyapBmWy|s\/gZ\# sٷA*\<΋7Qg]ݟ^yu*>;FF\/ujAnů{3F ?yAԭ΅ޏ¾sc;_OBGw΋n/Cݛ%C_ >zF(= yr3"O{aFu|#/gqi𱏣>s/_OCFJ=_@uίpR<\C>:v/x$~oΥ8_|W//zc}xc>u{zjţx!pKK>Zy=z5鼩>j+3< D:{z]^| \f|}.?x"9SSou\BX+Ӌ珃?kYq]UͻODcqC/UqΜJ_n>u)6/COWUݰv^*z&<:wz~sNFVey3wUwo\ ~¯؏~o疝I;Uɸe_]ËC*ĜIi(V4x7R/;Jud[}Q}&,ٵsq9O|C!G8~0#]~ó3o _a`k߂Q_./fvɣF u>buʓk 8|1fdi^KQ}uJާ)V;.|i:珿^|ν˝ Q ?OviEW\x|uI 
<:[KX΁׿}4ׁ^~yx,z^o<R}63_,Wr!#{q-໼'J5M_O&٫7޹57oȃo]{/u׷#c|uK:%.wf;xcyeBpʹuIMػn%v{^gC@#pQ9ĽG[op~=ۉu9}r8Q%y,tw]@sm8IC<>f>k_ڎ8_K3W8JoĺtÞ8^-a'L{ѫE#>O;<_̜Ǟd|$=^TMtwuhv^9!75SM^b57`=K0Jx5[ZZ^N?uWd'+lX6Wz?|sҵ}#1xDe>'|ᕽVGz%$˷/ߢ9gYK]opN)\8_tS+|qΉC2Bz tsk? QY0Bוŵu@PO%{=>߷B9Cs | p׻o|ٻ:.2'[oq;V8s:ĵg::|4xl.ekIWsps+\}$5u~]ʻ?Yޮ93kNi>W w_E?8I<ßtR#~B7Q1W,GaB綾ѧ[Yv81@gWKY/fQ+?\~{Wy߻Og=Clily\ܨs=>yWNtpu>yσBGůg^GOQZ"xsCϝiGn;sx?-c0/ϟ|#xyT5Gu3|>V+{s/ſsO/gTϲ=./P~oW#'ëKWuM5ٯu}Fk䝝ǃ'+^SF=>Ө=u zgעy}{0<І L's0;O'_ُz7'2F=$x:VsN9n^zq8U '𴼆o7oX<cox`x%ϼvG~>],~ؚ:#TRO>~y%[1Nzqz+C?V{?'Ցg׼Gz*<|I[;zΦg7{WSO<]q?|p:3|JxSЌ;ϓo}{&aOs>k܍'{QLCoN=N0wBQݞ<K|{ޤ_;QWЋV^owOf/mކY%CGӈ[=o-說d{ y!N#־v]xO:>tݮ[xݹE ^pZ"8O:Sg5`uZյ|?x:?_]sťY[!nM<(/G Cɺe}{uB蛓okn[Oy/kXI\i[s*xݳ񭺺!ݹfq@9u"yß=P߷sYQ~7ꔺWa_|+|SΣy?1.D}>Eoq%Cއg?sዮSU}YJ:$x y?Q,g? >zn wY28z3(=):@GCRpFԛuzGWĻɫk>ä/}x=~~Y{N6:]u~OŽ;GOdK<'y7xW9pxI+$ռIcѥ2> %=xE=(9;WKϑG(]ǩN13;uc }Afql%~^Z|gᘥk◬sݻ1^ͤKP*ӎا~wFu>xK|j* >}֜8K;᭚+zty_{ſ> ߥ_'$`8 Z>NJϜuzwͯ|2kӜ//EeΙߧku? sxPVI? Rב_}8X0p|9x;w| ׫{P8> V]9.ֽiTTyZfNwF=/tB'i0= ]4tfu>]?oǡ_jANj#Nk~KᄺcX:x >T|N\ۆs~~f];}|_>}-+,:#}z5O>p\ZyEx h_>stag})5'd{Psz{]G}{0Gϯ_`{%>;/*M:`w&j&!yѨ7~xi<罪_ǝFIDQo4`G{' 7u;>~=b`N?_Xs]\՜y/AwD&z`k:}Y"wؿqo5_vQv+7ۿ=̏5:$~~7<:ko& }/+ݏ#GGy@pG|i~'!szY153yy< Cᇺ/cqQ1/x-tpR:^{yEO5/hSGdߜC|:6Η7)>ΏE= ]x!hj{VG|q_ݓzπwؿ~~zzWfyw{=eBOzyQ ˨O>߹cLF\hNRԽGKa\ݟ>ydU{^8Qܐ;ܲq_gܓ<}iq5aI?OZ~.F}+_/8x;ve0Ӯy;QWQj~)θt{# c}1H=|Rmꌟ/s#}\%u^k_?У-x/o]@w*}> Q=9f=!xO=7y[cDt? V<7.uר'yOqeuQGl__|zUjI9u_ub5xɚo6:WN<3UҿϹg:5stu޺뼱x#Asazg鸭{Xqßu~u'|Wݧ8]7ݮV_7s~Vsۡ=2/&>Q:v 7Cx>}+yL5B?WTJ|_~÷urGT8 s#XJO~#?^<ᙈ9? 鼘9/\G= >r#xOI^'_h't2/__ }\=ͧ# v1 {Cogp^sWoαC~z\CyQO3~%KxsM}j~ ͊i7Ly7os_KW^պwSՕpݳOvG],𜿈:K]'/{;od?Ap^??egosK]E/Iw?Y'qιSuUsx{<Cso^Pg=o>qSS">?|2E9W};D}l*vտIT^:_߫n^HE^w ޿:AoGǢ>zi]ߜ oLO;_#՛5?p9p}T? >8t£o=yn#|}ੋ>Izm bw|ǜ$qAp8~Χؗ_q(o<W:Jg&NX7-h h@ S9_p*|[5{E+=iij:]7|Ȥ.Wz~¯ѻj空vw|'_ C^}>=$^ۖOs7Z~q'xЙ;|D]wO|0}xqzɝ'u~o|y>ݲ8zԋCg{O^ މnX}17C@yΖG/{ֽ +Q;K=~Ï<7CuvWxJ|?ju_TKuҝ/ox.}]|a˹vxc_ԓ_K_fx~ ba[߷ҟ<]N^'^=yDܚ}~J#/p8 iw[8ΏF*.] 
뙈p:E>{:xx'tyQ{<Èwۡ[/#o;yoz|v>@ſOZscpsx#nϥI^Ù5z}Owy|3t۴n%t9x{nɳ?pzM"ʚϾe|{GNr޷t_lc_&QwLΏ;`UuW9օ;ꞎa^F\ :um˞v{Ƣ.bs?~y;E乽ODþk3:]YUu=Q'ZCgݞEډXo/ި-+Rÿ:KOq~K_q=LNR瞢~::%^9C 'n{;+Z7}eq |DWK3~vNS^&_?+~ e#=JmݯF.F]|ƯgNee9x羽>_z9 A}5 F޿;N97 ~C/"/T}fU}s gE|pvΤw:_DկmyD6w1Knp|џ{vK=~gM"^ ^|'Psz>'>#̂Xz7?yA X?ţM/6xm_{vXGϋǙ{(͉[^/^o{[O~?8DOE=?9wv>_E\*p]{>Q? ^(痩<8Oc&ف߇+?w^?=Ocυ.xpL?8s5x$ OK|WsLaN?x}ZoYdwCSOg| WKx|n¿J3'_SC/xy੽ߣ6< /u'Gd'/9c]QW\$ϻGha?t=*;_ig:>~^Rg \L}e_C%:烏_>'<yuX5u* \H'sGk̨iξO?kܟ7 oek=_kwg枌sf]$>G#/ҕ5:/NW ~qץ3d5sa߿rk|cً_eҜ~q|3εk/ܓ"T| ü+"?o~8E/AxX0[^Q gOgo/Tԩ"~?Gz>/s9T^G}zťyg+oD?yWu~vMî?Aϯ/k<>8~Ⱦa~YW#.¯Rmyqq}K(WT^վ_Bga?N8 .xxaGi{u;z<.ﳳos2<7;jahy>.Aw]zr%ȯ=ˮ \O]bCϢ睍:Kf<xOZ ~~?p&}fު>88Loˉ]g9xS/AqX?=D~9{Y{7>G8^9_'^|/}Fo />:} q'ʛSOv5p|hk^acV=/u>:UEϟ+?}8߭R=^33Q{|3G. X ĺ{ooF!Խ7>ҩlз!k]<[ Ŀԏ~<Ϣ~u>\LP.*F~|ޱkQ_E<+?v>Q_E=+ p$Xײw^T#^ T$Oz>vw? jO%n~x.:{Q9_9x\:z×pI;KϮK78Q9}sH8~ԵOïOEܴ~RW=8'%s+o~AocwyNldnE}gޜwz+hR7vXv+6߆y\HׯqDxqpˣWN+oY~N}Ur,~+ߟZxw>B{~_s`=pz͜س}ϱc?Ww|n}tΉ>r#x/C{zC9ykQXը J׏յ+> =@|_s!'^I}m4x:I_?T]'מ4xKapx^ggkAs_‡; NDJP}ΝV}%Z x=*oiRgš;_ C܍x=~vEooY߻G_U;>oF$Ozo:;ڎ8Wt=xGqFt#A8dasP ukj[/\ ;7ʯ'ϼ~8*n~EEO}F(L4ގ_ WK|ϼOO;:`nQǟ欥҉']Po\ȾN|'t:1+=y't}<}꫃n/"7>^oE}2v~uPR\[,.s)U3g#]}Q3G_oGP߿3;|]=,lo'cݎNQZu?: DyB׵#p?􂃾z{{өoE+^CBCOE}#c?:ASීo9's] ~F\<9R}yLc5O-zȓ?~Km= [)~U7~>ԽGv{^w3;|"K)ݴo5f燗sxox;Kd?}L ˊu۴^=8rX \ /Iյ-᦯ns0[UǍKkQo OQ;tEطF^N3Q7 ;g_ zz1}+QN?Rp}>Ջ?~@p=tOy~p"jNYf$ ϳ<|T2/|p:tѨxOCƇ·n,Z䱯y5sox!ꓫk>;sg#_Wݳ=ԜqORHa_^'Cq>^ſGT 9@zƹr?ooL7 ?7Jܢ4 o<<KށáM ~bB8zQGsJn|;{{>|!2p*ϼ{!,yu7sӯq#n:uGGT?o8X.Ox<7D":P^/@S~F OW<\e? nہkOy_-z~U0>I|5;=u/ǣ&ݎk~3꾽IGv#.*?#x>As"?Y&xS??x7 '׳z:T7yp_t|s g }᷑o>A'4׃?Gi#;~un:|ř໯x#:T>G_0^C9p3>|G v>t;xk^zޤ9uGQgէ>xC~}}ݞi'Kvkþٽwl+?<йߋnЧ/25pj͵t>KcݾxvAܶ_~G,ZyOtw?ΝN#/}u>|qu(sJ#W!oE=w#-\+? \w#/BcY|/psg4z~gιoSyz¹ZA><yׅ߫Q7lzFP|o~au~}a/}ŽWzDۥ_,z~Gމz=?}=u` Y󂊟'QZQG./toJ}zBqL睊zW)>Cy S9bOzn=}fQ{ζvߕV}[:5ӑׯ#NL{M٨1usSݍ6a5{=xka/Ü ^ /9ů_cćQi?v\N|lQuna}2Q_'ֽSRHx? E='}|հ'=}v.V]8N:zj~nD=y-3;v/>D^QGܥ?ĕcߋ:܃7=N$xIb}uI>qdč/cssmu >xx,$i~\x! h ^P,7i2yӹ] : ? 
{՛'z4"^cu9y˿d}!3/q|>Gr+aˡ=u?RO8p{=Qs΂2R#~vϮ)J|}xݎ^yggvjt>#/(97k_܃Os _iYˮv{>o |q=kwIUj¼?|&xv?/G'OG5W}<ׅKWVB/Neޯ\ExEY򩓷ױu{K}xt2#>Խjn߲kyFoM~}n! Y߿Í/(D⽈\Kz? JXE:u$򩺠tFԤW9c{Lz~g#df]_|e|:xG|;p㍨k.ACR8FWD|CܟmߛgOGc?VӷjCguUTO{^=Е's/o-7 :>v{ \|/9.yPu=B?dJWi eY5gu]=>ө7"{_+gyO¯fwyΚ'28r1x3?WNqxo?s ^Ά ;zgҟyr䝕s>F?sgyyo)xLqoӋܥs|#^r5i"{~/;Ooٻ<Нj!x[O_<lG=zSo'CUwh^=~ U^?w/'_Q1G> \@7yl=yб0y9x2u^Sq39+&G:9h{NuНгjnȸ>q̼͡/=_t];?~.t4 E}A/j\gyNv;WY ݄:9@/"?_ ¹~m 槟~:BI[/Cw=aYsЅ˓On;:|p+p >E7}r/U rZEW ^_ yOCw:j>\`}Ozo=n _ Ή ^ +NT|Or5xә&7B娸Votˁٓ2x7~e3{|h a|&>f6>c:xc# Ძp~}4H{oFp{׭~&qgvq1x?|K'Ӟ.F:zP]K__̭xqFA79j>_qD=sP Owsݣ6 Ns~$]ϞY<`oDj?o|O_bTt?~ɽ ^Oz[xOCըm][oz9 ύ,ǯ?~|5y%?Vtt}?t;v<얿}}sx.~^Ǘ}4n _ M3y'>La|]7 Y = t4?_ɏj>^CWПw/BoGF'e_^wuŤ/#ߊ헡_ r2b +f쇸>/t9p8iT?;޹yTĺ&: oׂOc̿s~uM3}+/#,ַF=?l|%TܢѨut޿t5C܄o/%ǹQ?}w:5./ [{{>çwfxk'N.;s-t ߫_y<ӡ'>]ou5;-_| }d{>kM7jd {|'8pPz|v2d:ŋЋ^Os]SĂw';R]q-piMܽ>,Vc ގz]]{ԼwN>:>w|7{K/EV_8U`}،v*XO-^FZ<9?,xs?ӑ!M^ 8j~ :>zgiS~kj qG_j}7$긚=umݫ'<B?zEfvy"t~Ոgn=z;ޫs9ݴ/oW=NG{*޵E}r6Rs+8=U\ |_F^"D݌/nk>D6'Q_1w=7=  OܝvFN8j7k]}}=\[u:rQ7*8ߵI=/GG>▾+nn#؟Uye=DO\sv{o 㗣=|fB/vcߩž ]Oeny=>uuzj3mD4cRAWǕ|è1DZc};3|i?;o_5wg-WMzcFy%ϹO\Q6@7zv|#[W|tSw{܇O'C9d#xq_Wu{3z|{-H})αFޭ:a}gJS]"Ϣ;{f7W^ON#^4lnO7Bw{26 u}Ip OZYf_+Q! 
O~^||U/t\<-\|.z<7B8.O{@=.& pxp79iԵ_v{ًRw~q_z؍Ɠ\28/ 7빆8AwsKx|ؑ;._n^xJ^ {QA묯{.r-Q8įOźXo{+Q~sNEs~Nϯx8XuϛWr_K3ϓN.@dm~;k9?\+?Z'e r^[t\q+H<EVUD޶?ћ/=յA⩹IYQaKws%yxAA\vɻx9ݳU7B/`9U})pGnD7_Ws^sوMpgηt?:Vwy0sӃ_#W0l~:x"}aǩX[/ռF5UW¿7|OyS Qȫ/?xqn`MZo{ Zg.v`#,6ϋxT.k|v)O9+߬o] v3?î;'lE\ VcOw@?@wB_/w=8_y4:G|V5Q?|<ߨﳼ oBIF/P ^nwBg#a oQ?ÍGyqأ+:DEǂ+g=VkQ_O>}`Շ/(xÙ~nms#Fԣ׃ǻҟ=yM'x.Y:SnZ ]|nV~58y?f=O;?~:f$o.z]~7xOŽCY2GݳŨ7N//ΨML>U}L[= >F=_o+Y >r|Oӌ~{3.J:CoD"_kjJonu:dW川wC+|^B<: i9xDAaos#R/F^eY/^;隕/ߊ8FЇ_h=k} ;t^j訫/}J_WL3=xqX]Gg“w ;Oyu&_ǪOk׺<εxL9sW_WO󵓮;]R7CW?K,zlZ_oVʢj:慲y@K[3?1Y{#EÛ܊zӨs?n<ς?bc|Eb[ػ}?7>v+9}ȥxO!_Cg<~*S}FTҸӿ,b/~iZO~)Az'}}3>_[w)1?:/Ox6nʡKZ?>]_o8쉗փO>\,xMnyM=o~;uP/> J]89;S:j=//W^_O&y'n~5=ooS^ԙu.6eO՗zspn]nzU_W7uXxޫ''g/'>9{pvS< /(Oaq=+|i=F}5'nu$|>YGv\}:_o,N~Ko^Zd+IQշ^Sسyk\;Tô-9Sѯ7B?|WA}> DmQ'SXx3Bs>'~o-|ϕQdEq uv{ |9oo/9}w>Y'W>#~PEfU;[>WT|B9cu3p݈^‹nON{N>t8e#v'~yΆxXz^G?xn/{:#㞿͍{8޿ӳa?̻zaSW'|yαU^yoM ^7?W[:&x뺧oq}wo}wׁ|x]hyVu/y|.`WgW=?I 1;^-}sq~qX+ Gޓo|^xIS󣰋Cf-p?k+n?f þv;qr_/}ՈW#O8Jt2|yΡ9bj<8ߛWn<~{8V|AkkԨ娣8^8ICRz1~!|]^ٿ:J^?stnNo?7{}y:[潎O[Ww8MGnU^7".S7Y,{9hQ ov=ucyػt,t}+{] *|N>οîSqd]NV9-8w|Poy!{9qߚ51 }+p鹁 :.g5<͹:u0zG;_]=R|GD~vm䫚Gx>bL{շ?zc?'?:8n.fnp8z^H^Fy5"p ^w#W C{F=>/Jq|nF|>\Mqo+ꪜu<4׵Ёԗtۡ/;~'pzΝ_8[ QzAqN4G}q\4X}QP\w#e8lrUNn|s/[;wv;v(# .^'#\c+\F:gok~ӡ7VTg7Bq  i}:ooyZFyWoy;UznQspt}qx;t7ך(8ϝdPյxg+=U?Ymgl o on?x;Wǟ/83> ._ >.wD3_-?1 =&z.+2ס/yqG}zr*OtV Au}xo%U~=q]ǕgKGvU wx l,lF]~|]=_\~8GOuݨz;9٣*/uߍ:={^~;o|>B? 
VZ\Cz%g] ޹k+z <<{{,?{}3={O#^S}j._,|3SYw灛 C.O`dY}d9oAy׎gp9\p5Vyw{tCQsq'7sj?Iٜ(߫k߁]:Ϝ Y_^gms<| w;פuȫ};ooD\tKHU3zg{ u;yZ'W㽦axD?j߯/[آ{Wϫ##8x#p:ϕ]ޕΥUa>x== IW"߇'Qsٻw#Oi竺N ]yݞN]Ӎ/ަAۇ>yE4pi K8F}S?;xz2ծ܍ý微uexyYy;~:u{pxr]R]x\z'6uujG{|Zӳ_|S?x;OGf~XzC_G\SW/~3pڥvI8 VؼGwRSt/ĕ'qѰ܊S9b=~C/;?^hqMaφxW?u|r9 \nSy e/?z[5.wV4fߍVǢۭ?Խ/W}ӞoF]_s=Й zG|ͬǗ:e7G.o_{\8j)]sg>%8'"K%v~s.z'x3vos1Dqι `7}+WǮ{7ß?1;z㏸s#+{ߏu+Lߞ/՗Y}^xQozB\E^S\zdž~-~ L碮_uf∸ˑouRxе>W#\E}كʽuOɴǙQ9O7CO:pYCyjs1eڟD躧vg^_hY幇𣚛w,+qX8ϯn]xS<_ .[Wx_pT_8uytOϥW;KsN>{3}~+qzsi=0ϝofom_|sE܂w_zi<0[^ϳ/ލǭ fԇٟr-p_ov\t4zy_3:<+~  y胹~Wˋ]..9&^?ZFϭ=7= |3l]XsYzRςg^ˣE[~n/?{^g:wjqy8>\vq5vg6Cߵ> G=" WO=|uTȣ4mqZckaOt'|/>~h.~ԕzI?=?ǾW57n}ϳ4ǫn \J߼xQ{6"Uq/"OSVQ׽±^5?!z3簮E~è+ixF\>7ANjB)}+GczioɫWMUq wbw+z[wކ?J}y<#Q(+ɳoקu0aw>Z[k.f*8FJo3oGsݷ2zxxj#@uxp Σ~s#<,\(T^XA?d)Չf{koM<݊OgŜ;Q}2[] ZWή8e|Grֹϻ/{Q.d:wt䃮/Ž9Q?[}ß_).9WY|4?7z~|' xۡ+y7߮~o}_ ~=+c΁k=G Ïk#ͭзo՟fs}OޗjOs ?.z޹uNDY sÿ;u:v?uW.={o/+RZ:YVOu?a u/q7^,a}ݝ% h=e?ԉH y><\S/}.S7B]p=$=W֏bMz^==~}8~6/:K|+Q{^].c=+{vԱ57i9"E|?g{y)/>Tivuuu^SK+ =zd)py3iwC9Ma=p5K}z=9Ɠpz7B/y}*msNIB&V)'򬼃Os$V< Q}屺7g;Q?3nGܮUoXv ׯF3y߇KɏoG}`ݿy׃O[O xͨ}:݈ 78URӬpTjJ-=.W}/B~xYw߯Ntn/D WܾyLpRɻ_|Bg+9W8xl(G+}}<ä/#[7|,v=gs5uW8\ ViɚG o-]i SOt!˫3nz#p}jv׼yߧa<ql}cy= ;=}zǡٚC~ꯟtޖ=ك^GTT#.w/{{ғ/WG<R1qiox. zK8xBo_>\ܗn?Bw'',<sqrw١~ |8SdH@OXBssWC;Zǭa#.ֽԱg dJ w㝈Kc ?cVZBg9)Sw~a7귪ߟQG }/xo ]z3_}|/uon+݉: 󑇿 ~uooЭ:Z.bS_8xazpFkZt?t7XFC*68B'Q=oWZ5}{ՉDQI3|v纏-qc-psE_͑ VAͣP:;ꔺ ]}<:{~?WsG=?G Ot7p8^yZzto8O=_7ñ=Rփ_'t< /|=p2n榼n?gv~/p}fNX||ב WM{\u?? 
y/G~M_\y3:W~~Qּg=:'uY:9gzK|,p~o'>ZA5o=+VNYb~^j = U ֓ldna]XFհ,j,Oz7 r]pe<{7ixk84|÷Wi媷B3&窂oLGP"=^iUx9UӪr+jt5/)G>Xu}?xԟ)7[t=dx]>~p'+nM,D`'K*pJ3w?TuoGPnַeb.TC|9u܏8#?X[2|ӛߌ*TTSUoZϱPK5aGN'wU=<*t?tn8l^uLᶾ#W[vnK®7GzŰNt ?sOWȣ֩q9t=|=<ף7=^ԴE^}@?=:׺CG˝0tq}jPl [ .; ;n:~kMe ?u P~oq'|?9;w6Ŏ?tsVosSU|+o>:du[u~^~&_ `^D_[56'Ygxz=w{8M2'ʺupQiUT ~>^tɘz..V1l}T֟oza=_t;=3$b;_y PqՅ;w#a'ٓg=Y?yHw+\EmQ?_{qQAՄsĭ[fO#_Ra +xu8֝Jzjl,;vZ{gt8SruQ[{$r%[~7xtqzWK.z|+tȣVt(}K[AuS`^G^PاF 'A۟uAxV:ڨק;<<<\([=:}9jԥ^fO;MuZu:ջY븯i8uds Dߌz, ֭肛Qls ]=ǩSk a'`E~u\z/xR~SutLL ?:`]kZaϧ}S?UWB۸8d9̂/fONM}8ݩzKة.u)v  ~z\?aGyq{*CBx{kx'ԩt;u<{,7o8Oyti ?:8lԝxL}(; iU|([A/zl \tgI:0%{kteޫN﩮O/,?O'l g`u{ҤfqnOvX][|>˿p*~᩺Q}vxY<q^(^j:P]Q˪xZuw񎸠~3ޛq+pNY ~(4x-cW"?/>T~Ա/oQ?Wz~_waKO =ZTUY ]K|wM^5yǾEޫy-:݉) FIg/^_o?O9ܴ,Nة_S>$}=[V?ӛ_؟线ϝgN gx,:.U8z3F^^~nٞ%.n䗺w|0zhvO'['ή>1p|0x\?e~~ ^Oo}>yt'S_\>awմAWt՟* )y!}&pzcuowS|QsN>xot)t7=,b)yw=xtnoJ[7= 8vQSjݿԭ~( <~/x G7\{[.=nQR_qqAL{ewCaWPJ)OURc?r1W..O'?zO<_.9yT#M:x? ާxyWD};yk=|c }̮L}x5^}3uQNu'Xq)p_y;.[A7nwj*ecpWi޻Oէ L{]z>~w"VIu{V?]ck:~XHG\m?8ϿQ!Oշ.j]Ч6CO>p:_RzIݔf*>:Xs}Ñp|ZgoawpUys^ r0yyKu`:nԁ#zB>7 ?Uzn;'d\G.ؑ}D3;:WxnX1Vz?9#>xEݤna^G롿8O?,pMNVS~>5:86 }yҺ5jU֥?T'Mߔޭw{["oln|:9yW|G繷וo<},yD<[^[>[9솞Թ+\e"N=xsŧg_[U?ufJT}O6#q:/9W+MBϚu^n8Ϳ;-6Wc}k.CawB[6/?p'?k=ꗝ/JX4EQxnk׆2n=tK7wo? ׿.|VSX_u۳OkRot'Ij0ﺐ~G">s=7Zxyy72p'8j=su;?VI/~Թ^ Sͻ C_G:o[/|||7Vs/rnu C?noǓkXxn{,c] ESQ cw]a\=u;$8ͺ+V?︯mꚋN#V}oG\?uJO;y_P״<}!zn 6B+}JwBD]e\>/u:x{V?/uX|$xă8?xu4 ӨsK=?~胾n^qT|U@S oowz/IG=*FL}W@WלQ,p9z8k^n[u(s =z?>9΢O"X7Z:\ VRoɇRk]p>b}fpWނ,pc]実ҵ#ЋDEf}VIO5v&v*UOt m? 
ܛp1]?ѨO߲CP/wGG wO]v)8^Nw>QǑ ^>o'qnI~տ֟p?WȢú ^ןh]b ~Z=Ag֏_]smqs>L.} \p{,8\8,«xPyܧLѓn7^nkr7eiw|֤gg͛Z826 ᛶC/OPy_q3>לI ?6ZuŕWX/}Vs3t0|:?Z<^Ck:u'>F=oDWsg6Cgb&̺ɏ_3\^?9ބN7͙|z$_W7D/S'9<8՜78b񇸯ߌ~y~tݷ<*l9+~Rụpb;ON:/NzsFt6Z{(!Ax+7=2}Pó.k@pF55tso==Neܜ$Yz-@獟~S'ߌu9^O{MlSQZ<~Qg냯}я?ovg_|z>=[z>EO#/Ɏj.?y_tq5:^/q: ֟<y7z?(<"t˯n` ~|/x}uo~|ӿ/S^_x-⺼ bԟOb p'Uk[:O=s$|<^ϟ >n*s<}5{oi\i;tx"Q"Sk~:xuȰoBWr>Q?=r)<~w=܉:>% vĮ^QW5wh_}=܌8}?~Ú#18\7oL߮|xxAiݓ(oNv>tOޮoW}[^ЗEvཱུ%_.s<YGu&+=<5h{o}c5⪼oy^W:./z:λ^RΣjݬs8 }nU̺~#saq~-G?ßUw{KyE);~$~.~Ybip_sdQyI oE[*:Ϡ?]m q/_Iݻ1^,i~N]<}R痎^;aT ;vS^}VϢ~_՗ɹggoTss=_:w郎>S}7?ztx>Wyx~q|O&tyufqxܳYGpsH?IZt\*9iU=sK>ȕh[OY`tG|<S|ak)~#ߋzTܪsQ/oQ묹Cxہk.kuWv.hQ[JjQέʿ&}}85׮o?x%fw V= ?.NgQWay{:})kFpnnU֜ypuz~Vz#oQXssjd u\o/uVˌ{' wܳ٨㠚u]᪕~yIH}cA98{I: 㗭Wzw⳸_]|H<Z"}_©YgsUt,Bן_ w{Gτ>OիXu/ռݾ}QNQs;ު$&=|4x֛zWDZh-tU~ݎ[oЩ~^v>~;VQk. {OC<7~-%qE_1?$zxyN݋?)#>Kk7Cg{:,lA uc?Oҿ|nSց랬i?^=JsGޫ}6zs:Q׋x]v-;/>Q𵣾[[|Ay5q΁<'O_/ -8k_ϣTn&tg>7]+WU/"o7^^|\=TBw-[X _\ԯ\]?<%NKhY?'C= ];Q開^pЉէKG|o<7;o^yC{Jޓ bG֜s> ftty|#KGթ࣪%x W#{;'x?yο~sؓAO~9wuoOz>b?ۗ E^~8-籪k(ejA_?lo=:ޝaNyQ3zռ;Osτ{^GZppP6uz7.:y8z^Υcߵϩ.y#Uw?ZE^Y8M߻?X |z!/z>d/{?:qXQG5o=:yYXt;o\t{6 [{7:>G~S[/iT5xzKxZZ,u{SW`weANW]G Ur5/*/ɼ4|Z(0x^du{j.θל'>k=,}at{l;5CP]F ѩo>0xKS+ .~>@'wSŸfO"?;!~<#->蝰s gQGT'K='< ~@|B׃+𚗃Iגoi ~w |8!Zq.ꌍ۽ȗ.QB/tV:}=t)'>]kĺéՏ?9nw7Lثz|eu9u'1x/pihCF@# )jkIC1ͥKwv{EV^QKw*f4ˤ&2iI3MY߽(~gp*JJՕr9yz\'Q; o_M<'v+k(=rU\:71a|:_O/d> o#>o/{/tצ>^uq"}٣wB>O>_p¾~ |$+|ȫ%kyC6~'ކo8 ^^zGׅ~i!pwSr'}BWRQ禖z<+G|ɺ[HT}{>O> fxN,e/砿ԹA[7]ȯǁ.DqGTZS>{nYؽc=zCƣUB/ɾ~sЅ?B_N(^ܤL݇1qG\G=y k]՟"Kg89Tְs{߫x8f<|9q7 >b+5>A.g}[?w;R>Nqr&+% αnf]Xs~z}'ǡԷt;tq~f=ӎ{ Ǭ?FŷO:z<.u[uEKu8♿{?غw{s^ťS6#? <>kqP\+z'K3ȳuFė{g0>uu]GnՋ笓; 4j#-Q:37?|X-~K閡׽1t;(u|ۮȓu?{u\?w㬾x嬏Ww#zx`A[t'>^ūA|&=WB VOg׸P87|S31K^sz.|Doݎ8Ǫ06論ձ/f֥];|= =8Ժz> =dҿ.gӹB'N/yO'=ٗ^UZކ~pc}t? z> ']*iH\Rphuۺw,xPޠhzCӛ{g}~&ƫoQuӾ[=}[ O_ =>QǑ՟ؿax~<wqv~fgfqׯx[}ÏzQwo^$/;pu*nuUqh+JKWDC? 
?^?8oǷozj՟5OumϓYU2Nuwon ikWNs^i[=I{~rߋūo} \umM:|QܽgNշ918t3~ޣZ/|N=m%v}veQʠ<[,YNj/kn{ʷXGWLF?ΟŃNIs;IݣZ ~ ߨO{=8]^:&u<yq5|^ >ũ}I/;xq:/WEtl=>w8at޺A̗qҿ ݰx}Sݾ|Љר#=<`|<g.>67C(8t:q+y?]-_}֕7#^yN>`' ,uyD$Ϋ>zQþO3՗]SUn>s/ٝq:y͓>؃xN`788<5/u|>{B?{'ƻb׹i#V|ܷ?=Apݽݎ^x9αL:>[7xqM}$ \|W{SOubϏu :&||V—uhE_C>?~ۡɓ#ΟvC{nyQƾVx[>ɬ_7_t GNxGuF \Tz`u?ν =:QG 'Uߑ3XB+끳_m.?C>@8麕Ǻ7Qx<ׅ/'Eþu4|9ɜwg˝yea kq._uspO?}3JesL|YUiu]KN]yIygx'? n{7F}st`y>OGjzܑ}RY2|_~*GNOJ}i&E|uȹI~CGp: o-+Oߊ0"a_X77v[Վ:~]o D1nGqg/tZ_^_zZӺG}ɛ= htx޹L:yW'|Qc9s҅^?8W MN;ož2n1/ oBΝ˺Ձ{7:WK8>a~xAaWIGqzȃK8y/B#u,K~ݳg]o~'=oЁ78Qt}ýy˻BᱥΫ g"_ ]|/| :_ϢO\|>ȤH?Au+Ǭc>o]Y|0_}Kz^4ƫ1FiC^{AzU[x7 ?/P{IK?~TW׹axϯz{}Y'x 'S/) VLqF7u?/_gw8~+ps{<zMս߫>xtNrc}8}(pxmx5ֽx v["Q7צYp m<:P1ʸ)w}}= T}N3t(UQϥO@>y;/oeIXyBWpb#շzx請?UzZ5·8AU{=sO~>>'뾈Xt#Q'ï[ɧ՗5xƨxE =Nz}c(#D=u]zԹcPsIOGӞOO̤QîU'=8Z9gKt-Fs&o.k-{3.\'3D /m^WŸ>NSN?G~xW|>p0E[O|Bߢ?] ()g=n(>$x!SRRIoe,ŽcOֿzGzp_:Q{>>\&}މ|IUgS;θQtTfND|{8Sٟ}K uxEY}O9ӎF7K|u]C7[U;y{n.>GKxkk(=8tx_:bX7>Ы)N]<&|:^K Nk?_?r~ݥ{h{(2"oqr/|=҃;QĿ_}H#[/݊+ݽgY_U9wBkxNς.7G}<*Ͽ{{G'v{__$ꟲ>r)zdwgx8 Oë>gOLf;$Afҿ_: @G{*kҞt=?U0h3~QO^4Sc~ϰW7蜯vr|n~Aua۴X> nu~3>KoݏoSu}}M?s.i>и0>Us ϭΏ<?z\vÇܒ8n_On~_ &ү՟㩃qb}|?Gݨzn=|r#|}_җk/Ft~~f?Ωu esg<(ޫK3_?󤏇uy?>W,x#K}KZ*B21|Tz]8ιN̷Oه|Cwx'te/'NaCÈ7uʨng-ա߫?リϻ~}y'#~dCpxQo8CwU\kj%UxZz:j/Xt>Vս:9nyz5zO 5|X_z:У-7<8VgRBP'R>Taɬ78d?}qza_ٗV*tGuy83t4\wк{Յu٠-_K{<:0F}X_$ɟ0nw+ ~z~ WY׫Qo;:0k^íSw} CWӍXy@礗ԥe~ t<+>߇w+UʠGOyD>_q.V&kGґy3 \T0]ߓ>t>>c5'}ӟSO~K'> _ONz/mוZ 꿏=G}]U|uNmu3tc+٭Сc[okUa߯7·mՕc>em#D 짓rɿ?yB:W+M'|y|'xa%t9\No|$G6UGlD|{ŧ}/[?qsoZğO|^zaݯ4i?O{\s=~3^xyIǹƿN?<$$>3c;ZzU|ׅ~~4}?\:S}CGo#=N]b箆.; >xOBU<!z3tSu<#oֽK>YީnJ\'ߎ7B2U?5|Y:s:R ~셏w%B+ gZ/~4|z<}j 4~]7> _NzQI }5"~s=<%aoxu?Wgg_̟Q_g Y}}=Ǽxr:6yMo=sUw^uG>zsUw?.FǶ\f agz0gEps^%q~/ǯE>}zGBO3]yX}0&]^|PiqD1|n]xuHgޘ<*m/ݏV—_oXu^a?}񝮿:[CۮG8'x^rx$?}|r8S}I'`~v1xzĕI'_gEggk;.֓xY>umu:p+̤?u7Wwsչq gL>:D+bUuoqRg]O~ qi|:p'b 8 ?//8YV Ìk+|zy׏CoC'Rݎx tȪ+ g*n ;^ }]!O>Kg+'=o/92kq=-\zG[C=~ =l=ƳCǘv\_%=|7w7Տ`''CV֡wp0~94uG~Fz}b<%xz6/hͻ{#F[ݗ4nc7yC z&b^tZGþo9}!W}n|r>~RGh=}Q2$<]:ŏuߣø٬c|^ӞE~#o}yC=UKz ~Tˁrz܎;oxy/qh3l; V`| 
.=zv_yF}Wyo?돞Pz݁ox'|f=n\w=}o }[uPmO\{n^]yu9-Ox=ʑĺ~o>7W{uxfp5?(<\u>̧߫scNZO~2~yGA'>NC/>x_|%q~-u<q{퇟?] ~Y/ 'n5N$cÕmzyWg~C7]=胞P=\~6}K+~o%>c┼Q}kAEs\.^3_֑<8t㫡_W_qޗ|<\J;~Ps;и\ 4,qbVbzn W^o_8u7)⻱/ q%^>f[1/3xrt\_WBճy.IOc.\}}w;y9ĵp=g? }=uoD㉪/ wW/UǡG}uyyzn^h֗{OF.E^q.o=x$Oqw)w Nt09O!uqvzu_ϰx~ ;< wG}5pKsl%>ϫosM/K_{On>-|BY׏·ϡ?סOˏù#|u;p }սD<ɺ> =G>~xN_o?򃼺9e]·t9体>NÎ$QǗq;1Nu(g*Ozߌ8ǧ}sㄟ[kN·$Ϫ'㎏<\׃ubG:'+J zTl^{=Pħg :?'k8BOb]R=yqyp>j_L:{'}^6t;xʹ6xľ/ɛxqbnN~> u=Kw:%O zT0N;ϓp ﳥOX\O}o;9giU,i߇g3oǾ?^{Z;m|i-6_"_$\ﮆ,>]5|wcߛwxjs> N\~4 !IGg={i+a7|G<GЋ[1|d̪ox+__ y'I|?=?|DU߫Z~t]z:8O?v_Z} e;wϯ$f >8R]-}Oз$y9Uqwz-ݲtQ9k- cP<ï~7gsΠ.N:rE~K>\u)q7|x=8=VonD^м8@C?5Cu/j>?}1w~_FgA<^}`ϫ}x3xZ͈zg7GruCЬOC}?VdƛOo,^ }/tеيxmgzs~F=[xNy+t^qP<?7?oU6ﯾ'o3+ӟe<bQgX%s5uxm3nA=x_zN]_O|x^;~:Aˁ?e`ClȺlue|c67xoq<(ޯ<?3o;/_c ;߈wnf8ݯ듞~G:%=5ȧk/7Ọ>SۡOm.NǍz op7x"] ,OKu!t{7둟đ>~U:쾝<ޮ;:8.Y#uŸ=(gG|>_I=~^p~)k}{[]87]|r#p_Oz 8؇.SuꞬ+듞~ڧ? |F$H~B??y8uhd+|>r3}!|Ѽyu=E=%u yY~~8J>ty8랳o\F߫_xc}ui;f`v\sYto] zF]^Ǔ *{s䷽'_W1m9|?[sukK:N#,|<h[,vѝp<ǯꇍ';~ƷK޹^f[CהqO}kMuDg=[U:x- :~'PǫD܃G/Bw<'xy-l?kQ{ԅ/ź;~fۺoyS9c_UqבL{.Gsቇ^Q91_Bg ~Zc?_^RQMͳЩ]bcȺOg0v98J~;;?ޝa_ō 8 }h>W[r/c}9C'kxuWu -Ϩ͟sWp ;H,O:/]h{mXq:wp.S.JT}Yw"/oH@V_Os:O9/nm|Nx}Rk~y?VB>fӏ{Ρȯu.k}UlZWӗwc>κ)o+z>kܿnvvcz!t:*d)i)G~ssuO{6t-/o>= o?mxY>q'%_ Ǎ_k,uu3o}mEr%qۿsWWW"ߨ ߊ} ]˾'tW?iBw?慨'U<wi+. N|p,8tfsEOc]ފUu/B3ty|f],>I~D(d銗[#l_t^^K(q}8)g|o8:Əͯ|-<I^wݣCO~1x P_5]}ȭY7/sO#W},zYy>/Gʫ#9w{c&7*}8<={˿to\zi~~9x]&=>}^#|C~OW;^Q?PST?Y_ǖn:zABw<qZߺ'o׵o+aڇۅ ~J>|QFZWÿ= =UM{~7;{Ͽ ^k}lƼX?z>ޏ^qΟ*[O~ϝyt3|v=C/÷|Uެ2pŮ=('" ݻ#Ϻn&K6I~qળ~yk8 tLb68^ .WB~hEzB=^zwZsx8Bo9O}S+T z;\Շ׭F<:7}]g~puG/?9i־5npz8ύe _xr7"/© _;~\\qnb~Nά;CC\4d͘WOzozq?F^o'C:սUƅ.n=u{ҫC\q}'g'~G3Wߐ_ YKKDP'ie&=x/>V #}}Ext@3|N#o'͏G?ȸ̗X$ ^bEި(lo_38dO~_~kNK\o:5M~xY J>pbw1i~ﺿԟƱ. 
>Z<^[] >Ctknn G>0mF=IN>[>V1ηÿ,|o#|8:Yxp89|~=t?$>\zjB WEG<ǡ{nz{ }8nQ7Bv y<68pi t\~wuqWW[ౣЇ]p[igtg*8'Zuﰏx&dq#pQ'CW.}nnZz~v膏>\K?s*궪bNj%Qt^RuG֟|uOuCY?p;/WU.;qߦ]~yq^fWu GA-Z7]t[/U?Mq_Ǟ?pz?}'nWyU^?k<Թ]uLuA3ċ瓎!ܵK=Ήt\ulg_ow z}P}zN:+ֹA%[\x[y~ ~_|IMk$tu?w=s h=tǑ:t*8??o'c~qSx;aÈ;/+%zύg/ ^2 ƪzA:| < ΕnT"Ӌ_%m_z6g:꼕/U}J=~G௥X')bgA]=7^kuo9'uyrWH?ת.gո֗\:auk"֧Obց:Wl^|{Q?8]e;ֹxM{6C/y?{z{KQ~_kϻǡ9|z,>p ]pH a9S`Wot1[⨺ߪqqD'ֽnO|_] _{b^R~ҳ_ziԑϹx͹]ox/ ]e x={u\SÉwCoowB'TΗT\N=-vv];?M8  N6O c>q䙯g>wTE񙞰>?{.O'c=Z+׼4 :ute~@y,zo.(_d1<tTz·|o0U4~~׃ϭ F?qV'}#[osru+;|A|ǭ/˖Wz[.:isu"QxHP_|mTޝ|!NbΟ9ڿЉ&=l:%zێSُbϏ?vGc[;on8^u9'=N %,pf;˪n(9܌8g3.#>|/> JO=~7~@i'^ doqe~Qjt>? 6T;>g7~O__՞taϻw+rkt?>ڈ|8mѽoF<7Sϟt[vqWNzu0h:|ҟv#c2v!8ɋu^yg lHunqq{yonqĕA_:Jή'=n۟Nem/o O˞ |g[mo1 ċ:sra3qY/AqG_ƺW?뾠wY |+}b;uIQu3nME8L}6W'[xЕO +M_qRvx+MwqC9YJٟ]Cw^~h?LLOt7տ`n0b-Gg]~|<]qVx^:m%x˽+=?lupCgo>uHx> ̤$At>=2GXO }Ϸq8|++wC/Ρ b=z GH{}Z&7Y,Nƺpܒb_7y8վqo$^F7Go}+jyZOzjeUM{|z9|O>9u?ͰFw~gozm_7g3"}^ĻsY\~Y"}{D~:>j{::@r' _Cqd5pDy{>rH<{>x< S/M|M7 9΃O񞏟t>8.ttpZOx~:ܵ#Y :]qYo>O`s+AA2g|XggW:$/ecJ''s- ]S\Y7_OSޱBVC2~x1.>pd[' OOWwǡo t[:<L`ywG.F^BBk҇?Y ]ݛ?˨7hq.u_ˏc\ ~{ \|3|ҟ.*]x_<߉}w7|tx/|Fް'u~>qAϻ랾-}9F-tNQ!*e'tIyXzgxS_;ȹ_Ά>xCxzza4>]<ԧOPWxg'g]Uw]R?ZO  ߋ$xpŻ<_y@]u  >ӏi5֟1sg }w;Ϸ19Yk_[t;uwYwqzQQ߯ޛW=^Zotm/}Kcp-5"-k~x6~7;>>a%>:Qni|ah#ިxct_yYp[mnjً+Y_y׽3KϹ ?'@7O>G}GwÏUgOyKn߼>{x#\r/ƓH_eÎM^y}Q<<}o[7ƵFNT:iO¯{&G+ωѝ+|s3֕ ^iԹT}0IWT~ȓt:|>4uE8 ﹿUUa}9Q|i)ЇWn}C~qG~0|֝c '/y|3pqF9Yש!뀮C/9 }v;.7 1?K?o S=uZׯN:>]LQN>Fְhz::wE=z෥r6xgw?륞}w#t1ubxE ]ٗA?o4׃o#·ѷy'{܉xVN:VBoۇ'p A#b:?oЕ#_?o"? 
\YB'M?]ڌ&_E~^Wxa>v{Ry\;/Q5]0> |iTwysw?N:޹[b'=/7B~)U~*xj7esϣAOOy}+nO5*:o!<Ǯg,7uNwNϽor䁬"/U5JI>.q778x]:i<b>W/<.]u_ cfqu׾>\礟/ĺtl^⻃ǬCgZsuy-|sQ(· =Fc?׽%yI[/λs#ꞄO:qX\Os t,8NŸ1q.Y D>kO׼ ~i~:u^>ְU+;nyXq5>Gq_ w.f|\/= ݰ{~'̓< 8NOo <:gf]wB' =O77#?c#9DKcQ>Dqa_3[kԍx_8^:9՟g[wЇ~(_WTJXm8qk߷4NuO߬8[/'+'K]ߢ_''焫fqyZ|zA=]ʧߧ.s5|o>'/~g>Խ3x|VU:ouϟ~ }n^P z9[d{%t :֗^셮[];8'95֏qY |~ ?8+AqFn?ϔ_n~p8%:+_u<,eosyBqCkI Y_:tA?i>^.$.n~xHix8f=qſw7գtY{}3xI/W߫ߤ>߿wt\ >3۱" Au{VWC*rvGx׿o7p<)xs5t^g}ƛonZX?uN+u7Lq}88{  (Y?zB *Uv~ߩO=yPTc㽺4YG7[C{8rM}ŝhls3QIօuOy|ݫ8|^ I1M U28)oxtZū렯oҋ<}9йw%+FzdzK#ϟ(0)?[ơo.u<">%o%TqEWoD\ډNX[k]|Pb?~%/=>_⣺dk-x];It S_rJ88M>:m瀷:9炗_.Oq3t~w.N> i%fccW'ˑ_yN/ ~?ef6+_ӎf-_?D7xtDyZ G=u Q k?YӠt08̺])yo8Q{8+Sϑs}:3}gׯFˡY:C+z:N8 np1dP{ :Lξ[+֛s"]8 3p ݈.<:Ƹ`y_AxA+σp~c>:~=9 tpqI}Oݷcaq{+^ =ڼ/vH\+tP~> M%L$~%ϗn~[!;]7B7~)ŋo;txз|·Gt\z8p})a]ת^Twҗxu}81;7=#'6BW< {z.}{Ium?^w ]kIACLJ܃Ylޱ _/{ziǥt7j/Wyy;b;ݳq_o&⁷g­yߺ~煺_px๞:|˷ēa#[ y:D|_m%NWiԹ7|%au1u5y֩}*.ZZWg+ިaaT3>$>N5}|GxV>ְ?:FO|W MZ"l݈tcϿ~ao=ǁ0?~fmz}.>_^/KA?sk69y8QժoY:G\_5?<辅{+ ,.!wtl5n/t z}Hqă.dOIi:~1^(/g[i t:g9ўu #ic+;s3xZqqнT =|r&{gc<^ _ϡf𯌇c ۽ϿjA^?Gn^?:Qo-e|_խN.tg7/^_E\/Wٷ0RઽqI˛C9nS~]|_[^XtHƺtn0xut=$xEw ~5λwz?t9mS5:ι3&Y}VIOp(|X~z΃;y#x|txn.Yq?/o>i\ !.9={.i6{rVxMDܾEc7 l<> =| =WW_a}\ k|oynEGP0Wİ[ݏy߲ K~y6b3Շ\ }|p5R7]1.z)|qG1nzk:~}ų_Zs}sT8[υ% ^:'1_sϳG;n*+|:/;躈|"NG >} ~%']^o֯|f? N \ v?z*}>RyIY'>G|$nSGZ l֛#+:$f7:Ft89:x\ۣ\9Aw+|puL0ӫ?GL+'AXbڿ$|Q=g>g|_Wo88#vn8r }XCK'7܌=u^!W}y3un|wG >^@:` }M''-1/uڴ㢏B<^wއe=~N}C—˼,6a#|øЙ_,Ÿd*MtqIKY͸P,>K7W{8a5kփK#5 w['e0뺑|._nkާUuGw0#:PgSw_}<EVTO.7}=AG_+}ЧGԽN> g QgT㲃Ѕ}Wb[0Uݻ%-8u.L?#?-c>N+xGҝ{wbC7<ۿyCN2 OoW80uп˅wQѹ:GoǡsWIQWg-\5\gS?Ny=Z_7wg/3Azj+|_=N&78[d|] ߆G^Oؿ^ ".ӫEi7b? 
T|sog?㷾~ˡgoll}>Rz *t+|_# {wBq%s(t/o}Iq)yUo'y[> 6/ObԹňW=A}|'WtgRv [ӿY_ˁzЉ)><`=U˴Vzgm^xm +W::SG})>wg]?E|\ q>I8n;tv"O=jL}pR*u3֩ނ[ůG<8:C⃸o^}TϡuE߰nٺfqRs[~؉Oo?oś1g}oݓ8xy#0oN[ ^`'7GRx_ۿq .Vs-r5b}qD ~b[u~cW1~ގa0 =7НK~t u2<ΓO:,~GZ?/:0?BK= c5Z+slп#/Q/OWWǸ?Gwmyvp߆\}Bӯ{]{?:ǡYrS,.:q1x~],T}@Sx3^UWHoOuvs{/ I"N= \Zo}>d?\x5t!x^#^[j6.ޏ#qOyxkGY\ wW}/U|quO N}<3z\?t.ψoODWC{~|Ko^/)QƼ9w\ _ιt]7o>uN4|upi]yy.x³gB'+.k>nlsni =䟣 q;?qyU x؈7x>ֽx|'yDA4Iȗ7(> S NZ ~Y}Ooԏ;pZ1yøT=B~x\F=W;I/<tG=>'>>mzWmglY}a&]^ TkOɿ }p9$:]C=}#sƣ/b8w>+\w?(_E냟W󅣞gn(jtqh9x<ȜNl|u¤[WýDyTnʾ+8^gVa#u[ )M^|w=\>ĝ'WӾ~zb|}9|޵nM甾uWϻǨ/|>vh?Ͽ sz<qWspm^:$"/>=|-x^aTS> Pvϋ+F{_ڪ7uw5tQ"у/%EGm~|';")o:u3qzp =Bԕ\ _I_/q#Np8ҝC|ѪvC9~Ag_x7a=>eJ' yl?ܳ|=B䓮{7pz|:O7Gӡ7۱_O?ϧt-x8P>cj {|婺gjqr?|~L_9qn%F}tYߊq#8='ㄺ#tWxGG7{nL߅kߪkq[3t?f[{/?_/j?GuaxXu>jw簜:wGGy~q7L{|>~AztBF]uuO=J}뾎iw~ҟۏGmuOi^ICsؤ?.ǘQ@:b4 ]=~>w#4t~ܨ,m2W-.>zz? yw#|| '\Wv!_j=r.?O=5Gs}D_;1ųೕF=/9%략?R/ձq|}(c`=Î.%x!]{<|wn <}+C|~q/kd?q.&yq3u3~gQ89}3CsL_y{fw>8~uoMYqz~u= ?^q>y_=?<']BA&DOynV] -XUo8<\>AF:7>gO9צnn/k]|~uC΃FOrХO;q}3|7yquz9/wB?.7xa'1֥%3_/:Iǽg? +uG5t :z^/lGuu.3+Wݎx] "?V/F#[_ۘO:ZGǡ ul]G}n'1_z] לCKK|7!O.@#/}C>wZ']s< p5麨}"/0@ڊq8^Yכu]zIcB8ucG %3z٤?\|䨯7^ ="/_KuQ8qnU4RG.Zwb=YUVcХn=yB׮yCY ^uNj~wN{[1h'.:y?φ~OȾZ9u^c{G|s)ĺ߉xDXo1^s>t)y~y~: ?JOG9qOĝ/O.[>oAF2>E/V'LuA*pz6|G1OUJ'<< ]^:28Sq\+^#~%u-mTm񣪛 =~uoE|tKG޳ԋ U31u>Vp/|aaq۹mwUg=VI|W8֋skȫE=ECƷuc_ |пYt ӵЧ՟N"ݬ\>Ư3$%|l0|y=ˠcMg|ǟ <ߨV"^9I1F*&NI_|q5b~ I:g-D^R >(6(~#xF2G5^֭ϣoYOb΄?Qwm}'X{".{'w&{ oen`!uo77N#7tvh'tAyrֽ+/v=1;]sxS7Z|ۨ)_l┸t1|7>=}4p?7(d A!I__+|Jn[ͻugïR]Mzo탳ep:?սc݌{5/}Kfy*Gy#~HIA>ӎBwy>BGV̅>߫_ \2 ~1|iqsz|w'|[uxMw_q=nzgy“7~`oN W}|֩~87vq|WOKWG>_^>l~4|ģ& K}#֕=z/'xq:?_(tq~7/ }~z_~5pՇx/O^Ku.>{nqaq_N^𳾯|$C7x|罫MթWَQ^?s#z@%t/T>ne=T|=<{qГx.ź4[ң4Һ|Ϳ_ҧ/^w#L.= ΃ސa׭sx̸M,^Ow޺Gczg1^s|B7loS-A//W:KyJN*E}gdA&lVElt7u tV]TGݗ|Ayy<5S }=:Y~^:?0 3t@Gi=yG~ϝi? xOtq#ƁE',+֝ߺtyq?M|Ui8sO^?}Yvjy>`nx5tgǝqDGO/<zYy{3`#QgQ/ه~eP9Þ;VkO&_;WMwÿ.i/!U88R<zMtf=ozvNC ]5Ջedu p= O/ぃХ;^Wuwg?YPx'1Nb9C>Ub[bsqM]3'WŸ]}Ⱥsv=D^FBYЇc4\CGxW}S"[?xI'O>qnJu}Z܌U r}}Թw،}`M7n@? 
u!q_|Щo.b#"NUqxQOሪ%_ n,s*u>/~8=xt߸ Vu7?v}f+r-t,qy  |7>N:z~] |V}B_S'=8]| ~۷xd<3xNk>?s}LqǑ^aX8OuGRכ|/U84zxj4c,t ۹w'ӝO yj'W]=ٺZ [fOw3􌇡f}?\y,?x&?G[y_.lZ09֗.~:#Wu/iԹW(N>N9ۨD<.v&t]_u/n|Ke?CJ :(}.^O y^w"s7&xB_W"7x^jҠ;|/QGydyl{8ko'/8P'ݧs]R/<74|WGV_wO{}D7^zr݃>*nی7~^T7G;υ=VniN/!KW_AWt=|&>]b ~gW'Ŀ:_7n9ת=^p?t.s~"uvzs[> gOg?=}WYo͝Џ:qFߍU霒 d?}tGv<,Z\I«n<}ygCu~uA_⽸UvР'܅On~RO`H*A}t쥎#.:sݛ;:F8Dy )g?1ߣ*8,GV/>ο^iGc \>GIOoEGÿ_7ľo/G^pU._O5/ïpYz1u?g]}ޟ4xlg7؟{[ B{-o܊7Gu$}mOs~|Ⓒp>QW'oҫ|cq?YxA/'g1o߼b_=k~x~xve}9 z:?w#ZW}wù;~r>|Qzg|nlυǸt'x2灥GGQ+tYWuBI_8:g<q38S][ޯk_7C|b_W?Qu^ }~c /z\zWpa)߇OΙ,pG ^iuͣו,/.ty[suOlibq{?\`"/~> H_/x'__ul#oNը]H~5Ƒ_ 6q|^G8_G"~C?oW'bYqǑ ;o$9o!_ }|-x%w:Z Džc'(:=,]\K}l/z| W:p~ٌ}{3s+ =oc=ӿ§Wχ>  x=Vݿp?=:b;#Бt`hu J 7^Bs"Y}Oo ε#1s>>]/7?7n>VM){5ݴ y;^%U'%?yzi[R8ZzRyfSеw-Gux!F >4ո\ ~O7cCǣ=)"? aV_DN34W~|{~=t x_{{~+-8Bߥ9W}^g}o{ݗs)Mġ?\=f]Q|y9}rxuZ}ʇ]oWG7~ř̏,ICwz'>8} |'(rǍտ{o_縨tiY'Ï9Ptם/K/<ȏ~>̗>A|:z]^ }~)_('t;p|jwszîop|/\ Xk_/o|;? o+|XCݷ>ym׃<;aq_^}7p~Օ:ު6^毩ǎݯPgy77;qD}?^Z VSk>}u1 /Z򸺂;=7]_mgya[vgWekp#uF\# ʷ =/0NJ_3= _x'-꾳dž>i_x/wc֥| Oc|7&8{8sp&lG~+}JI3#7B-7Nw)t\kCD[UL_^Au$o=ߍZ = EQzl#z#D3Ozz:I;4g?e|Jպ{ŵSx1ܴ<ߴ?o\: / wz/'Waj?$pvryF^️}e>t:Ym:7ֵ B< 7y%pxėu?o8| ?w GG??w=t:M棾~?_n_u|K;;;v:'@,jy#߳#7Uaqxsug÷+(l>_ }>Niy\ ՗#a:#zd=·^Oރ̸u||(/]s5g~ozR?%#/Cqyߊ|o҇k|ճWsׁ8ṣ\Y7[txx ">TU=~ۧuj:M?֏q+p,]_NC]J8\ t)tut<\ g Hoʾy|g~3>O_z:6V'Q@R9iox=:W0 :7:F컪ugN~C|ą׿y.{iuGT}-BKkᏯn#_oDs>|5Y3H=.KۡN'[=vp5/jo s૫gx~;gy?ut =ᓾn+zy^'*pI2BOo>Y}}~χT5;{B)}r#1 dn|Oً.=_XVd~1}n#p_;/<בۋwF|W_vnկrG/^7y/>7'/tt^t;)W'/8zQ4}ϯΟGA xX7}Cuu?l>Ώoz^ZN1A|_}fq0A{xIѵ_m_q~AW#xnwo~n}n]~{a?1. 8ɃtSgvb9 ߕſm>9,}8t?>$|镘7<=v}'蛅~?f]Fg|{_}}y|<޾.~uWfߌGwݯt=_Au57w)̿:.3|ݺOש-Wn>muM7/Y;o;/ ﻾m}};zSgWGq _y P ?97 ߾}ooMźs}/xqoO~z.n5,YW[Pq9gq:/UIooŎ;Vdz'cүz<'裿u^a_N>J\h)J~ǫ>Bwv+C :g7-r@|p!Ofϛx?vY|rOy׽3nJWU/b=oB>x9hп^7S{пow?ozܳnCiB棎˿gyYo|:jyK-=Nm̓=ƿoֽ.)k=:h.x|޶?ϼ?v;:ts;n{+?s򀺋yS_`<~Tu/BOQAxs>ǝz?]}˧ۡ 7w'':7y[CkGFA 0:7uuOQt(^b@8Jg]o}qxbMt>xI1\ |oyqiyL?w3~YW?~>B*^;|0=n>g;B7CV!x'3}R)]|{x}? 
Q~5}|qRO\:/au'C;^8vb88 qw}_[OHGN胟X}^}YÏ륮үhZY:91N[E56xC N/Xz5>W^]OC+?WzW|ĺ \zUK'WqF=|f@ɝНkxOS:xS N'>j<_㼓}إ3Iޒw7ū^=;߮a~f/qٸϏxl>9|Tqt-}{scɫXu>8>{z5٤Y<>q33<z5\ͬ?3.|<+{W?Yv_f%xMx+[Zw1>rϓgʏzVkq ;Nv?Ah[|nM:.Mqs֍u/<=y_|V'ds8iU>}Z8xC 3:Htuͺ^>I;Z>%ǾώB_r_u\÷ckZGQ%njc]ysN?K7 _s/:wslWupa?L{><8|͸gȯ :9U;zx\}?||E=ahDOUd}twJx0 <ewT_?_N]ð 9 O>v&޻7uĵٟ['q`<N(p瞝J/yKk7{9k9}~$:G؉?w^yʳga+NxΧNOzO|+q#<6NwkUwQCU+ṅxėQw>sW2|qq^rqyӭ xU0/C?E:^{o\ᒫB}xX8y՝]-uyz;0Jz3]øgy4m!zOy!b792|xh3xA=}̬/qܓCa_N2OG÷Xuǡ#^h]w˯t`W惪_OzN1x8_~ntF?3(fe#^'P}N:_E'[jwو疇p>~=t;;E_sy;^Zg'5'ËqC.#Rz԰#+~ˡO"xH/n%ӿ;_Vop~x7>nط֝p6eչ:EsZ_|/^P/Ea׍ pt.deyy rq}uێĺ_t_93GՏW% {7?:xmatvqy{1O-_]Qox7·SCPw|'8G^{ޗᠺkouT|JGJBӮoT=^K_ృ7[r]>ηVu}pW߹A/:q:-tqľ^Kнᅺ*t*ygD^ _IAmݼAƾ~o;G+uŹ!|بmpu>|?~}@}ïqP6']GKpqs>Wa_#˾'d7b_t}6Z>EW'k+Ae~aXCq|X_;}^'yӽDAԣNXsv';iG3:?8Qߎ Îsp/{K/[ xhyyYra5\h;Jw~ԫ^ Ft^VMW' ߓ#`޽:>{2WSr//B}sѫwߥ=≾Ltȟ늿KW{w>/O;rnY̛>֗x_EG]K/t~A՗$t!\|{ETJA?|zyq?R/zУ|xi_?w=9_DG2|hwz}+µ]lƼ^A~NWOU]>{q.· c_XBp t4x˹5^ O闅KiſiQW')|C'}:c~U/tIפ7Nq^^}η=շny8|^ W[ShCG㹃չm>z5YjyXXd5ApuNW7]_^: ;A'Xl%[su7s͓UvN~Fjyٹ{7ta {~|K7Yo^GmoO>]h?mƾ[ |O y:z os/_'ogw,ਿ' ZO }zyWVyaП<[>>OqIeIx^?adZy;pd'yҋ)eZguNN_}չIpsx͈3k{ kg\QGn\] q7'=ΩGVt~|zH뺃}߽}W~`\cF)~ ޷Tu~v]غ}/']xd>r+^ϴ꯱zx>GˈcA 畺wd"+/~ E֫OLksDext5w'֛F[m?lx#9ꘇOϨY>}p#'nq*w9ﰺW1>Um>.ۡ~}5?@\ ZUt|ܥwqO&}];hy'|]ymםOO\뫷w|]j.熏'_NC]OR<N>+VHM>Y輼궎{Vߥ䁿t\t=N_'~οV;L)&zgW=M;wA'緞/ V :/7ޟ+ˏهV'Sꢝ[xV}TnïV^=(BwOY}ǫt\d:rO.7O>7C_XC/? >l~wC|^`9:f?uN(tw*ϩ +^`_֏zO&]yg}?#=I_3q<⯺^˕!W.u SN;?aM|ATwu/IN<е[~9|DwCS1/Ϧ8W_]d^IC'?E̛AG^ [qq|?*>Gt]O^Yr6gwv_<ꔗzϳŧQg]UT|ϛCAYt ,!n:p?ոo>xW|]mzNh<ӓO3~AC<}V_O_^ ~|}C߱ÿ7|K~:)}6akO)jsN~uZ~nj|d#~Z!wWļ7ۡڏyu8yoz^:k*w?܍<ʏ\m=}"/5uk|ÌO0|k9uU'ÞK}y~ W?Qǵ'f>T?is _8=>wy3]u_@_OQww?҃㼛: n |^#ϏgU߳A||IwW:yWF ܽ&w:$|^Ӝt=_] \Zd'OCF]y(<<u@{foi&>4<<};?<㓅ί߄/}|>q{d?0V~Nl>j4O^en+q?SL;j+|0뾭Y U6sp? 
RERpxXuФϯNy_ %tK/W?~x|F죺ly|f?0q俆gz L]u|+h4:=Xq=Wl\⣮\ eVያ{c?Ļ8V;7r2Z?s|lx~r3c=#ާ/._ۃoټG߬s㾯W7j9ֻpξyp:ϳ^yk1_΍V_IC/f|;7cߢ~Ooe9WGf|߳i|O.:'>W,ϺG@ۣ9<#{aײ.{M|_~B_dK;.bY7Ӆż+%7=^<>qVW.??>tsuj k}q>`*?.xQ[/κ΅Nju=NF} u]aQp^Y4g K:/bЧ{y/δaӢKOҟuGǿ A ^ !KO})H~Qq`#-8fs=pVp^.Ʊo1"9g~":{ nz?Kz:ί_g\9yz.'w/bjǑ'pz7|u#{Eg=yi׃k=èց{aNr*u* t7!h*OϫzA5)Ѿ_]*Ћ}Ӫ`x.mD^jB_8 O[\=tYkt?U/1x[gPd֧82e+W ^ۍw>@; <0p};}с/|r|~*OC/&nBį|/ȼOf>-|՗ .=8WrG +wTA/ u>st-?FI׭W|wi{WxK#P.w~ٯCz7~wyBÈs&>7>pvMgCO-v?@VX("zoU7~ACo~V}@B_4}^>}WCutţOYӾJϘ}*O|h\s8҃_o.92W]9V3E]|%.G([>s?][zuuC~?t|*>¬uʷ=NK0*mzs]א뾌/m_A<Υ /"=s\(y;AGOz3?G|իaU5z\7;U+/yS O½x7w޼_?:h#[s%^W>ʹ㮺/qoDV'tޜ.Q7ϸ_iOBӞk#̺7}]|0_߫ot{҅>r;tҗ}]~ B;[>y|v |~sO_z9/bCo_ߵe~㍼|+p̫У1yOlz8Vu-zY'!.>g⽜{ϧ]?Çyc7~m =cRO'4?>at}=>QOέzN_rE-~qNuoV SQU|>C~`&åu>{QnFܿn^b~*~Eoׇe|uԧz4w8gfƿW|SDv=bg Џ?u:pdq]<m.0|` 私KUWxd?|vq~QV~`~)U^LO,?ju&:suOj`OC&}Ti>ؿaT^ GwVø[!uyG5ߌ}O7rnB/2tG ?}UiYYT5<*->/v?8P[?@='\{^ _: !ދ'Z"_Vw}XuP}cjaѕ\}5ljBWķSoCO~wn^ԺwN=>kuݗ:^]/p?vBW2 pU$q\]|;txNCקw8>8ob<"W~G}o3>uI}l6Nۡ>2}>r\a|o(iyS..vNyzT?qΥ<|t-8O}!}לw1}oOx_:_~M~=C,Su;tIzv<ٞ}[C-cnwͷfO:.ƫnfq}q|ćM`gpO=~ɏp:EyD8bu< n{N:^!~;)tsqR"=3L%y<˗}}}/݈ d//q{WfEyק=>];麵:7cuAƿCK xux(ӯU~=C>W0}~4JRA֙]z?teq4KoRr.xaוu&ƕQ}4~?8 ZǙГB'k5}Jz =.yڰzF|żG:lނ~sIǫ|~^ǹ|/gN&[Ӯߟ>/<6r8^>q:o4뾙Ku pgk/zuoȗ|:o)g>;Wq?1 QP~mu=}cޯηN9c3q՟umG7GE ~0uO¸~>3?t|RWc=qշ{]+ߝ'{Iz1^8 KSgvY?)^WnΌ)ˇ>\D#|^ VA zJ^Qn̏_SֽJR_Qg5Z6N.A!'Hu/i7?|zz/߳{ io8yy z_N!FY ݥw~({?`O/_q >]{:+~ X}L=o׹Q>Ĵσ{ݏpR~~.%xu3/t Jt7c#Wi^R||W;zI{~_Qx󓾿]#[߾8S7x6i=Нv#q!oWY'}\SGyG|-x|^@ •ڌo 7zUoWi5o  /YgVWbupqlxﻁWL::|.AC\'>//ގ<*t 8VŞ}΅9j[KM]:{2:<|}wϚų=O٨t8q'ӧ܃P}gOz<_Ǔu2O·un<ܣCk3=뾅sfuJ_u񂟤.?N:ŃۡX?+>kR78Eoocqz}?_u 碌Z|w޸V >A?z}jR÷0~%o~!n? N1Z,µ'𤺍ҿ#\̫s+ΗU_u"~kpkqGnޡi>:q6xQ_ \[_BWw:?u:Z,7W5+n=o:u sYz̟ȧ7և6_g섎Nx+~:&Y~Ӵ5Ӆu񙸦֫s|>Fc߸.R_2N1?u?w:uJDւ'҇ E_t'c-t_g8 y >N@|}U#u'c;M){ )Xv/z̷{_MAב,2ѡ'u'X瞧y> !շcY^Ŏ7$u<=i\ I68/Uduis ~]'^WO}UW‡WG x;tXI:~7}q.& k/ 8\~ux NKe/~d۬vgv?1y0}4Zzuom.?zx\¯sOb/:Ȼz8 >aǙ+̗zB4+ޟ+Pg߼;$7x/Y:o;-#➾7އO79H[d߰ؾ_3tUNsKM޳.%OG*tuuv֟tIzyxǺԡ_Q xuX:=w 7|>@{Ɠ-}ԎC8yҟu~9.]z6W#t;o:qG78O<:. 
ϼ9w}/>9$wia=YP:O߄?]$Mx||J=c~p_o#B>{T4t|$uuVx-x%lwea5~o=yt*{Qy0xz>ǩ~ox7nUxz۵a|g=:|% >xzu:7u8n? ߉Ν?^> T?W8XK}.d]p.i]O:r +OM8F]:<$"<,}W^I__<|}u9teu-'ZG}yjFǼ:n?tsX}}̻8Oy9pb1>_X뺜U?g 7j?Rƍ]2u`w V`w剶orVUz;Sl[?6/w"5-® x=앲˹W6ϣ>u1/Eҟ1K=v)B'PEȿ^![HH>G9D3:>ە@]쉮tOO֣YEdv;?F|Qmuij P:ma^GٲW88)d.Xs| ׭жmqn/ k] >{ߗۓGR)#awr?)=yyE3_hKP>@\𷻊m8_!<_K7qy$%:8o W^͚_q_sWouX^A{un}?M%B ޺;x^=@era|+uz]Wߨ ;yOYrp vq8v*AM/Sw]rčzS&cxeklMimi^)}e}n]gnN>>>>>>>>>!!!!!!!!!!1111111111          2*dTȨQ!BF 2*˜p3oQ#FMjhkhkhghyghyghohohZ`hT(gϿNmclust/data/wdbc.txt.gz0000644000175000017500000013772414157117042014743 0ustar nileshnileshˮfK)v y As"Qh" T~=#'~Zfj"?_?{/___?KoO_ӿ_OI/?[ O[}7?~[wo}{W'?ӗ??k]ֿ華h_UϽjWmo)WK{_:8vFm)w23f=~}n_jS_cZ{5?lkgƲfoYsߖ3w_,/BԦVvK۽_kOgkSKz;>{F<{^SZJmǮ;=y}uuߣz/oG'X?zjm=5+}_-*_~xOt=VȒ˘ſg{u}vOC/cշ?t/ _[.&)>|t*w=~ sTNU:з͚ᤐqY]~KƪkKFaP_=Z2"x5"6x=:oP,-fow|(Dw j={H|.~s }b݊½*F򻜫_ nq|_xڏh-wս|wDGo/|?=;{աVMЮFb5bYx @6=vv,m#B֮Cm\;I7HRx@^FvllqzsO]=!3J cV~w~XRO [E游_wܻ^-$XEowi4kr)|Gמۥ9QA1+b]'n9;S=ILD:b7-K:_%~1 = 2_8|Cx13 pwBeոV戗__Dz >fR8}xo{JA,Fr8N^)]ݬ};Fx Y9NXk9C<>E+}i g1z%meEw>̷˴m_}8}k'/R)s~a':?P[k|ujw?߃+"GT/YM "b8#'4^B{jFzW^vSfpImt_eHE>rUr^g=SCy>G Q]g-~ғx'٧JepA0© d~ƞ T?]b?N#)2|z~mU1u6Z ,p aQvmb>H 6:oય թN$ɋr|ȠF|uA.O{F ;WT}{|)$S{o5IDwϐx>iUo.qݝN7|(q&6z؊,եT#{>k4_x-߯ -uҧ0房Fr׳\QͿ>5s ]\r-rcQw66KWQcۗH"uYF: _`Ypv:{mX}a%etb=ڦxױ.m.J0xll/ҥKwBҖ-3T ^(w~Ĩ+xgM%o˽v92tS.%4Xޓc*M :g/ƉΈ:$%PKdѾ:^1H:E_oH)63` $hػ6f`.XR@S{icwժfU#i/9DUhhՐ{Dd&:|:A Y͗IVubg$ ٸCVƁr9]DA9utއҁn3:JGzaunu4 sifd͛r&/-yuaoA(vWKhJ*:l1R@kpmhAۡ.GT}Ez,`bDgw6meKQ5רNXE:)n<6ykZhv~S\'+"qGrt߀D?&>8][Ǿo=K| B dDr"[pꯛ4nvjn7Cv]Àl 0钹nJnz+hU n#1d"^Toy(qM8〽aŭ0ӭƻ} eqƙ~n)Xq[]]ͧs߹C~(*=o)񝦷 qBO z!ޡ h>P}B} .Զ,I}n|ຽU慏~^<~'.oy7s={S "iSS|mx{+6 Ibn>WIY&p֎lnꜫX16GĤ|lۥ:kdZ;HUO:_UGUR4+cY֌ 1(Tfa/N7@TekQuo ָwpj0t;6&|ߗrDxGe~tz\f|,(3|iʪ뮴q #2',Jo2#ע[^]ccM SeUWyG$uT̋"fuASl/EpBpZv]Mٸ N3 xO1,w2NAkFjLaYp :0 8hڢSv޻sA}h6GWы:H4ą$vG*- -{t/°&g;uf9]htexZ_Jl G7z$^Ю) q@u*N?:j^];s8rtks t/m:9f~ި jf~gFzGgY\w>lyF7B/X5Q2nsP33^1j2E vx2dY87~''ZUM.蘻 Si O~`ksF{ T@ ߽$hɲG+A~o-Շ`;dt&kQx,zL?GM3Wo{e =|znI 
-Zd|4[W"Qg\3i%GFiL:m_KL$.+.~2ѓ;9׉+'RW 6AK"!7 giW?H} Wff>̞d+/GSJY00Oza$Kg ԥ˶ךrfI:t2g~PP 6~ c6!ਟ9h8 &owB^B~鄒{YެN-z'_@NtݽZ = Ƃ2|*?YLCQwo8aOGҖ.JH񼓕7z {A%8oF㮩 /v;~ޤ8ES?24i.͝n= UyX# ^ qF.%Bl3b[֍z4;5x=^K|w_@A9?o1r1ݧna)o7ZB2bwW#Bpg9:3"e3OiqcFx8h2|Znoxm |Nk7|caH(կ}ԺN^m}0'~ZquG2LBWц) Y a6tZ q︇z EEu2XRlꝝuV؉äVcQnb*TxT^FGUec TEm'f(V$'^ݒ ݩe|_wֺ$k1;[/,Ɉ.\ e3W{h cJt4Brk:bNuiNd7}TxA 0k(12?d-Ld+1,]{XqBfPA;(05=@"/hv$>Q~}l3[IN_ 6xz{yƠf|)HaVL8L$4 jxa$F5v[Gxyd٥zrF#%},%f($h"i1T]e2Y#@}}.Ӵӌtxw;wd Iv=s+ ¼]/[&À2PZcd {?0y4(56UyDa.X_[~cl=זA=3V.SV\h!<(P!gtP\K}ˌ !l5ݢߘa/= fe:#vvtƏ{Q N䫠';!v@!2L (QWIlѿ^&GD%9Hأ#w208Csxgk~ *6iJѤsS*v6qǏ_lS ӓ #mǞ a83"d^vcGzDq_\]0.0ATT?p>&qMA"p>E6]bWIA"s⮫&n;HFW!S5%2k%y.@P NnՌ7Ks4S^;q3h !N eәTO%S=ڞi:s':g[Yt۾C‡ |C"#Um"P,s:Sj-u^K*5(<Ě ܥ9VS_tϻArhC&^,l$~槲T Z1i/b4QCf.k)'ȃ6dh)NpEDy%nZ/V0lq8(TbI#jW)nw0HWC[e<6S f"H*)ƞVF*RnrkCԫyMlmܩՓ̸8SRXk55)"+{qmL0h<PMuݗ2)kYҷ#xLl5Lf%\ x>m\#-b*Ři|ǘ4 O)rx*o Zp' u*jծ6+ONӶu w6 *W~ kiBK;GAj7dPdxUZWEzi Xs/q͔ЂϤSx eTӍ(HߕDș2UUKNinVjWՠi5QHaV^~Vׂ;b,Dw[/ :>:U~}vkŐ8~4M?hHXpLPO9DyH~Y/M/}b`o!crB ;k T0\6MY*+J-B*7VՠQ:G slD+ܯ=[ $޹n %=FOEyCMAVZXϚ}7έ VDBK \.uj,6CqkrUԯWQUSu7>Y2@fe3`5F, ]aH(,(Zc+ʃψQnf;WA^0e`z+iT{Pp"&E w#5EjN";T o?$sw}Hf26 h;=+U_UnAc|HG_*mCFu99b7\6N:KP90􈇠g++SVvrS T([z3OAU#0UaQŔpYuw<@j ݐk{G$'[C2亢 h HsnzBE~J6MWM@0[0v&!\J 2DnG F OLIO\`~nĻ8F#FD[|ĉ,4Uj 8 `f83fli 3N=_Xˣ*bt:߭L%yH{s},ѣ]՛3m~(GbԢy!dtx7s>M-TnNZ\Uf,PV+=-6P2ST%le9hgkNrݨ?tWǕB]hɌkk5kstͿf;ULϪ)TBgiQ0U[tCvaeR~o.rK3jH~t.5fr,u}kNVrkRq̧ViՈ<\*[sB<&w-(`z @kE!JQ֙-YZGZiIiCqv5tόAræA{PS2$`+QTfbnh>sN=5tW᠒O^{ h-$hjAlC7l㢍*oG+`#75ܔn< K_ L氮{sPj'GLӉ(v~۹2^Kr2 :oGGgat"*ook5v" 'ҐS e1U%7I\ǟ Y4;1Q74"E{]Q ?Zݫv]*!['3)/l1!n 7V/KӅbCѡE<,tLӭ#!4nֽSZ ױ ڤuS5F̭0i=\w䀐/Tl$tz4l]3~<!Ler8F6:h4x2_f%p'OWr 4x92 ێ l+Xp\TrfR?BG[%!Kv_ #-oH1csٍueRVWe@:3CUtx3&%/%;Ws^m&+R>sRX{f?-۹x=-w0e,#=22@M˗Cg_~u #x"}d UgЬ;'jtMOȱC=َT(ǚF "G [1OXŚXsU@vrBISiȦ=;JI珹2\=4LOn|83Y66UW=PCmKLfG{h>Q՗ZFkZq z;)G0rϛ|ąGjV״"E[I3'tc3}60oFZՔeuĈo֗4ZJGy[UX}+]vs] {3iS=D`3 KF/C%eBDjTox2w:L͵޶_yl^~:!jUlw Mz@k1p[jDF$iX6p o6zk~'4u&i?g׈oh]W' Jl6Qe5>xhp4E97rڪMfAk޻v?~iqАJWq\[#1- 
d*'5ʍG|?hɍ>Q8@ݑ_iQT&4}Pr7m]{'ȸ% =N95KU?+Z)J>C;|ăއtڞ:P |#-e5({z?EN?:Jgelfܽ ([{6\ > X15^Әr%7ǐjdgkq4b_Ǖ܆Sy4OIhG}@KdWAMlvwNUG"[!fCH)_[@WȰt nVw4Ä-LLaw$İ3^h)34=j,A%ʱ8k&%S4; 4z  CvlGoN4Hiϝ 4n4_Sx?/4D2jOSjx7).i>H["s&'7 I +~1Kk)TARhAP)f;'ø@0{h ]w d`RDF/"GP79$r bq nh³ȧ;QwYop).J^0P`VHH!B 4o Fih8};銵N:mYn\+j$hʰp=⭨ǂl^%ZG;˨}9kNqR>Ex6]?9gޗkh!A2 !E{2;OR]*]>ۣ@:#B}I<=KՄibL3ϯ,nor=9*I&m beh-[re[ZKaZBIDJY[s9fej/:cP;PjAjQ*f;լkrxЌG[W欷88^Ȓan덎R +B >xQ-'(K"Dw"ZJVZx>1 \4BZߓ$]0!h½hiiW9jh3`> WHIЏfNb ]$4! Y#-a=l')'k{ -8H' *?r2@۲ɽO&k0qU4<ۄyB[O5<2fSܺIvG*Ȕt$<(ىވnw =8xL? tҥf-!.ДôKiutz"7j*Vxoߐf`R9PfRkS@G>ދ`r)UuL[7k#:QaXD V'YG/z;+U/k^U<3w#+# )h U,PYN?&2ʤs=֤;#:>sꫪ&xꝢZK(x[j6Rs(@F[ :s C24 ƏWqMX:zo*xAbMW 'wkPFyNB(#ޓ&>hJ8J>$R7"2J,6 &&ފUu&(y4 \}Ve,u iPW.x{꬘nF?~Τ.d-t\Gdx%¦IQz5?@lHh9-5UԎ*FP{e/D=Rz\kyra4+Z$fN9F&To7j / 'f&eefkFn3PƓEZI}ˉ8nG0M8/ݜR6f D ԇ5 kNE|%B @.oteWޑ۟MKö3^E '5;2ͱFL}Ԃ+4'M嵎BkhAX~r?))Z_w2- =8p#7*M/JSf"+oRs֤\PlXX 1gN1y(jAj(;t: -l#4q(j}ړ⮠)7=~"X%u)-Cc!֗ޜRxSp)Y{[P^r˪:wrV8|RԛmԂ0V8Ų<^ζ~VxJ:T4cqf{T7v4&g'ϡ3H=y@շji]f~Ǜ"АWQ.oGK D>bl71;?enSB>AT,\Y'c,W.ȋ\Zိ~QvBC̡ˆ0ٲ{Uc Jedy졊nӥ$xCH'd81Qck4uEFˎn Je "EY9c[^e1ӧ b;K-z!0P,Y5K[ܡ祈$kT-xLjuEJK>+Fe<; c y@fi|[2i58,䶤3g-pW hWWq]E,ٜ̀1ˉV<҉ WH@ ѐ,weU$r$]AJ^^*qƾ3V?ڟ1iekjoWV`p ]+he-Չ;)ibilL1ֹվ?襞E!k N1ă<_6 '/kT71~B"\Fz$q<-AM&>*CuӜp T*؏Cׂ|6r#E7Ex 9mGȝs ȟlݢgo"tݬrhp۠ض!&XOLDKk=Te=QtBֿL7ZiS4'ѤdHK'5lQR쓑띴lu5.-]c%tH3Ʃ^^lꙮZJEU;CD4P1Vl{w Hr. Z;^Z"uDy< 4S~Y|4 X#C2u!(+#o/S dS.%ZlhC3+ju\DUgLzAwũ]'1 ;}\X)X7ݪi@ ]Oك`AaŤ</Jb]a?lHMNJ\>k0^zB/@bs/!Z :ڜ3i"I+WQ1 T4藖4d4bK4rfo &Uhf^֭Et_u2+oT{,!1oU>-HyDxu,^٥6fBƒoK`=AeOFI5~i} t@F&D@lKXv{A W_$rBƼy|k~pwj=!󡛱$E4PЗt8f(>:{u4oZ,|14<}5%h2Ysž"HӵAi*fks9{q<񁠶(3! 
Li Ҹ09dzPp |(InLpΚG2246Aw&OEvŜO([5Kd_M ]l%[h"-7FyOy;/<./hE5BntR:xTA6%A0TLQ0ψmCJ9#0sBۥz}p" +"֕'C˼[94Y;~ 8fΞ o_-M胾J#;VGO&7c6On, +'zKʼnڴ:&MC]/Lw(2Y[dG[6>$Aܩx{=Z _-u(;꨼\~ڈH&)19~$ uܙ!y$߉KnftjV+3p39t*#&t_wgSc&̫ S"j4Q"\]>v$R0fTǀ [dP kO"2Ee]L?0`-_jxO!bʹ7#3(aޡxm?iZ]F@?g4A;r(u{ u$)k9mi4DT}xXc :D7唕>z3Ϟjd6:\ޚvtlbuURl9nU sz=٣*XoA+!:$QґrTM6j$6?C g!9BG$P{DJ[Q6!Y4d~366ԴÐnXo]s|Jǘ2Y-њcˊjL//-js4+ Rj]o}kI mh1{ҸFk  |FOo0xR5q!לF<'Daꆉ4%BO6~KraΙ3ts,٧ !gWU aBTnKľV?Yz x8vk^7qt4}c|xi vbģ7tfܘ]K@6VIA k#HSaw{K.nĔ_N|!9;9b$Ǿg~%+UԪiɬHɼ bc{Ƽ4jWa} Zm*/"NeV‹)u*׉\lsf\ϖ vL(Sބz["E#+= 7oGrPa9UTTS&[XNO;<[iTz>wo.,M28ДѶpf\I#SR2eWPsP`Nf4Sh%YOJ<9yYNe.p4Ա($99\W2~(WӌڝdNvjoKeqVաBc~:fc ^\mfAPĎ3NrG"~9`ܔ+n E)edJ hi;yVeMQm#eP?Dٞ,G(\qj~?LA^D |Ԉa'!U'52ZM#f)TE ں8;fpdPZҧ mY9`r4j>JbCfyOOid/ݾH-)T6n^ZA1wf4+\G0?w鶜ob.x6wԬ(gJZc0!&ns&m\DA@ffu\H-.97?/4B8]v&H5,#8VM+)aՄj4#VcHA n7І&%\x@s}iKIB"O((,XR'݉{y|̔7dc{30Dz< 4۽s`)2'7XEDr+9r[LNz(%y"+ mNd ݤfUV;{0TbE1dfw)uW PL8\|UjJJ/*:4[Nj<imm |nND 2 ;9Jp ې8#9m3ˀ(MAD8ʫt1i`G)P&(mm]R_hL|کLVZJJ$ߪ@fdo2:q~C\[/ 팣ZŃ"NSGttuѱ5q$:!k9u^΀6BۭCe\ 牳6n{M lTB1Pn74*DK!O| fh)hOOdo#'tV-ܡDtt?;A\ԔCYR$l%WZ9$T>B6L_:m̱`9oyg{OA1Ժ4-8Z5j!ŠtĞaX(~Vk2뮉fje@3nI%g7 |/֢c7bܦQuƄWV71<\v[@Wh>r땅B_#'PMieJщ&E#" jbZ/Il$>c,EytA9Jvq'7/./gS\ZaH4x\i ?qC(Lٗ[Ƌ\htQ#b3*tPE0~ق"V<%45PүkaT%QǣFm}Bs fw{ItSZ9\xiy<Mg'$T[VOEA.Z+" u;Nn/F{aP8/.G)ڈdk*w4Ұcr,ȑnj= SNo&K Lyr }^TWp\oƢ8Sd!&{Rb\Ǯ6WveYl1i6")QaȆ!܎"F28qC/,7XVh5irgRslQ#l pu k;ŁEӎU\9׿ wZDx7*W(@"lx&F!A5\_M[Ҫm% ȄVsSiB!)8ѣ2/Xq23vY uYeH*[#ګ ђi7~P񴲵zx3G9dtܵCfߧcPRyNŦԈ?bVeY*!]wHZnnŸjH,G_d6OPA.`Ri&aO,z=EqVo@![´="{𩊙P *_?wۛM>MP6i_ABޤklsl6_B1CW–\#ˑ %"* oHFjݓsLRgaˠvƔ^IQTE >Qwv(v:bn{X58;"IɎ kl\(L2krmC$@7zG 4?y~\vQ:SMO:=Bym ;p2d *UE.ph0G_2YZ$X~:w0`?h7e֬ѽHx pQ67E+2ej˲q6GB@:MjֈogWq؝.[ߖwe\aZܙ8mwOVTҍGO]Q-6|HN4LO Y5n$py&u]_(o5gɴ$Y~1#sws{' T"z9FE 3ez0p" J@%0? 
RBwO]yQ,-J⛄ˆ8f;cݲgV!cxpj.\B?Taf@'R$ i LiH͎>HxZyKkI2N"g gaMDF&fc^LC}Lj(rW$"]Dw``qsvk [|VMy RR MuXnV% <,)b0Ai:o"\@W練3#ktq P(u2<)'.}ac\Mɤa3 P8Je39,Mo"ck8gH}heT(E7Ej%HlBaU-rk잔Tm9`9XMߜ˗J D3JƤ 2]BߥKkl*wëq$ɂTP/'n/[;5Xju2O~I[FS$qRAˌx*Ad&K脅mɸi4&>g|RF\oM=]c8t}8-D0-.~9uJW\az"W~{+uU$vYjYtC>^Q=A x5yUM uKds=K-Ջ8hKjv-VtF&NeifĸMLa+&90kDTkn =XrNFK-/Ҭݐm(Ecw%bFҤvTy:BmU$O+^g#N zTlgL_YxPyk'j~Vdġ_ qӀ VGPI56WZڇg#6M/K)E78Bki #wOJ֚8dB֞w77)x ,\bxi4 hP#4>+x{xb9OfWD=h' [IY,v3SuQE7kp @?`aXֆHQbک2"pGC7Z7/F`XQZtHT6xWt>YmkH.[[-s3w'渉澘F#8wf4;:抴b0-UDj0kygAM` *"@zɪJ5j0Ja*?2cBHRhڠNM RPZvЛӚ } /ysX|.4!%\%GCd7WHs1CuM "OUKZ =|/" @:8v._FԾ (-$odt!҈O%G!Ʀ;B*~/#}o7۰ܱV1+@\] jP:/VnJ {HBq&sg%J>_FѼ`pAp~%U(Ou{D}dJjR+ҜѢ QK èo'RN`OEO7GE2be)׹0nf,2İK}Ľ6\лو NME4 ]u1{vO)E#.[̡ 1ZGDGy,o&SAu6#w(qu8MBCK+η*cK9s6ylqs75g':Cɹ?Cn.3s;ىUh#MQ 8h-M=ѻ}`(guו < Ijk<+> kBu՝)mB: 2 qԖDhXI<QeߒܼПQ}A(ݝX2 =fc|5r +,sfƣEQGz72KaJ|GrhjCk%f@gROtnp].<ҊS@s8x8uMAtW֩5'"u5o$=.d#|Yd*(iZ,.hffߵS-φwkYK؝ 3j{gu*잙d_=ud'G?p ? YpCt[c .f" /j9e@>ZUZLC |Rut &3uF:Ot' ~c8v˓]WvRhY6[jPtD`C0#N]MJqkq4m+#Y'0*w?Iuy4eH5g$*XNhB6D/@;j D 3&T.$dj!Ξ4_mfBT}ӅDw IWnݙDh ψf+DŽ11j{oz7l@y !rvJ3rk[wZ*yɕ 0@91՝l "@pлwsQ$t! F<Ȳ8| WuHЂqJu̴ .=g[d/KX:£=mcRCi(jHpm1[APEGԄ, $uȜ;bhUheS&CTF*`klTiRꋟ7mMZ;t)SsBܮ2=;mJ{R :`a$*:Hy3E% -'1@c\GZaB_6:^b)jS`,8odua%z84YkhAC;p Iu .=e UDcare`ՖzF.]ajDC;<7c`&'13[ZTK$Hrcyb><~04؞1wfaK"P?ГgﱄyFB}d!3˛ԦV2Ks;M׶?HhԿa踾Eə_b2Pa1]?5zo<2F8zIviwLJLJ ^;Ey 5_K^(e3d  ۃ~}i*'ϫ"kFc#S/(AeyHӺ5gW0Vxfh9IƦ\#2] jssʷB-'kZF=-zq,tOM"y=M$R.;q%hڌalfah.,B5-uh6x#;2g44BA! R=˳{+auvLzA .ݠG,ZVߺ$R:ThÞ!&]6{h2huKYoޅ2 ;YnVbooosPbі@vҖ%NpG? TƽlN.yݔv U9UAƟ :x#BxkYmӡbea,3N&6s1o40/R| 3$wLg7 -M8dCkWG;@ m?W[`#~@r;WƍaV. 
-) y vgr-7m-~F"f' D U:1#[f$C9{3 ' jGeӖp-&j''gJo/F B5gFh}e𣱌uH*6{tŐ6ki b}%}IqX0I{&NUj)?\ 86כ]ſ @v/Nymaa{܎'7e%#{!'}ْخ Wْf2ߏ)ev5~BgoߢN\ND>Xσ*3yHJ,[#߮ZBkL8oĬQ{ՍH):?ԁJC^ouCoFM0*B1d5 LbkI}`qT塭hUq^f_g]@dFZ01_ ^ZۆԴAMnN[h 1o!Z}P6ji^}OI%Τ&UwXASD^nýt^^RpId+5jt4K,'oKVէ2Lt < $S=Q!Z_bSG{F1'FH-Cߜ@g}PRQa2j,!UK0|g2Znp9*L=5M1=6HXzIFåQKHIV9_O FY?ODx/'Eg\W;pa%g^&8Q;^)Xw6HAr \ TQGnhҺ:5[٘xA`_|^u=)-5S 7#eTO@#7Ņ: f/f 4u{W6\RȝF\M(Ѹ8WO!yrnµu-a63Dw4#8m>xR <# O|{^<߂cGx"Zsd"}0)C)ꢚ[+G^Nc`jUH"/^ s* +œu9 `rlr,1 6uAukT{ijݭ6K:B<g"+&aW:" opj9EN\χgX3Q1upP+xoqNtA5=-~[w HRlX#^rH"t1#̕d˯;#zΘ4QW0|+'<>!=ԤPD%EN`V=l"7|,! ^1, jw%bkm*[Iɵ6GuﲮtD0AQԹZf{}dZo. Lg'UKSb+κ/*G,@aӮsg(8kn sayJOB0Tx31x:-2 s{J^|cA=O18O?kWb n<+Tc_>όy0-Tv@7]xL:%'w+Xm9aSX)F_Ӎ/9N?-d3". \{ {H,' Zm=d9'`)qƷM _7ʝ$kTYjWkb7L"f8s{M4$ 4S K B8n7ɘ J7McX5 ~{&bNPg7$za;RfTL?ܵb{DBS!jRTZRV`4ɎKx$r *T(?\nV ]aؖsE{)luW2_xH}QE3Jo$ Hy+ʓ)18 EF:TN+ŬC[-3vwwљr(Y*/5p)h]g`+$sӿگ$ -J :6\@d|/+qK_wJe7[&V `A򓑭Xag(8)tJ1&j虢 ,z*<[CbK}Sgc{s Z]wX7x+5Ș€20ZM)-j&+&,8[Jjj,夜yS2 ) ٭iL߀B~LLkb{q[u_TQՂD{n{$վ_1lYx;Aܣ-v [W JYq9PSI*隳5|`RnW2Kj8{j~{<^2jZ ]x/v$a6ON6~i %x3Z1'flklh6@/1WMoMu&cdwzQ0ϻ\cU DTV:SW2cr㜸\O{wm+wU鶚J98zum ⢱E 0bWE4tg!7kD|qZw4Ge4Ý Tnx <=-ZBL4w)YHOo8VQO;&y:. p::ea98YT'Qn]!["ךe[6؍C$\7ba:aYƶCxD&ÉEQi 7qez} 2kczC͎hRdizJ2&!HۖaYv8Uǵ/fOǾr4fUl2_KIߨTBCQgTٛ!#Ѽn1,r:q3mULKzQ|[Cᐂ4_U;7S7M҃LA:)\H 7ϠŨg{ ;￝Σ|KQxuX(򭻢x,j8=\"<:> yy{+ue2OJ!?KPkėߊ ij31EMwR uxW%gN7:3уmv?p iq ^2.I`i$*y;/ΕnfY\'WnUr,SMK:f(?\A Ncz^!qD rg";  }|4^*OsЎ 5ed'sjzrL-w\z͟;SG^Iî% &d}E+i|ORٺ[vC+:~v {-|F4ToHUջ;hߌL:IaySߙ|tKjFfTYC(9OؙơkS>G9&exL75 E*@4So#UkAV Z-|d/0YI޻i㸗$S\xJ}8$R^x/SgMYX]L?X{ >M-RR` wb׻wJJL%KO*r@rA*୰D G;@pV-qdyD{#<^E f|-cUuY 9~^øiBڝi _c i!S2-s g}qtͶFu$J ?u]poYZd#I][M1 u_,qz v鼑c{>"?tAHr%M#i!c{ .%fbSgmXܩbFpX OsBRǒ-(M' ka6& ~b 1\[_N1GQoKSa_nFih!lIC~ɉm,L]sO[ۉ@1GM3 GG*h@#~V.>fvĄb\$ 2[;LUJ u="B1H~|N6r=N*m&JZXWjUx9zha#BPf}h"3MrG<Ȥd?ۢTBZN7j;ZL>yoU!GZ cH,F`yUD6"Bj+|*>ב6v1 Du B:fLM#BԪݝ3xW]%iSWR98jmo|jAO_ _9Z,Χ]a L7q. 
~P 7Ucܑ}G#ck)FݨAlY#b䲖h>A'??R裸 ,WU;UY,^hRu.Gel3qDG5JHNiydŒ6`6&qZIԕzt߽;0ckJt_$ND!VWveS<㿝qđh(^#BV79 %u® &\ԓL[?R٦d8=*qorxiMB>h%5Z$I-1ۥ9UlZMVQ=x$1{h[gPw 3|b sKŲ{FbB;h +@TJq!A>b9~;"rH]+,S-eP4FJT!p`2^"'A<$;|lFÕvץvͱrTp*S\2)4.c,i=P$V0 h[u wЋ)zM.,Vgc[{1hەE}7U݌oV!CTnQ5>U$0-R| j1STƔ aYdyBIFs/0s ;7Xմ3ʦS>yHr:ror]J*1iHcʜR͙ʘ6Ĉ&rro=Ik8HhjQw˘{; 8z'ǍB~OOxXms[MdK=Yp #>530Q#as, t9^vI*PxmܚMvgVE)L7ˠ=5r*y/8.< ݴtX_0AE8-޴H2Ϻi)7Qt0n8uviTі7O2BWCIs zaf<ٓEP򺊴XbhΛaYd/i{Bfq#]CVhuoq=5%B'R-ߛM\,xdT\wsjzd`pVUw^Tz (7yLmm=`T1"barѲ7I8iF_#30[ -+:tCy̤vD#U}ު|4ծXpTQ*c2q0a_A~+5#I}-Ԝ2jT'(!Dޅ0s݁TF37oz܈fD(IJ(fͽJP>9꘡t{K/cs޵;EZ¶ZWU[~1H&zW]6P2Wr$=Z'k,7nt wSgֈo턁p| fuM-B;DӍz\$Yv_{u߸a6R>yd;KE6! tSRQ7tI:T5Z:B`*N x;ÊJ$c&Xq=9tߍT@7͎);bL r+Dޓ iC~ƂAP:hHDaf ]@W*|ksͫQwCHI؊쭩|{X6A/7cnř0\85;/^%94b *! 1Hᮊ|掀IDjeR5MSM$M]c5.+i#RƐBݟy'*ÃH1V52d$QK`lR3ŵ MSR[#Uk<³MMͽ7Ίѱ}W/T~rTעCUQ e 'f,0{Âj* **qW ZB; \l\mҫ Q( )ZnEd$5i3l"$w_4-ldvrl9 %j%^ky6?PomOz djq.YC={@Wi e}DPOnM6xEOOr f߯rL GVShW@ľ,{/9 p5y]֠»[t.wo7$>Wvv4]ɷL۽&,N^ki&n{ #hmurwO ĭ'6-$~*2:db+c>2 +1 Z@V݆N r݃EX+θ3Z!/:Sj`0TvQE ;CGZ"PiN&8Az|e^ig㈁:hP?m>qLmk"qֹ@_&zkSF0|:XZruՂ/|0}3M lZ[[y0D2FW {9}Kpޏ@\kj1hƩ(-ACfS z4lk@ۦVJQ"ykozB x|]EnVb:^㘒+G15/gv^?FͳgMhoksEr8?|vf $k s-þT(] !E0N]a(t}tZzi-SuxlmYk.+U zS#ghV<9DbmUg^sj/Ş:an֦x 1΄uthBLi a"OZlW%/m:O2ꃒ)2i}!X2W*]ݲ:RMi{!脸J\ Mo /.#ju9.fj64 _9{ASxJM2sT`L2=~O!K{l.{.F+@ p1CDT.ȫydB^gyjlb6 TEaI]cZ- ՁTZytM^MvV4A]͗F&ʙ%&ѥȣ s- mXr>PoVHl,ihTlL 80D7V璥` ϻ]u|,`iJ[̲M8z.qAT5ȘcHsx|E#}DDLYߺUn@=LfiR䁠Lgvٮ;lJ̞6ð&TAbmJRm2YxG#Zbc\sC5jT}%p)*-X1<OL \+U~0Jv:ǢZ\%ݻ#XmG̮-3&H''k5gYxe4?|k/&zTkC%OA!|)%:)VJ9$c;Z_eP72x nߑ$sDyN s֞N9XҬz RMuv:x""5GN{Ly늧IqD畹]=zCxK[4Pͮs$U@`qӪ!SbCԬTN8lOR:K?̾>g?* 3ǃrLڭ#G^e@h+ DQB#F`KD؎N"AM+쫉M>oqQ̴w[pQ#HAauy-*\#~kZ¯)w!q.ŵCp\Hҗ 6_l㴲OmKVxMdz-T 1,3~20DR jb].E܉ (4!& Z+Reu;N:dØ%í/;v\{%-$kP[27dF+;0bn9RP̗]Ŗ0t@lK'^wKg0oc@Tɦ>?AwRvcZi bX̒Nn6 qHO/:z&ׁ˛fϏ,i!@ozLdؽkR#5jk&}pL^=K9L$-_KK 1ͧ֙5x k7PEհSX>&Y]5h3ܔM?ڇNV9x/R V{ވV"j)_eh7̍1%(8 23N4mvʋgQdsXYp F:#F 7M&oDYu M #}+NKD_< Mgm1e3$a(`Mpi+ћ:Zݻla J+gRӐ;*]JjG-M@3i^Ӆ*MݻqE҅f R=Wfdd3Io26|ˏ/뢅9tT.Km~G/4X6fcq( ZIfd;qm c21=Jp$, =7!HBg̊fH%= 
hQ~Fv.K2!V_CjdWP~ѸN.ZjBL>=t B~:i,Vā8wrZ_ڟޗbH%%hG䊩ؽm^ϹLYTk,.܎q2Kۤw ns+"c+I8.C+1 i 4(wPȕIs_4}pG%ڊCq&;ŞS<(^E%p$n\׋!:ZTu0΀Eh? _WiLF0ifsfKq#vttCk+&!'WLEhqbTQYԭi’Q*D$aޗٷ^i;`F p*B79\kFpZ})JJth Z .QQUgEu=F=)^U)I}` ┭:p=']iBSKQ}6rw~HVd|eáʟ|iIKn dD4.,8`ֽ+K \UKB_k{+#}{'GfM]|8hӘ[,WL_q%uN?Ob2ժ`I2,=[0Oe_f1rOΎfxtf:SB}1$yuiא欙I&y(#4!닅NV"4F~FeC Q=7wj7M^tNvp3MkQ4u ~OwXxO9h5VcvYy>O}pW!-@%kj+KcVL;7HZIqֵeMe DV672NfӀk6ݬc QD/EYv# "`q61l@%;Mi!ם 2(cR#ug-RpbNcRPх0Z ja9ޖB")7uٟaŖQ6wuYU."Frq.By0תVsfe1 G8 wtPsS,'u)tUq]Wi=ԓO`Ix)YS7h̶lί?q~EF !zoq#GNFtr +7mNؚ i ͑\/`23d*Q;2v ƻ'1$Qd.z1^g]-+^UcŴ'(|l% 3SEZlTv(7W7R2 (z.!ͷgK􈮮xP! jwT1uDo $BiH[7;F\jsy_^3--$!ARz H*_kFp[iU(*KYL#<6T Q_f0n&Ŭ(Wv)ܫC jrl\MS':Z60>WES"npRѴ3i rlדMϐ%"I.d\T},H/Q)]EziJ$T.a`8`ÒY20v|ʶ&&z8RvJ@*^|ӿ∹i:dqpHm2`Z[U۪сͪr8@qqi DOӟZ7)}OO!^%K?Rc[ުԞ9v= mQ)$_ ke&~5.mRq7Njjtu;YX_ 8Zwyvgf꼔'J ib)o"k6|] !z |ra3RDH%uBDޖ zÇz kiDiÛ^ `G5s) b;Ut:=q#0%_ڊQ КIW}ѓa Ӹ`n^FMDM> TQ[Iv&R4uʧD=mp~Bw#/haJ+ݼsv+DKߞ9!l耞8ZvRz#t?^Ryt4{- iR$Vd NllJ*Mbˡ4CyabAjmeޝAec2JV]'o=T}g6qÌO?vmx_XL˳o=gg:V*H3a{)WK\$٘3}A@450ӆU@L[ص3MBkү0M>hj.Wxp8ws[ͫ+3وUh$Jg޷ .C6o@ ;M͠^ڟw0vչwNJ+m FrȘn_h K m..Ciu֕SMN9Er/"m$7iHUᅮCѻalMq.B" W>틘#@CeyOD%KKA ;\=i"Q4y?q6 C^Y`>faG"k| @ "JEZ޸Cfz݋Z NJII@4L@͆;ņJ 2Fb.h{#c+G445 /jߒsבe4UjNw:r,&%IPTvŋL!Yă] l/ȏnqbau!.0iG7ޙk&;gt /PŒq9.b+p<..7/6ː a#f#m!Bx|_LTebPRU\A{J1yq8LPuQܽA1Ң :CH%3u~\58 5<\, =\yJ4#6Pt6vY-㎝oQ툄zYb(w*`=vgYe»Cs[ttgaٺwScVɸ#Z}k BT`ΡVH|FwCDdV2]D?HvIB(it* <_QC-gOhhpH;V\,n v2<-(=Ǘ|,kM>}hEnId#qwG;tb|dՖ (sYD?;kr*i$4\:jt" {>L &B#Q_G'^u7EN*g -Ï/P{ۑR@MۻiF"~fӕg4X~v=}ͪN_ZԦLk ^> VP#x4yVZl޸p0>c¬L VWYDfASXW(Z.^y_l<6TO9Q؎`*,)\-"*к>}XfD/uGr>!m406dq4'y)|(z^"lD"+.ϟC :d䷢Ϸ_iꞹq.W+afFK#GCEuӈ-{* T0A I#/|VI)9ZMN=Ա ]G&1p0>vKaL.UㄾbvƑ0k{lĝ} '\ /~XDkw{ѯ  CNǡO4I y7[hxn"K3 :(|BgBY19PA;)L&bsF PM/cw`1쨘9qԂ+;UL/(˯ 4fҚԖi$bru ,ۛhr}U+P,.?.MD%,`yU|MoG ?Ζus[43v0dzlF$M9`~-LfU5 T*gxЪEE퇉Pd[-T̈iB?xV~BC|-953-%R+n;imJSCZ}n!%lT- W^T a'!y[) :jS로Xwbje:'q"ۙ33`'iblH"(D0 w DZb SW|0C?4-AƪBL[4 b7#eVl@D}b.T)fHq,Hm '^@TR׮0Er23 0 ٯ`JvR]힇{r&'&8/̘%L^LmYB֏P~`~- ;a2箎½k~sFΞoiMHqRv1_g38JY5QF;̺h#'&d2c Dv=|eEEׄ$ZW 1 y0]Q J(Ӛ 
%+򷈘>RQ8mE+!c?ٵ6ZP)9%ż zG!>~Jwg`Y *KELò.|_*nӹ@>[#71uhhXdo65= e>$c0-/Y<r:.DC X{-cGBC=N=o]#ۃ?* `ѳp꧘'jj4J}gI6Y 7Q]PZL}x>b} ڴ RMA1: >()$n~*xxxAUM͇f55/un Oe5B>G)ױ9TT)d^x`ɚ>d ,vŅ.~S(rsP`!3@66esM4?Z1k_P'I4Z*) 5UHy7T/ Ċj oB!{u{A>Vr'PoR'UM&k@ MSlAðȱ[k>W㭞Tܣ?Ăt`a/_58 (@OBĄ߸\+6׾];[j 腫1EivLݪpwپ_]o=e^Jk+rkcAӤ+_c4(`-K â4[591qPiGRsDy1X4xCs6 $S~5ZPrhx,{8bW<)ORJ'ū8/ >ztIR> Eү[ oN?I ;h;9 mXA@"^(S}49 _cyn o坟P2gbMˀSQE;@?),Ӥg1{ %tدdC)2zmT3s c(9cge7p̻>OU63ټArO `T2* J-\<1k|)$/0X9"5|~u;3ťW_ü\,X\9/$ Ц%0*b*W:=,I-k-e &ak"ZQe9,FPj//AIsSFY?|Wv5_kg"M^AZg ޹wŁ9=YP۬n3ZV9v c7uZ;9Wgˉ1@#Tr? NEF191N瓅lXE`[h :)`I(,=NֶK%˜4s xOby4,j3D" ߫9q m0c`+w/|:i,'u#- ZzGԋS0٦OƼF#!`ĩ)&7* bގ=<顄*;yZ=z)1X{^ȖUjW|+ 26tP*}::( DY jJ`"K7^J]g qJ_3>VC+]ɂ뎾 aṡ|0@gO݊( gK1niQ+kĥ=fm }b_rҒeVJi,\ۘF,U5m .#/(SJS߿k#22obbIz#")rk<1 v:2W^3{i\Yr52i5`ظU &ܰy-$Aa)w8NҤ*FζW,|&d!ӷRl[hKX$I KvS\E-p D3iW5G%AEeӘ\J65F$o~ҝe%(Yש k OH5_2,㇏~2ch#M$wX|̫ 4_? 99i·5> ?FȉXɊz>XY,MJz ."CgP$~ZW Y0!b1 ޯԀs>dbQ6v<;E(s݈;t-47I"ǗNMMb}1f/c{ҰأoO!WyQ+H8h̲0=&"~K y?n힓~lAN+]mX,Tȶy+;>ء)Š'͌db&bPS2Hm}! t91ڭ92v|v0)9>J|4Աʗ0oCpoB;`NUrfQLzs PF>P4rXEͬsGIP di1T -kT]wPGu .h52E RV 774}'jQXEDb&4Yb~3c++=V}8z=Ծ{u,*rNngwiKw .+gVAM\P3d}-Q䩏XAwA倥-^*{*bjv,{ȿ{nbCűB',D1<0KgCSֈT r=[=UD9"EhYGIϓ0Z'gnF35Ч8z,?B^or[\hzo$%쩇rNޗd fJh;*] c#,UrZ` %w56=*}*)=%yBd}B0#X Juvܧ,b~+4kPpi|+Qi69!(I<1Ŧ.)ӚAnY=}JKA FoSgK\Yb%(z͘;,徳Fh:N_ > ֨hSi#=Pc"tyO zsG*4C⏑]W[4?_UeF{kl n!,;ԿT׎xҡ {Eb}T9wh΅.$淍^hD S*%ӿ#NjxOz OKT? Uw j{AEF+D"ѩ5^{.ZyT>Xe'a]NY}Y/[pT~n3LKv3j3]e6jv3mh LZlg5%e-ҵ(0[Cq-tP-f秾 6a3`yjZ+໾C:y`q-WAW-E1G3 Y TPLc@!H0PM:]cjj0x~`@͗S`ԜyuЃII9hi0U6u.aOLGjt/hI Un|#HTPO|\:갻 >vܸ>,ڋp#Û[EaFv dSrZy6.Ҕ X1C_4T(,-yаb2/X8 j%Gk\΃h14!HWwWU>2Q0U[Nmۛ^D]9bAk/ᲄy-`X3c8ytK |l%v~?2`85[-c_Xp+i EuA{mgF5{DGseǶ+m7yZ6-qDz: V5FgӌQ C% _YPΫAqLB*Xx5%9K`bhp5m-*Gdey6 ?_iXT5K#>`f7(9%. b )ɛ<ecr j>i&=e/Ed.-XLH/}+m\3'6wĞF Ш5R&'f F! 
%e|_K2R;\s{vQ;_ fĪnyP*D7Pg+̻ʕj0zX8sK_>fL먢{kٰrit3[wi0*>HH|AQ`xL.Z8fs:K1w>@o:gІ$&܎f1h%yh/2;:/o>wDFR_xj~;+R :Esէx%%Eof{_,?ACʇjP6yn:?ewT>-<*8ћ:9^=LS,^¬aGhK-bՄQV }gz/mclust/data/Baudry_etal_2010_JCGS_examples.rda0000644000175000017500000012236114157117042020724 0ustar nileshnileshy8? i$sEBDF($LCcy>~~|s899{Zkmit`3::::&z##O&=;GSUMtt< 9pj52ODy}Ǡ'MA̳MӿOdžT9KZ㮁w1վ3c2>j.c R hYXCvP')21hIw )+4q#ʈ6%3b >??{)wWqu7Q6cq4P>l '٣ro;= u0 ط77$.z<2[@r'dQ&rGsrиm==J49pMϾK6i>Y7 oF#6d)sJŝf[B;6+ۙ&{ 휵B |VL\L)Z_8MV2G.RFU n;6@pX1 5 [FU~}W-) ժlpec5PӃ}_=YyІK]FT '$)Y;5]չe͜J{ gv)NjIu54:p%P}T~p۩iɯ^ُ#4,48Yg4{pQ2OEӣ).~3.}"fո>ûMA7p@"a¨7%U~fx1[4I2S$sQf$ ~"T6 yۉ빎حh>aJC ѹ\K.|@w۟+D!g\۵+_߻sh' M\6XdK1Xk@Vo͈{do菣fxR}e%|;JJJ%44EУl LT[(1o#EVCv eO.~@n)G: \y~ 0&;ʇmިʘ^e xa'[cvYHj A&V,.0E -U{3R ǣ)?qŽXY=HmQ%\Ԛ"Vkq-6@fլ5oR"Qj~K]f^Xk^F[F:<␛eJP MrLDgPpl4M&6=?Rv>F 4k5(;vMHJpǹəh(cD19b 4< X:jf= m6gR6oHZiuNԾf5ot*>_* ھ dRx0pt<K$i|'k'j<(tI)YےU Six&iŔNDEK{Z/EԖS@9Qx:?GR *jαY\nֹ~Y7D߀xM֒vQ3ax{nmR[ʓ36Zt)㨡y}:8qSLwn9Rj7ov_[wᆐfE3^@) D/PFsL l+[v;h-nlr?A37.$cxA[[\VP;I߀xKDCфG(GE2XT=Lߎ~vzKsxgŲFy4gHd+Kriq1y0փc>}N?gӛ=p?0jhyĂ.>|,-(Up+ Oy4bgowࡓ[iD옡2HWq$&67M>3nnޟDm:ch=шcD8+в#>sZ8'̢;[7:esvF L"7rlF=,/ZP:I%l .$iHtPF&|Cu"P^Jq4i[\(@ӓNkUJ ( P c.2!Q QcFO3<6cޢZͨ[h*f1i0O t-+<sD Hm=HKG/hq0Sx,mW${Zͻa;b4\TD Al!{$~[zKZ)(MN:祣Ca;YA]_ӎ҄mEE[Glofi~JNJ*wkF C ]d#ԁKar3s@41|$ ɴqF%hhSIYǓSp꣉0 n҉+R"$DLͦ|Q`Pqd<MY_e kiE=j+)d6vF=.֣34V__ONW wN'}_N;渆w;Z>!gPncjEb7PsHMD4/ KpIL"AhsQ-@1^, E[,hfqW3cqXxʥ{/_)ow׷#ɚ}cQv}|Q>4q]y-/#J: dx$%'ɓ!h}ꇈ]@y'[Ewqb˟X`⽘@E ~fR~nVT7ATMn:Fu0l;l4h1Ń׍Y[Cm=f' g! 
̷*P .u'|8?U 2 '7yktBu?رr~ُ9V6Bc/f8b`f *q=8w X * A\G=.S8_${]/AnRiw zy?{.܏<]6yQW6onSlbyK4es^֡77'+4q*}3Nh+NV )=# RI{Y ZHȌ#jyyikV]̶@."Եu}F53O1GכA@T#—f3G%DWl!}'Pgp6PmؙmȷأR/eWw^E\ Oџ8 ր7L $\o@~(%wj :NOAv{׀djX 9)5`]@4i{Ww\bG(l*~8!&ޱ>]qΉ׻%TV~&$ʩy+cӮpw?W 0脻t||'o,ja#r[`[> dg H˭_=HRpWLDϿ~K_Ѩ-b{ wH&z<)hɫL I;HD4 ( Òp@,2\tHOYq[V,$jG.Nlo7yqAnݏ4 1(,~&4?xu7PpiӌAhb諶HSLU%M {1| ۀ.3 p05{>_OXt*Acf ~y?<xtu"Y???]$:t Y#vN'Fˣn[{[=S[0<>Q-v@R?Jg.ݤDA_Z\| kNhy\ 5)A_ɯoB޺Vݫn 8]/鈡9Fs4Db1Bm eҠX0ӁРI>b]^u2qks"e)k c Swi ~HP0ntZ\z j]~2ڶ5%iy8\d17njFS*(i[pE}I,eqܤ X}ɧ? oRfӲ5qexS!@ W-Лj튧_T6x2@y7ؚqK瞢ťrހ@KD* o]DlN< 5H-ѧ^ >םݝk*P"Zy˭s tuإ}e;󁐽 1s],@)KA@L9Ť%͒ 6j).<.۞,yёYnx_MeCLxy9-CAEh:9]+Qtl+%ow&=Boqȫ& y( 7 ?h۰$rXɉOrOF36N{^R=͇Ozg/ҭ,ǎ4 U`%N~HްBï$so!b>:/\W@ o.nY@֧'B 4riNs b;`$$5m[=q:M@(l;b][I B!Ou\Ax) jy./+MUqH^{|ɝ(Qr ۹w[&o ͒x_vNP>r$Q }:qQY 7n$,(Tߣl4f:Ɛ[qUQr7XP`w^-|r|F|~A]ƶ(ῄۺ G__x 0;$c5;X2! l?wd> |??.rg@J!vO^2uϊB 2a]C Ue$PK]0}ŽǓŀDU'9eJ*vXBڛxn9^b8hgB#xt,٤cyV>mCU*Njt2 844HGP ʁѰA|pҵb1ws$2A ,4}g\h{*g3 2 =%n)~)q#?\N Ԡma 8S^xM`d.i][܄FELY"?J`m.*!H1bNw<zDV]O9+`[y8[uXIި\HFĂ+EsA CQb9eX~b~ׇ*٤`zzKim@ tX5 Ⱦ!~5@"u 7>27 |X##臡ݾ8- hI-gd>P\nȉVnsۂ;@`XL֏; kpdE:u`U|!TpTw^I閾 Gy ʍ{p P5 2NˎPix`܎F ]P};- Znj+MΤ#ql  [G_=g2BL@`x=ԹQP%?9|> 7=ZDSh$P{\@TشLG_ApDLy;W-GOK׵V6OO)_R$ׄkaM\4ߛ.Z3;4{<6Y"W[" CX-7!՘Kh:@Jܦ04t;Y7dt{kS_pu>zZ\R=^*a}}59g"_۲Ϡ()3uG"7z:UltnբW/Ţ!ߍ_[bYVTH=P1TDB( HPWVP$nS2(fI*TURVf䏩}RGS;xjz\ Wtէv]ٿI $mNi@K_mt!S_@ry:VTJ(s5M n3S50Բӫݎc?kn3ө]gkZ*lD5te*jFSQ>'A' S' a|4P4̂Z3e~{džGYfxt5w1n$[gZrLP[l CA j*iѷF6:$3oIɱ]^ MW4;='lD+6G4ԣ'1_~*5Q{?><9fLJqC4҅)7@"@pWMt&dLC P[{Woɺq.hhj}CwQX:%C a[cr4s=YHjl4s&${_9Q⌷Bh4@x_ Ho;}ֳnOg)vYYq~-)pZܶFxR\$EB w3 (S@ABO<ڈR~rhFr]\TvFXߣ@ k JѪ(LK:XLyUm~ҼD'ܿ)-hn׺pG3V{V[$o7>פX,+pJ0O@ϼQ3NKy(I \/_~-ۏɈ/⃒ctΈ8XARcWMfE (Nf!~)Wq\fx(B[^X٧31] J SZ/sS D>KoMI|8_VUkcy4urnL `[^M:@^]_BvGI\s CGz[Ar-YAںe_<`R:ÌBٚhH}w07HZLxt|]M\S [LZB̌8iN)iy_*Gs%g<51h`ē=o^_~,͗) WG(!PQ:$~< _PMG Mayj zEw{Q֥)_ft[ l@l}*9 v@`)DAP'@)M|LáܔPƘZ"Tէń 㝂Nf-p"y Nְ3tX MSob9~4gPw\)j 8v(%D]xeƠ oĽk ̾?>ʁ 1Qxx$s,ЬGAݶ`Ió4ʐ=-]bɋ4=-HR2#|`iI8+ET y ISVn^j<=s/) )4$fxkxU 
yh-9w=%h5؊T*v4͢4BݬwLwقHh˜?7]Ezk)H#7P>=9Z&KOWɍ4$QWP鎗 Yd lnR J@KqAQkekq u,?"GPfnM^m!꡻>)rꘗ͆, ӸJcj}i*xKЙ}%RrmЊRJAĝc((#)뇟 HTXyEf]@j 97ǸܩshOlYg\keP*&NMTC䇻 9u:(~{@xV 7N=DnoZ3binfm^6Fjr+vEh8e`O5Ev$(lg2lQtV^$bkoeeDGM iZpgţaE4iQG@#{_>r6xI(gΞ ō\Y)>SSo怤 ȰGwcḿ5VujSٓ'z6޶/Z#bЀEezݯ4])NXlH4nͬ[|;Ļ/BAkQڳU6pއMhM@3Jl>jw8 /ԣw\+cdfFg XoME|iMe_MU?ep+0ofy*)MF2J@KhPHEvGT]mN<=K/&Zۖw:eQm7Xtz0NuPܥH3( CErűK*xsG~a3ASvˎGd: <Pn^f{$ Yx&{3{u?g%?e^ٓ>m{ܷmA1ig`@Vyl[S%Q#cNtY(M,OќDWZ>r[6?"R d14vN\=:wku/(ťoυFJ 4_nU.JvP~1\zqsIOwB]onw@":KIſݕS_v5ArdfZl 7ZzUq3jOf?f3k4k׎.豈vD+:mFSY_w@9r<|pU{Lȵ K(omG6rS y'l`o&s_h%sx` }qJ}Ip{O4TrST0F5} Z?n^rqҠH $m]/ŭߧ[z(jπɧ ̍7QnC%*89I4͙>4->r0~&?;dŵϦJ(ϞS4{ #Wu.aRS簮>zm64ϫ/B.Ӱj\#S=ƥDέޱwjhCl 6{s!<@V5*& x}ظcEt<R'J%a%kS'ί}9!43s M4rB]׋߹YJfeT/{NnC9zoOo2[nZc׍k@Jzw-&si#vyPĬ?@TU87؍'G8T(^ָrcȬi-m{Sk/<;}/@vD/F@<~rd l}н.쟍u"k)QW:!k.iEW<+4ps.{6 !x۳@"n,MAxǾU@ ]>Jۏ&3}RAGqVQE @fTdDMy[̻.롊}CUluU:v=hAj?\2U ~e16d*U$;S3Op炠# ^֓m i`P E l/;Xd <%ARxqʉoqr}q5.9oO J J3r4.ᬑy <4MY4"WfT6oTg =@H|}w= y:y <~^0_74e[ P ~7K[G]nT0RY{j#2@x* q1/ο>pd*CKP%XdZs-8 }zq_в ,KfMQLp|иw_cM︌&ٰc$Ta Lfo=GS>ޭdaWFI3ʜgjㅡv(hsbDF\Kqzyct#д[~9j<}q}\uI._7+%“%l;UYtë)dUۘtp?͎EmERY3ix[1._[-Ś@ ՑtovԋYP{"9kvxfZA4|b56IrS+ c`r n 96>[^%yoT;}Kv $6E܁1ႚ*}).JC&A/%GA{pW nB m7Z }i^ɕ[CCQQG <%8+|O6/λ⥗6;oेqC#n1H/Z_Āj[I- ,i Tl\+t2hexHuM?p[3S=_J6oҷmAe'^f=B1apZ\_y+83{ԨQ'u R)2pwM(6Wd@p T141x9P: ´m<+r&ĵyi_NGoT;%0:渥{ﷁUK@"^\ z PuV*~mfІ\4u5v5_ PGRX@mCRӻF' ?~BJ'n?f$@nj rۅ8;?hN^g4x4a w;`f 鿍 Ͽq˵+bZ.A +NrE`Uj+)c5wkL]I ™϶ Ũ=";BEeLCsr#*Q8j fgsADgԆ!^zb)o9z<]N]yo_:~䟥KĨqL/y'p2Tw8ވZ$hM5y⊃S@tɇ]*b"o- aQ"njb}qԷx2y Dv8 x`ۙ7_aWc+e&Kx'\^5Ȍ|S𡶬xǑk X~iр4 Q[I3ݤ͉*81$:uy&pfc?2^a# wP$+r_:v9 yL3su{kߟEjRg;B@Nb觇sa 4νB_o8[͑*'Fܻ / 6Eŭr"EUxp%x']ý{2 wE-Kp{#]EϠNt1@ -hJ uU[k_W{ +@zFc, S~m4"_S'Շ\Ĉ{$ gw8aqv\0?>>z W'p0n=boѠ } 렮ApDhô.&w+SK@ci!t=hr@aɀ ش5|P$|mZ\w}s3!KWix0t@ܶi:N= :#tJ<(HK3bѯ)WfOd nd7h")@9D'2߬h3k}Ϋ}&;\d8A`uV=+?TOo%=Q@%7 udXqn  ]N\~N/ܦ+O)o *0 A(cޮ:4ᬖe7{ND"6^r}n-H=m Tr9kq~ 6'}}~ o PN F8}LG58'%&B$9`j{o{ҙX ͓c훁7yP/@s3 D*jGhV!KzC<Ű,Ƹƴk:g:])껳(1ɿo+{ 6 ;,Po 
<7-jLLۄSidzUMlJvڸ*c"8cdqh̪=;Fz/bbh0Î8`MVrN}:N;HJO7 ݏkk AOro3/Vڹo*k/h] `;F'7#pLۣ'ʧ/k_Z k GS?x)k?~ `(tI`.ϙ51S5k-~}wpd4d&l%iqvWԤQS5O6m&}lP*L)`. _l0c^z\h;qX$K̡-6r~#[ EF G8B?w"͆ϸtJ#}[γ&*+^zF!4tVH/<܉? wyf>Iʸ~]8ܵ4)<<CbDOHJ mk^˅;:PrXsR,!N5*Jn%IM?¾XUb큳ñ ׷-6Ddͩ7р͹t6`&;?pe0|Ց^#h9a#yZp&S :Y޵x }j أ4;Ѯk  τcӥ 7;% ¨WkRVB (4'xvLf]í\6 qKB'QwշP1Bȡ7he!T@ǂf>^omG-swG*։M*Wr~3y`=3tެ>C>t|KAU%n)m Z (vC΅u}+q6GԛXkLle'Bc2ܣҽ滞[ -`TiU pJ7ّWOQ1394N9ꨏ|j-&-r۫n-uF;s[_(D ;Jږ&\IQڷg+nwq>"YcqE/)\Tt'QșJOqU$+VWG-yB?־S#?q:1<7",_,gos m4D}S~<=Hho]eW.mJy5θgǭ'\9eSX6\L~J0e6fwoj,a~#/oTwC#C+k{͟CCNu3-@sSVg-@C3E\Y/Ήv +ţVD<ͺXF9Fm]Ipܿ 6 NBi\1U)6K=sG h֬ELPNPm8/]j_؛x?xF3<յlXY*EOq+v-m8 җ Vwaos|mʝ7)\Nfa?ԕBY3/1> 'Jm4Z!5&|8Nڇ?BņY¨EIN*k~%WЈiuO* fYAZ3Ǿ8s7ڷe>S"mwڄuui2ylU*i*h'}mOeٯz̨z6U܄* O9 Xʡʌ#z^]{y,r*'ç@้wL//P\ mPZ} 7Ef"!+ R/\Jp[c/Zyg1Oy-8%}ENv )gB|9 ڧ'j' eѱ/x(/cc J=aj U) \@̟C5v)*k -4Z uam-|AFt99bg$@ȣ&Do }>1G@+p`2roD! y JRw;M%.fflGxU +Eީz}Oe D5?$mķd?'~߆m?ʠ1gZ +ʿ._vY%JL'<~GR7c %&CGzvx4[єІ8-)F wn]+{lGy@CUI6!Z o-%>Tݻ;ѩ9:z ~Ȇ2秡YABe+Hr6|bsKß)~@SuHH锚@N^|hA|ZAsܛZR~v krBkBlp<)H -J7a?[˼"qà+| D'D9 ?g? ԛԐ@srzyvtnyvwW{{hJOB#@:{o=~Ww ͷ>NetO=!N 8 ^ xÞsxI &d͸nQ/>?E l s _r>C#s4ܭ-.ՍQ(d(}%sVTW|WY?\uJk??ejteE,*SԦ#hFށ;' [8BpOi.arJBl bT@zG7幋Wy noOY燤TPv=WS_MK>tρtAindsn2{P^J< _)TCBkO ;K_羈HSg7cYM@ٯc,{޸^LӘBe9:}^we 怱fj'ߚwm{=+ac~X}z32Jŭ$SO̢?O[M~M睫R@Do] Ne4iƱA8=T^&m}(`i'EfNWX Lڹ"zi Y'dfgu*G6?ڹ Wo639ossMV6\lJ+i3iiQcjqy+ Zȼ ]*;{/m8+СeML&ø+|C aJ|; &Ytj* SIRɮ>O /)y~F#7Ax&L"ү8 ~Ʒ xtfP#AJVɳ&*PLlNԢӛ&;MgYhI)3K7x u"wmgK ʓ:/6"26yw>+ r5 uH Ν H`=9AoݬsfTʡc hT1v(D9Iq|޻ߤy D6FEco`‖']F$L͏CzjK0Og gT|.x!~xRǺ}W*j}Ui!=wA _Qo"[Ats/%ZQ89ES<DYgp)Rs)~c]?H 9 Cm6~u tq Ϧ4]mA4S);_DZ 'u Eh㖨2c|m/m>6V-4I}nC]A2-DΆu`0|3gϯ< u @rGo3IgcK𬻳Ş8D]|A"|@ⶕt޷@Od;6|̹ |ol*T@:e nDNد߈6T5 ?x/u$<) 9 OZ5l~6o9:~7)q.{6W`~({}I^izBTT;p6lN pihLO@mO`-;%+m^n{ESA7n "L/+&罁 iWhnQs pa9wKr(cmBuz'-{p@@GЩkN'C$gB)-41P ?`@> -FkxI3:&8iN6`hMK[@T$>*we `pI+r¶|ɏ@knkd#n`6#A8Ǵ$x$n=>WUnt^=ڌH=sU=\c6*hkp BQ" TP6_t ]6, 1o߻ [ l \AI+UW$nNੈɟ[PS'+Zu'n'1] Ţ3sWfyQ1s3pae [o9?| Me"6+Gbkx4XeO K/(6! 
WaT#ĎagVtn N7|o=H$zJ2}8/Ի & AzI0_ځt.@۲5JW?j=F~@ٵM7fW~$ *-ef*U- 2 $=»҃^(Zi>Tm&D1ֈ=z:s8'? _d?Ax#Ǎdiɳj+^K޿2M5ʨA0ʇ@Fw!BI'R"5]%sr- &tVqw/DLQYlǖ$YxS~u6 OZ1i/ k#݌en r+ALGn=* R*]nA϶dEOIek˲bG' z*ݍ/↬@~v"MGq9RwXT*K'uܸp:тIӴ*ZdcqR9r]n B 9߀ɏ̗Ns* s/%La lWm,[/X N*Ǭ-EȬȸ,4 7{3;;{~B=V}NԟLpB͝h?ػkxG _C #=vb ɲ%$4.|W@Q){*C h,I ^j@e:ٮ k(c}I{qƳ6k ;] (X0/5,)\K+1 17=Kַ6_ ҏ–o.h@Ag$v ib@0k Y"h^5Q@ޓM42 caLTɵ o=g_w~U5 kAĩQBYyk:3DiG ;}=g>ͻ٢k}\T@<#-%#a!3_@rTMǟODZ54ݕ>sgm 3Wx?y]rvU'qNUsJ"n, (Čm@WzR/x }LJ=7Y p[>kSfB֜GܖXsu`SRQ pOLKP=;Tz&[S ,E# Ph+r,Y6=8d@l|$O:/|S=x?N|thy~?/rEH}/6Q_?ߌ*(ܒ)u-Ӭw^:-,B FVT^XaWBBOBݩb|Zƒzu7,\~Vˊ~!W K7Ŀ0=S~Q9 2 /I犗h!&a[>q^u9XQoÊTu_j)|у:+^Wӊ;,.[< El MpkE/T#64yGwGOLtif hN Kqe=tbx N7;]v)5o8?{b.;ueAl\d4} _ |GZ}t q:}HnbiccDkcƮE;c Qۮ_×TVIEYV4&f{u;wЗډ[+EL"92W0nFlĻ r[6#K 4>-oz&IA6h{ ~}R؍,vlR\؄fモnZ)fnߚ.48cb x]!LY} q۾x2H8x~;IXQDՒJqd0ڑs2AD%GfRcݰɕCOTȕ돬I ugv*CA]퍷lE~?z9*Ζyt @QF X}B=Ffؓavv "]_x\9o/oM*oVIA#y!(Jh > B*E՘9N4[<'@^zrҏllPfEcnTBlgʷ:j7˃6xg9cVTJ/TFcժ[>[UJT{`9*cJ7=nphק] +Ov+;{myπdA"8^r\j%d MJ4rC>[9g=Iw_~:?p%thý5|Eݕq]VZb/IJNvF铮51lN4_X^"^H[w|oglҨqN4т̖H CޟA-ѓPWK'8aZV%,m0 ~d} LqP@q+`7[%CZʏooG+ A3moK12(呿mMe-_+0Wzr۷Ο &ٸ˛Bm#O˺@U\z7*yt-12BhVpT(tY G*CMR`c1;a<$qo>xR6tWM?Ҟ+/^Pz2`5Q=tz:W1F9FKTeޏQHuUNٳ@.Py]84(OT:}ˮr1cl1 >EhTR+@CRy v 0=6klKpkn,'%ʯ~3hZo=]ExBœ)rF2ݺpϙ7|ީ ⏧$7}|x<&6'Ą9FA­qQq )m_XAN!|z9R$Ge7ԥE=Æ=|exac,p-Ah.ߪ5U 3Ɉ -gVR 5'4o E8/aF4g ÃSG'(:w*ZtKAroL]ZE ep T,z3y%Hi|ʘظ0R|^~c} Qt.Akqo4zԓ @9hCWipp)OX"з$nW=dp tf8j3LzH{o&1RCTcx`Nt?Ȑisý]oZ+8Azs Pڴ *TcfA~QN  v:̑ u9PT7yBݣ6X9T>nn@}V5tH4M7+ka !A[O gGmQrwUݿG/7+&ZTLdޛ\wgv%DgZ yjN G-A^S w*yC d-rN~`8ovfKn`DT(>qNiJxُxw_ڹemx98dueK_ xT$y3={6M^Ju7K  r ]y4~ɧ(~ w߫ЈJU5YMX3yˋWTmI߀\}: [<U~r .գ # ͤ"< RdB_ EM{' :~qPҖ[ONN'xXࣛv7|MAzWT"IoBS}6@~k]pPO*>W;RPoTdJM =KEwY"ޅ6l p=q WqZ:^9 y!r6f},q[$lpzඩs.)@_ٴ͸bn-pPz< W 5+LtO 'ߠg^."(|:d 2*+xzSD嫳 c@Ѧ; ӎd}7vVil`2(%.mY (-Z;̠mܛyl uU4OFS@JA4|J'6-ɑq57'gKzQ߼sJ(\]u 锎Z.`m{`([e%Qz;!=.0gUl;Cl c,1\9Bù2C$f+>DZ(AGUۗSFBא ɔ1JX̅H1C%P%DQ(QP$TTD[Tc`ؓ)!C|9wǾ6=Zֳ#_ZշlӒ8RQwXu&7`ѯ>z nC\?'ʶ+߾^0-UWߞໆ+o=ܠ`c-Ц$ K支SH|_u zc*Vo!bH f(bo$+{.X Uُz.z9 
b2ߵ/Hv`Y8n1o"VE-aǽ{}iwRtt꣕sOqx R>}eh!-^ Mm-uWkp!{BF#s& zc6mzNܻɒV݋"M#H3o޵b}oV[U:vHpBuqs'ycjɒS}jF5Rɽ7K`Ag,>8>~HUSw);/= EblG5ALR "R=84{Wj&`GCbBܘ.SWmHKfP H=;⺨}z%49ɾjTeOz˫_,h@[Hcޭ7SSqt"9#E/"HΎ{2e鈻^0'4n" n)kٟ'#HQH@{rF,h·HgWKHjkxV2G=[{;nvepNc*^'CaEqr-nVhm`<#Oz#ܕtYTx CVn1PңP+gY\TWb%ˑ y/0f\g'wґJN]/5iIE_YЩ:y>\u짥HW6 i(E"?juخWǗ6H@ N xkE0&8??8E6Er7}Mx""R QY w֮C*O<:6ظ."(!+OF+ZWy_X %nRLqz[#x$f̧HuXV[fwӨxG߱q_ ~|ݲH}Dd2FWkH@oi>%y<ڍ˜;erV)L2a!OTy!e[Mpߑ7a}ډ􀭹7y(U˭q@ ?<!W^7=2xUsF02!׌ R40; loٽb&o<| VD2_-8i8?{WA;_;rauԐƳ/zowC]`HxZ>ǁß˼ =݋Ӄ!3p7? Õ\:I12R3v|]pr69I/w%яs@9Clp.t'\z^/P-5X\ ,BSͷqm,^=_'aK7|Sr0Wp҅@T^<kG|/ 09jvJj`͟yE%_2EF~oxo <r7BR`nu]U C|/m'vrHth;+2+M !qRj#gy?{l3o_VQ烊0fTV;m>C?; >)*fèw;@QUiV9O8bh,XcvS@z0G=Xji C;OOr݀clo0}Ζy;\ {E߹ *u( 3:#ɈP$0?N0;^{s+|<\掇XW] ' K@Ч^# 7x9t tZ@m8\ 꼿O150? Ǟ@ŞTGV 6dsW@Я=8iy>yn6x 73ohSxt?WIԽZ8_1wtgswaX=;]qHˋ(&EC?R'w1~ޭ "uoa$~G+L#Kt͚WOj-tY<= #R.)w ެ!ݫt F av 0mz%F:+xab0޿G~0O;/f+oyP;ǹjEI(5FBGni*8jR"tޚXhu'~݄Qec+a8(õFyk78J nVrΆ!;x'l VCil{`V2H!4FR>e {7@KnhM1y8N_.g`*$8czwv q: YټJZXg<:tȶf FNn0b#A0w<9M@G7bʁrN}fuЋ_)Z#ڦ Z>:Ja"`m}ZsFvxFO Ȟ?/b|O5趘&%Į 0QoarL|qjt0qqBy=08Z0i2@sKn m0;`ujg~ U$gwa0[IQ-Y;a'ׇ)~9#Mz?$޻cBQk'P3jAp~ *8!Q|×O~Ŏm@@\21Qni^>I?a?ܴ% Ù5.[Oҧ3i-g9I=V@QW%@uZ2H,3&q9+NMZ`V.G{`,̙6c%ڰ O܏\G"8Ho.Z=[LmTw<_N10LJͶzEhYP⪙Tf2/+{-gQf62qU5އԏWzuY֌|O,O'#̳jضr;$oYSܻB}Q,grsI&FK%O&Sn^OS0]ȿmm⠦ePZ KUnaڛr7^?l`t&\P͚m~VR|uRe ն-Mp!RX/47Yn<"^pU|;Pd8Ffkp?V|ɮnOİxL#=6Ѻ}@A{% }8,`.s21}&pv!³0)w4'0\X&IɛVR UrgaP-hC6q;}c}fu- l½@.9޷qmL.ŊcsT`~=B&`LR#'^[#Ư mDryejy6 ?D&y1^OFbL*P,4-E:. VҤ0O+pD ux6(qW1\T݊{~ ( 57|?^!/gr-{zw( _`uBC|!0 /gI8bS07L.0?@u"wo [G-81te~9Y:RåN]a0^hMuh\޹-eiEf 0X)M޵}-a@=x3^sLi:0gf^[}/K c^@Qp`wrs#|mj*,^li1M5a@ZH"hDOWS'q,B:zRG _8ԦE`),٧kO?) 
r@/p(B:L= J\\&/VW\q@nH΄&a| sȏT%`|Ǣ^wg[: a؃N ۖ53+obv%(Q8 м_<[T ߞY[ TJfK@*ƎzW c!j*{C[ #c28Vbsq`hQnS3~c{=#r*ԋH< /+pwxЫp/A{s@6%d[@ʬ]+X!vy$fV(}_Ei p[zp _P/㮷O`5ս6 ͞0iƧ$x ew1=ݫhcqHEO~C@%y{>) D} tṞSu?z R{PQ[gq>p>`pnZ7k9JIwXB{]cq&| 1;Ḁ]S&Zn?Ox?I!{=Xm`=/y}jnpxq: uEa$t#o|4{S ,9^3+|ٞK`*U]_mj8Oc?8:/=Ӷw4[-$2>ṱj>`l^|hwz gۀJy솃j'Ϛtrx~ri_PV#d72; ˁگź_$pI>` =c8(_s@ ju3\1z:̬wfle ԫG4SF袃( } IJ#ڎ^]Dy-v ŇەPP0 ` ~zR.jE6mtDIvW1*+_N(PTEjk_0] #b<%T}̨tڀ v3 dm)]xxY~u\z6TZ117v8##Z_x箊\Jܨ~TKa2 vބ5Ʃ8)P+Ӛ&Ա8b*(fk"2<2x3Udbu4,{hf` ֱ߶(.i[a%al)|έ`;e|KRZoa qԅ潴Ey]*̹eb u.F߁u֯|Ƨ,6T.} v\| (e-dDK=PGj$?}K4U~{@d+M@{w_ӯY5h^6-,w]P%B6C :l1tk }p/JJyJg%@ێҤ6@J>/S:@;KS|π;ڛ9K(JרhAS>/v{XnQr֯q@2z3h@*0'%6/SH)2Z$-3SXf5  W0\w#049eWӄe`wq </`-I[1pGR-n$) 2zqN@B/pyb l{=)f/N!aM|@<<2c8O/X=![J8.(n)I# l.I>?a`ms @x^_^2Pll}|r7 -BE߶P2+ ad({o`%Aӆ6y[ %I@"gl0/Zry'l̟&Tjc<^٨guUa5@p#@G?_x/0 Dyݠo8.4pRp% ȓ]ʂrJ2tk/Ֆ`pW E@5y# fxAܠz (MZa@ &7+u/pۊ&:HeŀI3 b+P3+?S规C 6]bwwS9J <;jRʈ$1~N~ |I6,W)BI}=,[}F'hJX%!Ow̬NsA۝3~?csRa0>ur@] < $ m1N}L/!@RH/Un^o6c: Xc~L~wU o9-* 䘥1Ĺט^\KLT<@꺟yŁ*ÑzӁ|q/1f@oߋEZ Jשӹ 4UǒVEbˀn8/TRocܑx6s|yR!-҇Zb1׵($Ï(6;UabT}fټ[-s4z~a;$DVIƽ d>}^ܡI2JlIca$c,6l_Ck=&pOnW1o"|ֻe~VıX;պ_,ۇ!ՄֆWxb$ !j!fgE+Od1d#>z >^ֻ: 1t>^> FnHo\@j}? L{(3/hL{f۷&B3ag6٫Y]*W#!)mVS}F)2z#e:2.4(t 4/fHT"@[1ǷG:bt7&Ą(FFOJDž, "}ޕZNwILC_2[<Y,iX(po䡍RF~p Se#O")iCq'T:ߑ⥜YOLŐLK*YK A&E4~8gyDf)2yY !$0HYwn|ٰ iȮ~"5ƍxXΔwCrOEIsdzdDŸk HDOZ/ha9Οmclust/data/EuroUnemployment.rda0000644000175000017500000000120514157117042016643 0ustar nileshnilesh]SNA^.h  Rbb\Z.VB $shL3_/S?e3r$iY֨5>ǂhƭD`o|%l{\h `*x=nOw6>ug{G_"d,~龉!?=K>nrc?E)4lSC_5|跊9. 
N-~黇ְgNrKԯ`'ȓn'XA/KYIetE>bl#'=r[?\u!nNx/7y.sA}W&aBp$"Ǜqqk)UW\֤j &Ls4'l-mp֘r^Ȕy?s mq1U'w+6"lUe 78pZAb"L6d:l%f&y%/C` H;Q_[R;jB_?7uO'~ k%pYz}CW^!{ѯaW9 :[GԪ  /U<܈emclust/NAMESPACE0000644000175000017500000001416614150150606013140 0ustar nileshnileshuseDynLib(mclust) # useDynLib(mclust, .registration = TRUE) # Export all names # exportPattern(".") # Import all packages listed as Imports or Depends import("stats", "utils", "graphics", "grDevices") # export(.mclust) export(mclust.options, emControl) export(em, emE, emEEE, emEEI, emEEV, emEII, emEVI, emV, emVEI, emVEV, emVII, emVVI, emVVV, emEVV, emVEE, emEVE, emVVE, emX, emXII, emXXI, emXXX) export(me, meE, meEEE, meEEI, meEEV, meEII, meEVI, meV, meVEI, meVEV, meVII, meVVI, meVVV, meEVV, meVEE, meEVE, meVVE, meX, meXII, meXXI, meXXX) export(sim, simE, simEEE, simEEI, simEEV, simEII, simEVI, simV, simVEI, simVEV, simVII, simVVI, simVVV, simEVV, simVEE, simEVE, simVVE) export(estep, estepE, estepEEE, estepEEI, estepEEV, estepEII, estepEVI, estepV, estepVEI, estepVEV, estepVII, estepVVI, estepVVV, estepEVV, estepVEE, estepEVE, estepVVE) export(mstep, mstepE, mstepEEE, mstepEEI, mstepEEV, mstepEII, mstepEVI, mstepV, mstepVEI, mstepVEV, mstepVII, mstepVVI, mstepVVV, mstepEVV, mstepVEE, mstepEVE, mstepVVE) export(mvn, mvnX, mvnXII, mvnXXI, mvnXXX) export(cdens, cdensE, cdensEEE, cdensEEI, cdensEEV, cdensEII, cdensEVI, cdensV, cdensVEI, cdensVEV, cdensVII, cdensVVI, cdensVVV, cdensEVV, cdensVEE, cdensEVE, cdensVVE, cdensX, cdensXII, cdensXXI, cdensXXX) export(bic, pickBIC, mclustBICupdate) export(mclustLoglik, print.mclustLoglik) S3method("print", "mclustLoglik") export(nVarParams, nMclustParams) export(map, unmap, partconv, partuniq, errorBars) export(mclustModel, mclustModelNames, checkModelName, mclustVariance) export(decomp2sigma, sigma2decomp) export(imputeData, imputePairs, matchCluster, majorityVote) export(mapClass, classError, adjustedRandIndex, BrierScore) export(mclust1Dplot, mclust2Dplot, mvn2plot, surfacePlot, 
uncerPlot) export(clPairs, clPairsLegend, coordProj, randProj, randomOrthogonalMatrix) export(priorControl, defaultPrior, hypvol) export(hc, print.hc, plot.hc) S3method("print", "hc") S3method("plot", "hc") export(hcE, hcEEE, hcEII, hcV, hcVII, hcVVV) export(hclass, hcRandomPairs, randomPairs, dupPartition) export(mclustBIC, print.mclustBIC, summary.mclustBIC, print.summary.Mclust, plot.mclustBIC, summaryMclustBIC, summaryMclustBICn) S3method("print", "mclustBIC") S3method("summary", "mclustBIC") S3method("print", "summary.mclustBIC") S3method("plot", "mclustBIC") export(Mclust, print.Mclust, summary.Mclust, print.summary.Mclust, plot.Mclust, predict.Mclust, logLik.Mclust) S3method("print", "Mclust") S3method("summary", "Mclust") S3method("print", "summary.Mclust") S3method("plot", "Mclust") S3method("predict", "Mclust") S3method("logLik", "Mclust") export(densityMclust, plot.densityMclust, dens, predict.densityMclust, cdfMclust, quantileMclust, densityMclust.diagnostic, plotDensityMclust1, plotDensityMclust2, plotDensityMclustd) S3method("plot", "densityMclust") S3method("predict", "densityMclust") export(MclustDA, print.MclustDA, summary.MclustDA, print.summary.MclustDA, plot.MclustDA, predict.MclustDA, cvMclustDA, getParameters.MclustDA, logLik.MclustDA, classPriorProbs) S3method("print", "MclustDA") S3method("summary", "MclustDA") S3method("print", "summary.MclustDA") S3method("plot", "MclustDA") S3method("predict", "MclustDA") S3method("logLik", "MclustDA") export(MclustSSC, print.MclustSSC, summary.MclustSSC, print.summary.MclustSSC, plot.MclustSSC, predict.MclustSSC) S3method("print", "MclustSSC") S3method("summary", "MclustSSC") S3method("print", "summary.MclustSSC") S3method("plot", "MclustSSC") S3method("predict", "MclustSSC") export(MclustDR, print.MclustDR, summary.MclustDR, print.summary.MclustDR, plot.MclustDR, plotEvalues.MclustDR, projpar.MclustDR, predict.MclustDR, predict2D.MclustDR) S3method("print", "MclustDR") S3method("summary", "MclustDR") 
S3method("print", "summary.MclustDR") S3method("plot", "MclustDR") S3method("predict", "MclustDR") export(MclustDRsubsel, MclustDRsubsel_cluster, MclustDRsubsel_classif, MclustDRsubsel1cycle, MclustDRrecoverdir, print.MclustDRsubsel, summary.MclustDRsubsel) S3method("print", "MclustDRsubsel") S3method("summary", "MclustDRsubsel") export(me.weighted, covw, hdrlevels, dmvnorm) export(icl, mclustICL, print.mclustICL, summary.mclustICL, print.summary.mclustICL, plot.mclustICL) S3method("icl", "Mclust") S3method("icl", "MclustDA") S3method("print", "mclustICL") S3method("summary", "mclustICL") S3method("print", "summary.mclustICL") S3method("plot", "mclustICL") export(mclustBootstrapLRT, print.mclustBootstrapLRT, plot.mclustBootstrapLRT) S3method("print", "mclustBootstrapLRT") S3method("plot", "mclustBootstrapLRT") export(MclustBootstrap, print.MclustBootstrap, summary.MclustBootstrap, print.summary.MclustBootstrap, plot.MclustBootstrap) S3method("print", "MclustBootstrap") S3method("summary", "MclustBootstrap") S3method("print", "summary.MclustBootstrap") S3method("plot", "MclustBootstrap") export(as.Mclust, as.Mclust.default, as.Mclust.densityMclust) S3method("as.Mclust", "default") S3method("as.Mclust", "densityMclust") export(as.densityMclust, as.densityMclust.default, as.densityMclust.Mclust) S3method("as.densityMclust", "default") S3method("as.densityMclust", "Mclust") export(clustCombi, print.clustCombi, summary.clustCombi, print.summary.clustCombi, plot.clustCombi, combiPlot, entPlot, combiTree, combMat, clustCombiOptim) S3method("plot", "clustCombi") S3method("print", "clustCombi") S3method("summary", "clustCombi") S3method("print", "summary.clustCombi") export(gmmhd, print.gmmhd, summary.gmmhd, print.summary.gmmhd, plot.gmmhd, gmmhdClusterCores, gmmhdClassify) S3method("print", "gmmhd") S3method("summary", "gmmhd") S3method("print", "summary.gmmhd") S3method("plot", "gmmhd") export(crimcoords, print.crimcoords, plot.crimcoords) S3method("print", "crimcoords") 
S3method("plot", "crimcoords") # deprecated functions export(cv.MclustDA, cv1EMtrain, bicEMtrain)