MixtureFitting/
MixtureFitting/tests/
MixtureFitting/tests/inputs/
MixtureFitting/tests/inputs/2-2-cryptand-dihedrals-trimmed.RData  [binary RData, contents omitted]

MixtureFitting/tests/gmm_intersections_001.R
library( MixtureFitting )

P = matrix( c( 0.0443473274465592, 0.130138383023397, 0.825514289530041,
               2.21235358083254, 2.00352959127296, 2.03247868665627,
               0.27455506306116, 0.0275176004414257, 0.106694771531626 ),
            ncol = 3 )

test1 = all.equal( gmm_intersections( P[c(1,3),] ),
                   c( 1.66800776372449, 2.33295709358655 ) )
test2 = all.equal( gmm_intersections( P[c(2,3),] ), c() )
test3 = all.equal( gmm_intersections( P[c(1,1),] ), NaN )
test4 = all.equal( gmm_intersections( c( 0.5, 0.5, 1, 2, 1, 1 ) ), c( 1.5 ) )

if( test1 != TRUE ) { stop( test1 ) }
if( test2 != TRUE ) { stop( test2 ) }
if( test3 != TRUE ) { stop( test3 ) }
if( test4 != TRUE ) { stop( test4 ) }

MixtureFitting/tests/smm_002.R
library( MixtureFitting )

load( "outputs/smm_002.RData" )

set.seed( 42 )
x = rt( 100, 10 )
sinit = smm_init_vector( x, 3 )
sfjt = smm_fit_em( x, sinit, polyroot.solution = "jenkins_taub" )
sfnr = smm_fit_em( x, sinit, polyroot.solution = "newton_raphson" )

if( all.equal( sinit, output_init ) != TRUE ) {
    stop( all.equal( sinit, output_init ) )
}
if( all.equal( sfjt$p, output_parameters_jt ) != TRUE ) {
    stop( all.equal( sfjt$p, output_parameters_jt ) )
}
if( all.equal( sfnr$p, output_parameters_nr ) != TRUE ) {
    stop( all.equal( sfnr$p, output_parameters_nr ) )
}

MixtureFitting/tests/smm_001.R
library( MixtureFitting )

load( "outputs/smm_001.RData" )

set.seed( 42 )
x = rt( 100, 10 )
sinit = smm_init_vector( x, 1 )
sf = smm_fit_em_CWL04( x, sinit, debug = TRUE )

if( all.equal( sinit, output_init ) != TRUE ) {
    stop( all.equal( sinit, output_init ) )
}
if( all.equal( sf$p, output_parameters ) != TRUE ) {
    stop( all.equal( sf$p, output_parameters ) )
}

MixtureFitting/tests/outputs/
MixtureFitting/tests/outputs/smm_002.RData  [binary RData, contents omitted]

MixtureFitting/tests/bhattacharyya_dist_001.R
library( MixtureFitting )

orig1 = 0.125
dist1 = bhattacharyya_dist( 1, 2, 1, 1 )

orig2 = 1.95606150021424
dist2 = bhattacharyya_dist( 0, 0, 1, 0.01 )

if( all.equal( c( orig1, orig2 ), c( dist1, dist2 ) ) != TRUE ) {
    stop( all.equal( c( orig1, orig2 ), c( dist1, dist2 ) ) )
}

MixtureFitting/tests/gmm_fit_em_001.R
library( MixtureFitting )

set.seed(42)
p = c( 0.5, 0.5, 1.5, 10, 3, 1 )
x = c( rnorm(2000 * p[1], p[3], p[5]),
       rnorm(2000 * p[2], p[4], p[6]) )

init = gmm_init_vector( x, 2 )

gf = gmm_fit_em( x, init, implementation = "C" )
if( !all( abs( gf$p - c( 0.5, 0.5, 1.5, 10, 3, 1 ) ) < 0.1 ) ) {
    stop( 1, gf$p - p )
}

gf = gmm_fit_em( x, init, implementation = "R" )
if( !all( abs( gf$p - c( 0.5, 0.5, 1.5, 10, 3, 1 ) ) < 0.1 ) ) {
    stop( 2, gf$p - p )
}

gf = gmm_fit_em( x, init, x * 0 + 0.5, implementation = "C" )
if( !all( abs( gf$p - c( 0.5, 0.5, 1.5, 10, 3, 1 ) ) < 0.1 ) ) {
    stop( 3, gf$p - p )
}

gf = gmm_fit_em( x, init, x * 0 + 0.5, implementation = "R" )
if( !all( abs( gf$p - c( 0.5, 0.5, 1.5, 10, 3, 1 ) ) < 0.1 ) ) {
    stop( 4, gf$p - p )
}

init = gmm_init_vector( x, 1 )

gf = gmm_fit_em( x, init, c( numeric(1000)+1, numeric(1000)) )
if( !all( abs( gf$p - c( 1, 1.5, 3 ) ) < 0.1 ) ) {
    stop( 5, gf$p - p )
}

gf = gmm_fit_em( x, init, c( numeric(1000), numeric(1000)+1) )
if( !all( abs( gf$p - c( 1, 10, 1 ) ) < 0.1 ) ) {
    stop( 6, gf$p - p )
}
MixtureFitting/tests/2-2-cryptand-dihedrals.R
load( "inputs/2-2-cryptand-dihedrals-trimmed.RData" )
load( "outputs/2-2-cryptand-dihedrals.RData" )

library( MixtureFitting )

init = list()
parameters = list()
ll = list()
for( i in 1:10 ) {
    vinit = vmm_init_vector( i )
    vf = vmm_fit_em( dihedrals, vinit )
    init[[i]] = vinit
    parameters[[i]] = vf$p
    ll[[i]] = llvmm( dihedrals, vf$p )
}

if( all.equal( init, output_init ) != TRUE ) {
    stop( all.equal( init, output_init ) )
}
if( all.equal( parameters, output_parameters ) != TRUE ) {
    stop( all.equal( parameters, output_parameters ) )
}
if( all.equal( ll, output_ll ) != TRUE ) {
    stop( all.equal( ll, output_ll ) )
}

MixtureFitting/tests/gmm_init_vector_quantile_001.R
library( MixtureFitting )

set.seed(42)
p = c( 0.5, 0.5, 1.5, 10, 3, 1 )
x = c( rnorm(2000 * p[1], p[3], p[5]),
       rnorm(2000 * p[2], p[4], p[6]) )

init = gmm_init_vector_quantile( x, 2 )
if( !all( abs( init[3:4] - c( 2.7, 9.5 ) ) < 0.1 ) ) {
    stop( 1, init[3:4] - c( 2.7, 9.5 ) )
}

init = gmm_init_vector_quantile( x, 2, c( numeric(1000)+1, numeric(1000) ) )
if( !all( abs( init[3:4] - c( 0.2, 2.7 ) ) < 0.1 ) ) {
    stop( 2, init[3:4] - c( 0.2, 2.7 ) )
}

init = gmm_init_vector_quantile( x, 2, c( numeric(1000), numeric(1000)+1 ) )
if( !all( abs( init[3:4] - c( 9.5, 10.4 ) ) < 0.1 ) ) {
    stop( 3, init[3:4] - c( 9.5, 10.4 ) )
}
MixtureFitting/R/
MixtureFitting/R/MixtureFitting.R
dgmm <- function( x, p, normalise_proportions = FALSE,
                  restrict_sigmas = FALSE, implementation = "C" ) {
    if( implementation == "C" ) {
        if( length( p[is.na(p)] ) > 0 ) {
            return( rep( NaN, times = length( x ) ) )
        }
        buffer = numeric( length(x) )
        ret = .C( "dgmm",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  retvec = numeric( length(x) ) )$retvec
        buffer[1:length(x)] <- ret[1:length(x)]
        return( buffer )
    } else {
        return( dgmm_R( x, p,
                        normalise_proportions = normalise_proportions,
                        restrict_sigmas = restrict_sigmas ) )
    }
}
dvmm <- function( x, p, implementation = "C" ) {
    if( length( p[is.na(p)] ) > 0 ) {
        return( rep( NaN, times = length( x ) ) )
    }
    if( implementation == "C" ) {
        buffer = numeric( length(x) )
        ret = .C( "dvmm",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  retvec = numeric( length(x) ) )$retvec
        buffer[1:length(x)] <- ret[1:length(x)]
        return( buffer )
    } else {
        m = length(p)/3
        A = p[1:m]
        mu = p[(m+1):(2*m)]
        k = p[(2*m+1):(3*m)]
        sum = 0
        for( i in 1:m ) {
            sum = sum + A[i] * exp( k[i] * cos( deg2rad( x - mu[i] ) ) ) /
                  ( 2 * pi * besselI( k[i], 0 ) )
        }
        return( sum )
    }
}

dcmm <- function( x, p, implementation = "C" ) {
    if( length( p[is.na(p)] ) > 0 ) {
        return( rep( NaN, times = length( x ) ) )
    }
    if( implementation == "C" ) {
        buffer = numeric( length(x) )
        ret = .C( "dcmm",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  retvec = numeric( length(x) ) )$retvec
        buffer[1:length(x)] <- ret[1:length(x)]
        return( buffer )
    } else {
        m = length(p)/3
        A = p[1:m]
        c = p[(m+1):(2*m)]
        s = p[(2*m+1):(3*m)]
        sum = numeric( length( x ) ) * 0
        for( i in 1:m ) {
            sum = sum + A[i] * dcauchy( x, c[i], s[i] )
        }
        return( sum )
    }
}

dcgmm <- function( x, p ) {
    P = matrix( p, ncol = 5 )
    sum = numeric( length( x ) ) * 0
    for( i in 1:nrow( P ) ) {
        sum = sum + P[i,1] * ( P[i,2] * dcauchy( x, P[i,3], P[i,4] ) +
                               ( 1 - P[i,2] ) * dnorm( x, P[i,3], P[i,5] ) )
    }
    return( sum )
}

llgmm <- function( x, p, implementation = "C" ) {
    if( length( p[is.na(p)] ) > 0 ) {
        return( NaN )
    }
    if( implementation == "C" ) {
        ret = .C( "llgmm",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  retvec = numeric(1) )$retvec
        return( ret )
    } else {
        n = length(x)
        m = length(p)/3
        A = p[1:m]
        mu = p[(m+1):(2*m)]
        sigma = p[(2*m+1):(3*m)]
        if( length(p[is.na(p)]) > 0 ) {
            return( NaN )
        }
        if( length(A[A>=0]) < length(A) ||
            length(sigma[sigma>=0]) < length(sigma) ) {
            return( -Inf )
        }
        diff = matrix( data = 0, nrow = n, ncol = m )
        ref_peak = vector( "numeric", n )
        for (i in 1:n) {
            diff[i,1:m] = ( x[i] - mu[1:m] ) ^ 2
            ref_peak[i] = which.min( diff[i,1:m] )
        }
        sum = sum( log( A[ref_peak] / (sqrt(2*pi) * sigma[ref_peak]) ) )
        for (i in 1:n) {
            rp = ref_peak[i]
            sum = sum - diff[i,rp] / ( 2 * sigma[rp]^2 )
            expsum = sum( exp( log( (A[1:m]*sigma[rp])/(A[rp]*sigma[1:m]) ) -
                               diff[i,1:m] / (2*sigma[1:m]^2) +
                               diff[i,rp] / (2*sigma[rp]^2) ) )
            sum = sum + log( expsum )
        }
        return( sum )
    }
}

llvmm <- function( x, p, implementation = "C" ) {
    if( length( p[is.na(p)] ) > 0 ) {
        return( NaN )
    }
    if( implementation == "C" ) {
        ret = .C( "llvmm",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  retvec = numeric(1) )$retvec
        return( ret )
    } else {
        n = length(x)
        m = length(p)/3
        A = p[1:m]/sum(p[1:m])
        mu = p[(m+1):(2*m)]
        k = p[(2*m+1):(3*m)]
        y = vector( "numeric", n ) * 0
        for (i in 1:m) {
            y = y + dvmm( x, c( A[i], mu[i], k[i] ) )
        }
        return( sum( log( y ) ) )
    }
}

llcmm <- function( x, p, implementation = "C" ) {
    if( length( p[is.na(p)] ) > 0 ) {
        return( NaN )
    }
    if( implementation == "C" ) {
        ret = .C( "llcmm",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  retvec = numeric(1) )$retvec
        return( ret )
    } else {
        n = length(x)
        m = length(p)/3
        A = p[1:m]/sum(p[1:m])
        c = p[(m+1):(2*m)]
        s = p[(2*m+1):(3*m)]
        y = numeric( n ) * 0
        for (i in 1:m) {
            y = y + dcmm( x, c( A[i], c[i], s[i] ) )
        }
        return( sum( log( y ) ) )
    }
}
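# Illustrative usage sketch for the density and log-likelihood functions
# above; kept as a comment so that nothing executes on package load, and
# the parameter values are examples only:
#   p = c( 0.3, 0.7,    # mixture proportions
#          -2.0, 3.0,   # component means
#          1.0, 0.5 )   # component standard deviations
#   dgmm( c( -2, 0, 3 ), p )   # mixture density at three points
#   llgmm( rnorm( 100 ), p )   # log-likelihood of a sample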
gmm_fit_em <- function( x, p, w = numeric(),
                        epsilon = c( 0.000001, 0.000001, 0.000001 ),
                        debug = FALSE, implementation = "C", ... ) {
    if( length(w) != length(x) ) {
        w = x * 0 + 1
    }
    l = NULL
    if( implementation == "C" ) {
        ret = .C( "gmm_fit_em",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  as.double(w),
                  as.double( epsilon ),
                  as.integer( debug ),
                  retvec = numeric( length(p) ),
                  steps = integer(1) )
        l = list( p = ret$retvec, steps = ret$steps )
    } else {
        l = gmm_fit_em_R( x, p, w, epsilon, ... )
    }
    if( !any( is.na( l$p ) ) ) {
        N = length(p) / 3
        order = order( l$p[(N+1):(2*N)] )
        for (i in 0:2) {
            l$p[(i*N+1):(i*N+N)] = l$p[order+i*N]
        }
    }
    return( l )
}

vmm_fit_em <- function( x, p, epsilon = c( 0.000001, 0.000001, 0.000001 ),
                        debug = FALSE, implementation = "C" ) {
    if( implementation == "C" ) {
        l = vmm_fit_em_by_diff( x, p, epsilon, debug )
        return( l )
    } else {
        l = vmm_fit_em_by_diff_R( x, p, epsilon, debug )
        return( l )
    }
}

vmm_fit_em_by_diff <- function( x, p,
                                epsilon = c( 0.000001, 0.000001, 0.000001 ),
                                debug = FALSE, implementation = "C" ) {
    if( implementation == "C" ) {
        debugflag = 0
        if( debug == TRUE ) {
            debugflag = 1
        }
        ret = .C( "vmm_fit_em_by_diff",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  as.double( epsilon ),
                  as.integer( debug ),
                  retvec = numeric( length(p) ),
                  steps = integer(1) )
        l = list( p = ret$retvec, steps = ret$steps )
        return( l )
    } else {
        l = vmm_fit_em_by_diff_R( x, p, epsilon, debug )
        return( l )
    }
}

vmm_fit_em_by_ll <- function( x, p, epsilon = .Machine$double.eps,
                              debug = FALSE, implementation = "C" ) {
    if( implementation == "C" ) {
        debugflag = 0
        if( debug == TRUE ) {
            debugflag = 1
        }
        ret = .C( "vmm_fit_em_by_ll",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  as.double( epsilon ),
                  as.integer( debug ),
                  retvec = numeric( length(p) ),
                  steps = integer(1) )
        l = list( p = ret$retvec, steps = ret$steps )
        return( l )
    } else {
        l = vmm_fit_em_by_ll_R( x, p, epsilon, debug )
        return( l )
    }
}

cmm_fit_em <- function( x, p, epsilon = c( 0.000001, 0.000001, 0.000001 ),
                        iter.cauchy = 20, debug = FALSE,
                        implementation = "C" ) {
    l = NULL
    if( implementation == "C" ) {
        debugflag = 0
        if( debug == TRUE ) {
            debugflag = 1
        }
        ret = .C( "cmm_fit_em",
                  as.double(x),
                  as.integer( length(x) ),
                  as.double(p),
                  as.integer( length(p) ),
                  as.double( epsilon ),
                  as.integer( iter.cauchy ),
                  as.integer( debug ),
                  retvec = numeric( length(p) ),
                  steps = integer(1) )
        l = list( p = ret$retvec, steps = ret$steps )
    } else {
        l = cmm_fit_em_R( x, p, epsilon )
    }
    if( !any( is.na( l$p ) ) ) {
        N = length(p) / 3
        order = order( l$p[(N+1):(2*N)] )
        for (i in 0:2) {
            l$p[(i*N+1):(i*N+N)] = l$p[order+i*N]
        }
    }
    return( l )
}

gmm_init_vector <- function( x, n, implementation = "C" ) {
    if( implementation == "C" ) {
        ret = .C( "gmm_init_vector",
                  as.double(x),
                  as.integer( length(x) ),
                  as.integer(n),
                  retvec = numeric( 3*n ) )
        return( ret$retvec )
    } else {
        ret = gmm_init_vector_R( x, n )
        return( ret )
    }
}

cmm_init_vector <- function( x, m, implementation = "C" ) {
    if( implementation == "C" ) {
        ret = .C( "cmm_init_vector",
                  as.double(x),
                  as.integer( length(x) ),
                  as.integer(m),
                  retvec = numeric( 3*m ) )
        return( ret$retvec )
    } else {
        ret = cmm_init_vector_R( x, m )
        return( ret )
    }
}

vmm_init_vector <- function( m, implementation = "C" ) {
    if( implementation == "C" ) {
        ret = .C( "vmm_init_vector",
                  as.integer(m),
                  retvec = numeric( 3*m ) )
        return( ret$retvec )
    } else {
        ret = vmm_init_vector_R( m )
        return( ret )
    }
}
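# A typical fitting pipeline pairs an initialisation helper with the
# corresponding EM routine (illustrative sketch, kept as a comment):
#   set.seed( 42 )
#   x = rgmm( 1000, c( 0.5, 0.5, 0, 5, 1, 1 ) )
#   init = gmm_init_vector( x, 2 )
#   fit = gmm_fit_em( x, init )
#   fit$p       # fitted proportions, means and standard deviations
#   fit$steps   # number of EM iterations taken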
polyroot_NR <- function( p, init = 0, epsilon = 1e-6, debug = FALSE,
                         implementation = "C" ) {
    if( implementation == "C" ) {
        ret = .C( "polyroot_NR",
                  as.double(p),
                  as.integer( length(p) ),
                  as.double(init),
                  as.double(epsilon),
                  as.integer( debug ),
                  retvec = numeric(1) )
        return( ret$retvec )
    } else {
        return( polyroot_NR_R( p, init, epsilon, debug ) )
    }
}

#=========================================================================
# R counterparts of functions, rewritten in C
#=========================================================================

# Returns distribution for a Gaussian Mixture Model at point x
# p - vector of 3*m parameters, where m is size of mixture in peaks.
# p[1:m]           -- mixture proportions
# p[(m+1):(2*m)]   -- means of the peaks
# p[(2*m+1):(3*m)] -- dispersions of the peaks
dgmm_R <- function( x, p, normalise_proportions = FALSE,
                    restrict_sigmas = FALSE ) {
    m = length(p)/3
    A = p[1:m]
    mu = p[(m+1):(2*m)]
    sigma = p[(2*m+1):(3*m)]
    if( normalise_proportions == TRUE ) {
        A = A / sum(A)
    }
    sum = numeric( length(x) )
    for( i in 1:m ) {
        if( sigma[i] > 0 || restrict_sigmas == FALSE ) {
            sum = sum + A[i] * dnorm( x, mu[i], sigma[i] )
        }
    }
    return( sum )
}

gmm_fit_em_R <- function( x, p, w = numeric(),
                          epsilon = c( 0.000001, 0.000001, 0.000001 ),
                          collect.history = FALSE, unif.component = FALSE,
                          convergence = abs_convergence ) {
    m = length(p)/3
    A = p[1:m]
    mu = p[(m+1):(2*m)]
    sigma = p[(2*m+1):(3*m)]
    prev_A = rep( Inf, m )
    prev_mu = rep( Inf, m )
    prev_sigma = rep( Inf, m )
    steps = 0
    history = list()
    if( collect.history == TRUE ) {
        history[[1]] = p
    }
    while( steps == 0 || !convergence( c( A, mu, sigma ),
                                       c( prev_A, prev_mu, prev_sigma ),
                                       epsilon ) ) {
        prev_A = A
        prev_mu = mu
        prev_sigma = sigma
        q = vector( "numeric", length( x ) ) * 0
        for( j in 1:m ) {
            q = q + A[j] * dnorm( x, mu[j], sigma[j] )
        }
        if( unif.component ) {
            # Allows additional component with uniform distribution for
            # the modelling of outliers as suggested in:
            # Cousineau, D. & Chartier, S.
            # Outliers detection and treatment: a review
            # International Journal of Psychological Research,
            # 2010, 3, 58-67
            # https://revistas.usb.edu.co/index.php/IJPR/article/view/844
            q = q + ( 1 - sum( A ) ) * dunif( x, min(x), max(x) )
        }
        for( j in 1:m ) {
            h = w * A[j] * dnorm( x, mu[j], sigma[j] ) / q
            A[j] = sum( h ) / sum( w )
            mu[j] = sum( h * x ) / sum( h )
            sigma[j] = sqrt( sum( h * ( x - mu[j] ) ^ 2 ) / sum( h ) )
        }
        steps = steps + 1
        if( collect.history == TRUE ) {
            history[[steps+1]] = c( A, mu, sigma )
        }
        if( length( A[ is.na(A)] ) + length( mu[ is.na(mu) ] ) +
            length( sigma[is.na(sigma)] ) > 0 ) {
            break
        }
    }
    p[1:m] = A
    p[(m+1):(2*m)] = mu
    p[(2*m+1):(3*m)] = sigma
    l = list( p = p, steps = steps )
    if( collect.history == TRUE ) {
        l$history = history
    }
    return( l )
}
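# A single E/M update of gmm_fit_em_R() can be reproduced by hand
# (illustrative sketch with two components; values are examples only):
#   x = rgmm( 500, c( 0.5, 0.5, 0, 4, 1, 1 ) )
#   A = c( 0.5, 0.5 ); mu = c( -1, 5 ); sigma = c( 1, 1 )
#   q = A[1] * dnorm( x, mu[1], sigma[1] ) + A[2] * dnorm( x, mu[2], sigma[2] )
#   h = A[1] * dnorm( x, mu[1], sigma[1] ) / q              # responsibilities
#   c( sum( h ) / length( x ), sum( h * x ) / sum( h ) )    # new A[1], mu[1]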
vmm_fit_em_by_diff_R <- function( x, p,
                                  epsilon = c( 0.000001, 0.000001, 0.000001 ),
                                  debug = FALSE ) {
    m = length(p)/3
    A = p[1:m]
    mu = p[(m+1):(2*m)]
    k = p[(2*m+1):(3*m)]
    d_A = c( Inf )
    d_mu = c( Inf )
    d_k = c( Inf )
    steps = 0
    while( length( d_A[ d_A > epsilon[1]] ) > 0 ||
           length( d_mu[d_mu > epsilon[2]] ) > 0 ||
           length( d_k[ d_k > epsilon[3]] ) > 0 ) {
        prev_A = A
        prev_mu = mu
        prev_k = k
        q = vector( "numeric", length( x ) ) * 0
        for( j in 1:m ) {
            q = q + dvmm( x, c( A[j], mu[j], k[j] ) )
        }
        for( j in 1:m ) {
            h = dvmm( x, c( A[j], mu[j], k[j] ) ) / q
            A[j] = sum( h ) / length( x )
            mu[j] = rad2deg( atan2( sum( sin( deg2rad(x) ) * h ),
                                    sum( cos( deg2rad(x) ) * h ) ) )
            Rbar = sqrt( sum( sin( deg2rad(x) ) * h )^2 +
                         sum( cos( deg2rad(x) ) * h )^2 ) / sum( h )
            k[j] = ( 2 * Rbar - Rbar ^ 3 ) / ( 1 - Rbar ^ 2 )
            if( debug == TRUE ) {
                cat( A[j], " ", mu[j], " ", k[j], " " )
            }
        }
        if( debug == TRUE ) {
            cat( "\n" )
        }
        d_A = abs( A - prev_A )
        d_mu = abs( mu - prev_mu )
        d_k = abs( k - prev_k )
        steps = steps + 1
        if( length( d_A[ is.na(d_A)] ) + length( d_mu[is.na(d_mu)] ) +
            length( d_k[ is.na(d_k)] ) > 0 ) {
            break
        }
    }
    p[1:m] = A
    p[(m+1):(2*m)] = mu
    p[(2*m+1):(3*m)] = k
    return( list( p = p, steps = steps ) )
}

vmm_fit_em_by_ll_R <- function( x, p, epsilon = .Machine$double.eps,
                                debug = FALSE ) {
    m = length(p)/3
    A = p[1:m]
    mu = p[(m+1):(2*m)]
    k = p[(2*m+1):(3*m)]
    prev_llog = llvmm( x, p )
    d_llog = Inf
    steps = 0
    while( !is.na(d_llog) && d_llog > epsilon ) {
        q = vector( "numeric", length( x ) ) * 0
        for( j in 1:m ) {
            q = q + dvmm( x, c( A[j], mu[j], k[j] ) )
        }
        for( j in 1:m ) {
            h = dvmm( x, c( A[j], mu[j], k[j] ) ) / q
            A[j] = sum( h ) / length( x )
            mu[j] = rad2deg( atan2( sum( sin( deg2rad(x) ) * h ),
                                    sum( cos( deg2rad(x) ) * h ) ) )
            Rbar = sqrt( sum( sin( deg2rad(x) ) * h )^2 +
                         sum( cos( deg2rad(x) ) * h )^2 ) / sum( h )
            k[j] = ( 2 * Rbar - Rbar ^ 3 ) / ( 1 - Rbar ^ 2 )
            if( debug == TRUE ) {
                cat( A[j], " ", mu[j], " ", k[j], " " )
            }
        }
        if( debug == TRUE ) {
            cat( "\n" )
        }
        llog = llvmm( x, c( A, mu, k ) )
        d_llog = abs( llog - prev_llog )
        prev_llog = llog
        steps = steps + 1
        if( length( A[ is.na(A) ] ) + length( mu[is.na(mu)] ) +
            length( k[ is.na(k) ] ) > 0 ) {
            break
        }
    }
    p[1:m] = A
    p[(m+1):(2*m)] = mu
    p[(2*m+1):(3*m)] = k
    return( list( p = p, steps = steps ) )
}
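# The concentration update k = ( 2*Rbar - Rbar^3 ) / ( 1 - Rbar^2 ) used by
# both von Mises fitters is the usual closed-form approximation to the
# inverse of A(k) = I1(k)/I0(k); a quick check (illustrative):
#   Rbar = 0.7
#   k = ( 2 * Rbar - Rbar^3 ) / ( 1 - Rbar^2 )
#   besselI( k, 1 ) / besselI( k, 0 )   # close to 0.7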
# Estimate Cauchy Mixture parameters using expectation maximisation.
# Estimation of individual component's parameters is implemented according
# to Ferenc Nagy, Parameter Estimation of the Cauchy Distribution in
# Information Theory Approach, Journal of Universal Computer Science, 2006.
cmm_fit_em_R <- function( x, p, epsilon = c( 0.000001, 0.000001, 0.000001 ),
                          collect.history = FALSE, unif.component = FALSE,
                          convergence = abs_convergence ) {
    m = length(p)/3
    A = p[1:m]
    c = p[(m+1):(2*m)]
    s = p[(2*m+1):(3*m)]
    prev_A = rep( Inf, m )
    prev_c = rep( Inf, m )
    prev_s = rep( Inf, m )
    steps = 0
    history = list()
    if( collect.history == TRUE ) {
        history[[1]] = p
    }
    while( steps == 0 || !convergence( c( A, c, s ),
                                       c( prev_A, prev_c, prev_s ),
                                       epsilon ) ) {
        prev_A = A
        prev_c = c
        prev_s = s
        q = numeric( length( x ) ) * 0
        for( j in 1:m ) {
            q = q + A[j] * dcauchy( x, c[j], s[j] )
        }
        if( unif.component ) {
            # Allows additional component with uniform distribution for
            # the modelling of outliers as suggested in:
            # Cousineau, D. & Chartier, S.
            # Outliers detection and treatment: a review
            # International Journal of Psychological Research,
            # 2010, 3, 58-67
            # https://revistas.usb.edu.co/index.php/IJPR/article/view/844
            q = q + ( 1 - sum( A ) ) * dunif( x, min(x), max(x) )
        }
        for( j in 1:m ) {
            h = A[j] * dcauchy( x, c[j], s[j] ) / q
            A[j] = sum( h ) / length( x )
            cauchy_steps = 0
            prev_cj = Inf
            prev_sj = Inf
            while( cauchy_steps == 0 ||
                   !convergence( c( c[j], s[j] ), c( prev_cj, prev_sj ),
                                 epsilon[2:3] ) ) {
                prev_cj = c[j]
                prev_sj = s[j]
                e0k = sum( h / (1+((x-c[j])/s[j])^2) ) / sum( h )
                e1k = sum( h * ((x-c[j])/s[j])/(1+((x-c[j])/s[j])^2) ) / sum( h )
                c[j] = c[j] + s[j] * e1k / e0k
                s[j] = s[j] * sqrt( 1/e0k - 1 )
                cauchy_steps = cauchy_steps + 1
            }
        }
        steps = steps + 1
        if( collect.history == TRUE ) {
            history[[steps+1]] = c( A, c, s )
        }
        if( length( A[is.na(A)] ) + length( c[is.na(c)] ) +
            length( s[is.na(s)] ) > 0 ) {
            break
        }
    }
    p[1:m] = A
    p[(m+1):(2*m)] = c
    p[(2*m+1):(3*m)] = s
    l = list( p = p, steps = steps )
    if( collect.history == TRUE ) {
        l$history = history
    }
    return( l )
}

gmm_init_vector_R <- function( x, m ) {
    start = numeric( 3 * m )
    start[1:m] = 1/m
    start[(m+1):(2*m)] = min(x) + (1:m)*(max(x)-min(x))/(m+1)
    start[(2*m+1):(3*m)] = (max(x)-min(x))/(m+1)/6
    return( start )
}

vmm_init_vector_R <- function( m ) {
    start = numeric( 3 * m )
    start[1:m] = 1/m
    start[(m+1):(2*m)] = 360 / m * seq( 0, m-1, 1 )
    start[(2*m+1):(3*m)] = (m/(12*180))^2
    return( start )
}

cmm_init_vector_R <- function( x, m ) {
    start = numeric( 3 * m )
    start[1:m] = 1/m
    start[(m+1):(2*m)] = min(x) + (1:m)*(max(x)-min(x))/(m+1)
    start[(2*m+1):(3*m)] = 1
    return( start )
}

# Finds one real polynomial root using Newton--Raphson method, implemented
# according to Wikipedia:
# https://en.wikipedia.org/w/index.php?title=Newton%27s_method&oldid=710342140
polyroot_NR_R <- function( p, init = 0, epsilon = 1e-6, debug = FALSE ) {
    x = init
    x_prev = Inf
    steps = 0
    n = length(p)
    d = p[2:n] * (1:(n-1))
    while( abs( x - x_prev ) > epsilon ) {
        x_prev = x
        powers = x^(0:(n-1))
        x = x - sum(p * powers) / sum(d * powers[1:(n-1)])
        steps = steps + 1
    }
    if( debug ) {
        cat( "Convergence reached after", steps, "iteration(s)\n" )
    }
    return( x )
}
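# Newton--Raphson root finding for x^2 - 2 = 0, starting away from the
# stationary point at 0; coefficients are given lowest order first
# (illustrative sketch):
#   polyroot_NR( c( -2, 0, 1 ), init = 1 )   # converges to sqrt(2)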
#=========================================================================
# Functions that are not yet rewritten in C
#=========================================================================

ds <- function( x, c, s, ni ) {
    return( dt( ( x - c ) / s, ni ) / s )
}

dsmm <- function( x, p ) {
    m = length( p ) / 4
    A = p[1:m]
    c = p[(m+1):(2*m)]
    s = p[(2*m+1):(3*m)]
    ni = p[(3*m+1):(4*m)]
    ret = numeric( length( x ) )
    for( i in 1:m ) {
        ret = ret + A[i] * ds( x, c[i], s[i], ni[i] )
    }
    return( ret )
}

# Generates random sample of size n from Gaussian Mixture Model.
# GMM is parametrised using p vector, as described in dgmm.
rgmm <- function(n, p) {
    m = length(p)/3
    A = p[1:m]/sum(p[1:m])
    mu = p[(m+1):(2*m)]
    sigma = p[(2*m+1):(3*m)]
    x = vector( "numeric", n )
    prev = 0
    for( i in 1:m ) {
        quant = round(n*A[i])
        last = prev + quant
        if( i == m ) {
            last = n
        }
        x[(prev+1):(last)] = rnorm( length((prev+1):(last)), mu[i], sigma[i] )
        prev = last
    }
    return( x )
}

# Generates random sample of size n from von Mises Mixture Model.
# vMM is parametrised using p vector.
# Accepts and returns angles in degrees.
# Simulation of random sampling is implemented according to
# Best & Fisher, Efficient Simulation of the von Mises Distribution,
# Journal of the RSS, Series C, 1979, 28, 152-157.
rvmm <- function(n, p) {
    m = length(p)/3
    A = p[1:m]
    mu = deg2rad( p[(m+1):(2*m)] )
    k = p[(2*m+1):(3*m)]
    x = vector( "numeric", n )
    prev = 0
    for( i in 1:m ) {
        quant = 0
        if( i == m ) {
            quant = n - prev
        } else {
            quant = round(n*A[i])
        }
        last = prev + quant
        tau = 1 + sqrt( 1 + 4 * k[i]^2 )
        rho = ( tau - sqrt( 2 * tau ) ) / ( 2 * k[i] )
        r = ( 1 + rho^2 ) / ( 2 * rho )
        c = vector( "numeric", quant ) * NaN
        f = vector( "numeric", quant ) * NaN
        while( length( c[is.na(c)] ) > 0 ) {
            na_count = length( c[is.na(c)] )
            z = cos( pi * runif( na_count, 0, 1 ) )
            f[is.na(c)] = ( 1 + r * z ) / ( r + z )
            cn = k[i] * ( r - f[is.na(c)] )
            cn[ log( cn / runif( na_count, 0, 1 ) ) + 1 - cn < 0 ] = NaN
            c[is.na(c)] = cn
        }
        x[(prev+1):(last)] = sign( runif( quant, 0, 1 ) - 0.5 ) * acos( f ) + mu[i]
        prev = last
    }
    return( rad2deg( x ) )
}

rcmm <- function(n, p) {
    m = length(p)/3
    A = p[1:m]/sum(p[1:m])
    mu = p[(m+1):(2*m)]
    sigma = p[(2*m+1):(3*m)]
    x = numeric( n )
    prev = 0
    for( i in 1:m ) {
        quant = round(n*A[i])
        last = prev + quant
        if( i == m ) {
            last = n
        }
        x[(prev+1):(last)] = rcauchy( quant, mu[i], sigma[i] )
        prev = last
    }
    return( x )
}

llgmm_conservative <- function(x, p) {
    n = length(x)
    m = length(p)/3
    A = p[1:m]
    mu = p[(m+1):(2*m)]
    sigma = p[(2*m+1):(3*m)]
    sum = 0
    for (i in 1:n) {
        sum = sum + log( sum( A / ( sqrt(2*pi) * sigma ) *
                              exp( -(mu-x[i])^2/(2*sigma^2) ) ) )
    }
    return( sum )
}

llsmm <- function( x, p ) {
    n = length(x)
    m = length(p)/4
    A = p[1:m]/sum(p[1:m])
    c = p[(m+1):(2*m)]
    s = p[(2*m+1):(3*m)]
    ni = p[(3*m+1):(4*m)]
    y = numeric( n ) * 0
    for (i in 1:m) {
        y = y + dsmm( x, c( A[i], c[i], s[i], ni[i] ) )
    }
    return( sum( log( y ) ) )
}

llgmm_opposite <- function( x, p ) {
    return( -llgmm( x, p ) )
}

llvmm_opposite <- function( x, p ) {
    return( -llvmm( x, p ) )
}

# Calculate Bayesian Information Criterion (BIC) for any type of mixture
# model. Log-likelihood function has to be provided.
bic <- function( x, p, llf ) {
    return( -2 * llf( x, p ) + (length( p ) - 1) * log( length( x ) ) )
}

# Calculate posterior probability of given number of peaks in
# Gaussian Mixture Model
gmm_size_probability <- function(x, n, method = "SANN") {
    p = vector( "numeric", n * 3 )
    for (i in 1:n) {
        p[i] = 1
        p[n+i] = min(x)+(max(x)-min(x))/n*i-(max(x)-min(x))/n/2
        p[2*n+i] = 1
    }
    f = optim( p, llgmm_opposite, hessian = TRUE, method = method, x = x )
    return( f )
}
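# bic() can drive selection of the mixture size across candidate component
# counts (illustrative sketch; the sample below favours two components):
#   set.seed( 42 )
#   x = rgmm( 1000, c( 0.5, 0.5, 0, 5, 1, 1 ) )
#   bics = sapply( 1:4, function( m )
#       bic( x, gmm_fit_em( x, gmm_init_vector( x, m ) )$p, llgmm ) )
#   which.min( bics )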
# Fit Gaussian Mixture Model to binned data (histogram).
# Lower bounds for mixture proportions and dispersions are fixed in order
# to avoid getting NaNs.
gmm_size_probability_nls <- function(x, n, bins = 100, trace = FALSE) {
    lower = min( x )
    upper = max( x )
    p = vector( "numeric", n * 3 )
    l = vector( "numeric", n * 3 )
    binsize = (upper-lower)/bins
    for (i in 1:n) {
        p[i] = 1/n
        p[n+i] = lower + (upper-lower)/(n+1)*i
        p[2*n+i] = 1
        l[i] = 0.001
        l[n+i] = -Inf
        l[2*n+i] = 0.1
    }
    y = vector( "numeric", bins )
    for (i in 1:bins) {
        y[i] = length(x[x >= lower+(i-1)*binsize &
                        x < lower+i*binsize])/length(x)
    }
    leastsq = nls( y ~ dgmm( lower + seq( 0, bins - 1, 1 ) * binsize, theta,
                             normalise_proportions = FALSE ),
                   start = list( theta = p ), trace = trace,
                   control = list( warnOnly = TRUE ),
                   algorithm = "port", lower = l )
    par = coef( leastsq )
    prob = factorial( n ) * ( 4*pi )^n * exp( -sum(resid(leastsq)^2)/2 ) /
           (((lower-upper) * 1 * max(par[(2*n+1):(3*n)]))^n *
            sqrt(det(solve(vcov(leastsq)))))
    return( list( p = prob, par = par, residual = sum(resid(leastsq)^2),
                  hessian = solve(vcov(leastsq)), vcov = vcov(leastsq) ) )
}

gmm_fit_kmeans <- function(x, n) {
    p = vector( "numeric", 3*n )
    km = kmeans( x, n )
    for( i in 1:n ) {
        p[i] = length( x[km$cluster==i] )/length(x)
        p[n+i] = mean( x[km$cluster==i] )
        p[2*n+i] = sqrt(var( x[km$cluster==i] ))
    }
    return( p )
}

# Calculate intersection of two normal distributions by finding roots
# of quadratic equation.
gmm_intersections <- function( p ) {
    P = matrix( p, ncol = 3 )
    a = P[2,3]^2 - P[1,3]^2
    b = -2 * ( P[1,2] * P[2,3]^2 - P[2,2] * P[1,3]^2 )
    c = ( P[1,2]^2 * P[2,3]^2 - P[2,2]^2 * P[1,3]^2 -
          2 * (P[1,3]*P[2,3])^2 * log( P[1,1]*P[2,3] / P[2,1]/P[1,3] ) )
    D = b^2 - 4 * a * c
    if( a == 0 && b == 0 && c == 0 ) {
        # Components are identical
        return( NaN )
    } else if( a == 0 ) {
        # Not a quadratic equation
        return( -c / b )
    } else if( D < 0 ) {
        # Discriminant is less than zero, no intersections
        return( c() )
    } else if( D == 0 ) {
        # Single root
        return( -b / ( 2 * a ) )
    } else {
        # Two roots
        return( ( -b + c( 1, -1 ) * sqrt( D ) ) / ( 2 * a ) )
    }
}

cmm_intersections <- function( p ) {
    P = matrix( p, ncol = 3 )
    a = P[2,1] * P[2,3] - P[1,1] * P[1,3]
    b = 2 * ( P[1,1] * P[2,2] * P[1,3] - P[2,1] * P[1,2] * P[2,3] )
    c = ( P[2,1] * P[1,2]^2 * P[2,3] - P[1,1] * P[2,2]^2 * P[1,3] +
          P[2,1] * P[1,3]^2 * P[2,3] - P[1,1] * P[2,3]^2 * P[1,3] )
    D = b^2 - 4 * a * c
    if( a == 0 && b == 0 && c == 0 ) {
        # Components are identical
        return( NaN )
    } else if( a == 0 ) {
        # Not a quadratic equation
        return( -c / b )
    } else if( D < 0 ) {
        # Discriminant is less than zero, no intersections
        return( c() )
    } else if( D == 0 ) {
        # Single root
        return( -b / ( 2 * a ) )
    } else {
        # Two roots
        return( ( -b + c( 1, -1 ) * sqrt( D ) ) / ( 2 * a ) )
    }
}
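# For two components with equal proportions and equal standard deviations
# the single intersection lies halfway between the means, as exercised in
# tests/gmm_intersections_001.R (illustrative):
#   gmm_intersections( c( 0.5, 0.5, 1, 2, 1, 1 ) )   # 1.5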
ssd_gradient <- function(x, y, p) {
    n = length(x)
    m = length(p)/3
    A = p[1:m]
    mu = p[(m+1):(2*m)]
    sigma = p[(2*m+1):(3*m)]
    grad = vector( "numeric", length(p) )
    for( i in 1:m ) {
        grad[i] = 0
        grad[i+m] = 0
        grad[i+2*m] = 0
        for( k in 1:n ) {
            grad[i] = grad[i] -
                2 * exp( -( x[k] - mu[i] )^2 / (2 * sigma[i]^2) ) /
                ( (2*pi)^0.5 * sigma[i] ) *
                ( y[k] - sum( A / ( (2*pi)^0.5 * sigma ) *
                              exp( -( x[k] - mu )^2 / ( 2 * sigma^2 ) ) ) )
            grad[i+m] = grad[i+m] -
                2 * A[i] * ( x[k] - mu[i] ) *
                exp( -( x[k] - mu[i] )^2 / ( 2 * sigma[i]^2 ) ) *
                ( y[k] - sum( A / ( (2*pi)^0.5 * sigma ) *
                              exp( -( x[k] - mu )^2 / ( 2 * sigma^2 ) ) ) )
            grad[i+2*m] = grad[i+2*m] +
                2 * A[i] * exp( -( x[k] - mu[i] )^2 / ( 2 * sigma[i]^2 ) ) /
                ( sigma[i]^2 * (2*pi)^0.5 ) *
                ( 1 - ( x[k] - mu[i] )^2 / sigma[i]^2 ) *
                ( y[k] - sum( A / ( (2*pi)^0.5 * sigma ) *
                              exp( -( x[k] - mu )^2 / ( 2 * sigma^2 ) ) ) )
        }
    }
    return( grad )
}

pssd_gradient <- function(x, y, p) {
    grad = ssd_gradient( x, y, p )
    m = length(p)/3
    A = p[1:m]
    mu = p[(m+1):(2*m)]
    sigma = p[(2*m+1):(3*m)]
    for( i in 1:m ) {
        grad[i] = grad[i] + 2 * ( sum( A ) - 1 )
        if( A[i] <= 0 ) {
            grad[i] = grad[i] - exp( -sum( A[A<=0] ) )
        }
        if( mu[i] < min(x) ) {
            grad[i+m] = grad[i+m] - 2 * (min(x) - mu[i])
        }
        if( mu[i] > max(x) ) {
            grad[i+m] = grad[i+m] - 2 * (max(x) - mu[i])
        }
        if( sigma[i] <= 0 ) {
            grad[i+2*m] = grad[i+2*m] - exp( -sum( sigma[sigma<=0] ) )
        }
    }
    return( grad )
}

gradient_descent <- function( gradfn, start, gamma = 0.1, ..., epsilon = 0.01 ) {
    a = start
    while( TRUE ) {
        grad = gradfn( a, ... )
        prev_a = a
        a = a - gamma * grad
        if( sqrt(sum((a-prev_a)^2)) <= epsilon ) {
            break
        }
    }
    return( a )
}

ssd <- function( x, y, p ) {
    return( sum( ( y - dgmm( x, p ) )^2 ) )
}

pssd <- function( x, y, p ) {
    m = length(p)/3
    A = p[1:m]
    mu = p[(m+1):(2*m)]
    sigma = p[(2*m+1):(3*m)]
    sum = ssd( x, y, c( A[sigma>0], mu[sigma>0], sigma[sigma>0] ) )
    sum = sum + sum( exp( -sigma[sigma<=0] ) - 1 )
    sum = sum + sum( exp( -A[A<=0] ) - 1 )
    sum = sum + ( sum( A ) - 1 )^2
    sum = sum + sum( ( mu[mu<min(x)] - min(x) )^2 ) +
          sum( ( mu[mu>max(x)] - max(x) )^2 )
    return( sum )
}

simplex <- function( fn, start, ..., epsilon = 0.000001, alpha = 1,
                     gamma = 2, rho = 0.5, delta = 0.5, trace = FALSE ) {
    A = start
    while( TRUE ) {
        v = vector( "numeric", length( A ) )
        for (i in 1:length( A )) {
            v[i] = fn( A[[i]], ... )
        }
        A = A[sort( v, index.return = TRUE )$ix]
        v = sort( v )
        maxdiff = 0
        for (i in 2:length( A )) {
            diff = sqrt( sum( (as.vector(A[[i]])-as.vector(A[[1]]))^2 ) )
            if( diff > maxdiff ) {
                maxdiff = diff
            }
        }
        if( maxdiff / max( 1, sqrt( sum(as.vector(A[[1]])^2) ) ) <= epsilon ) {
            break
        }
        x0 = vector( "numeric", length( A[[1]] ) ) * 0 # gravity center
        for (i in 1:(length( A )-1)) {
            x0 = x0 + A[[i]] / (length( A )-1)
        }
        xr = x0 + alpha * ( x0 - A[[length( A )]] ) # reflected point
        fr = fn( xr, ... )
        if( fr < v[1] ) {
            # reflected point is the best point so far
            xe = x0 + gamma * ( x0 - A[[length( A )]] ) # expansion
            fe = fn( xe, ... )
            if( fe < fr ) {
                A[[length( A )]] = xe
                if( trace == TRUE ) {
                    cat( "Expanding towards ", xe, " (", fe, ")\n" )
                }
            } else {
                A[[length( A )]] = xr
                if( trace == TRUE ) {
                    cat( "Reflecting towards ", xr, " (", fr, ")\n" )
                }
            }
        } else if( fr >= v[length(v)-1] ) {
            xc = x0 + rho * ( x0 - A[[length( A )]] ) # contraction
            fc = fn( xc, ... )
            if( fc < v[length( A )] ) {
                A[[length( A )]] = xc
                if( trace == TRUE ) {
                    cat( "Contracting towards ", xc, " (", fc, ")\n" )
                }
            } else {
                for (i in 2:length( A )) { # reduction
                    A[[i]] = A[[1]] + delta * ( A[[i]] - A[[1]] )
                }
                if( trace == TRUE ) {
                    cat( "Reducing\n" )
                }
            }
        } else if( fr < v[length(v)-1] ) {
            A[[length( A )]] = xr # reflection
            if( trace == TRUE ) {
                cat( "Reflecting towards ", xr, " (", fr, ")\n" )
            }
        } else {
            for (i in 2:length( A )) { # reduction
                A[[i]] = A[[1]] + delta * ( A[[i]] - A[[1]] )
            }
            if( trace == TRUE ) {
                cat( "Reducing\n" )
            }
        }
    }
    v = vector( "numeric", length( A ) )
    for (i in 1:length( A )) {
        v[i] = fn( A[[i]], ... )
    }
    A = A[sort( v, index.return = TRUE )$ix]
    v = sort( v )
    return( list( best = A[[1]], score = v[1] ) )
}
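# gradient_descent() and simplex() are generic optimisers; minimising
# ( a[1] - 1 )^2 + ( a[2] + 2 )^2 works as a quick check (illustrative):
#   gradient_descent( function( a ) 2 * ( a - c( 1, -2 ) ), c( 0, 0 ) )
#   simplex( function( a ) sum( ( a - c( 1, -2 ) )^2 ),
#            list( c( 0, 0 ), c( 1, 0 ), c( 0, 1 ) ) )$best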
rsimplex_start <- function(seed, n, lower, upper) {
    set.seed( seed )
    l = list()
    for( i in 1:(length(lower) * n + 1) ) {
        v = vector( "numeric", length(lower) * n ) * 0
        for( j in 1:length(lower) ) {
            v[((j-1)*n+1):(j*n)] = runif( n, lower[j], upper[j] )
        }
        v[1:n] = v[1:n] / sum( v[1:n] )
        l[[i]] = v
    }
    return( l )
}

gmm_fit_hwhm <- function( x, y, n ) {
    a = y
    p = vector( "numeric", 3 * n )
    for( i in 1:n ) {
        maxid = which.max(a)
        mu = x[maxid]
        aleft = a[1:maxid]
        aright = a[maxid:length(a)]
        xleft = x[1:maxid]
        xright = x[maxid:length(x)]
        # Standard deviation is estimated from the half-width at
        # half-maximum of the tallest remaining peak.
        sigma = min( c( x[maxid] - head(xleft[aleft > a[maxid]/2],1),
                        head(xright[aright < a[maxid]/2],1) - x[maxid] ) ) /
                sqrt( 2 * log( 2 ) )
        p[i] = a[maxid] * sigma * sqrt( 2 * pi )
        p[n+i] = mu
        p[2*n+i] = sigma
        # Subtract the fitted component before locating the next peak.
        a = a - p[i] * dnorm( x, mu, sigma )
    }
    return( p )
}

gmm_fit_hwhm_spline_deriv <- function( x, y ) {
    a = y
    # Peaks are located where the first derivative of a smoothing spline
    # changes sign from positive to negative.
    diff = predict( smooth.spline( x, y ), x, deriv = 1 )$y
    peak_positions = (2:length(diff))[diff[1:(length(diff)-1)] > 0 &
                                      diff[2:length(diff)] < 0]
    sorted_peak_order = rev( sort( y[peak_positions], index.return = TRUE )$ix )
    peak_positions = peak_positions[sorted_peak_order]
    n = length( peak_positions )
    p = vector( "numeric", 3 * n )
    for( i in 1:n ) {
        maxid = peak_positions[i]
        mu = x[maxid]
        aleft = a[1:maxid]
        aright = a[maxid:length(a)]
        xleft = x[1:maxid]
        xright = x[maxid:length(x)]
        sigma = min( c( x[maxid] - head(xleft[aleft > a[maxid]/2],1),
                        head(xright[aright < a[maxid]/2],1) - x[maxid] ) ) /
                sqrt( 2 * log( 2 ) )
        p[i] = a[maxid] * sigma * sqrt( 2 * pi )
        p[n+i] = mu
        p[2*n+i] = sigma
        a = a - p[i] * dnorm( x, mu, sigma )
    }
    return( p )
}

cmm_fit_hwhm_spline_deriv <- function( x, y ) {
    a = y
    diff = predict( smooth.spline( x, y ), x, deriv = 1 )$y
    peak_positions = (2:length(diff))[diff[1:(length(diff)-1)] > 0 &
                                      diff[2:length(diff)] < 0]
    sorted_peak_order = rev( sort( y[peak_positions], index.return = TRUE )$ix )
    peak_positions = peak_positions[sorted_peak_order]
    n = length( peak_positions )
    p = vector( "numeric", 3 * n )
    for( i in 1:n ) {
        maxid = peak_positions[i]
        mu = x[maxid]
        aleft = a[1:maxid]
        aright = a[maxid:length(a)]
        xleft = x[1:maxid]
        xright = x[maxid:length(x)]
        # For a Cauchy peak the half-width at half-maximum equals the
        # scale parameter itself.
        sigma = min( c( x[maxid] - head(xleft[aleft > a[maxid]/2],1),
                        head(xright[aright < a[maxid]/2],1) - x[maxid] ) )
        p[i] = a[maxid] * pi * sigma
        p[n+i] = mu
        p[2*n+i] = sigma
        a = a - p[i] * dcauchy( x, mu, sigma )
    }
    return( p )
}

smm_fit_em <- function( x, p, ... ) {
    return( smm_fit_em_GNL08( x, p, ... ) )
}

smm_fit_em_APK10 <- function( x, p, epsilon = c( 1e-6, 1e-6, 1e-6, 1e-6 ),
                              collect.history = FALSE, debug = FALSE ) {
    m = length(p)/4
    A = p[1:m]
    c = p[(m+1):(2*m)]
    s = p[(2*m+1):(3*m)]
    ni = p[(3*m+1):(4*m)]
    d_A = c( Inf )
    d_c = c( Inf )
    d_s = c( Inf )
    d_ni = c( Inf )
    steps = 0
    history = list()
    if( collect.history == TRUE ) {
        history[[1]] = p
    }
    while( length( d_A[ d_A > epsilon[1]] ) > 0 ||
           length( d_c[ d_c > epsilon[2]] ) > 0 ||
           length( d_s[ d_s > epsilon[3]] ) > 0 ||
           length( d_ni[d_ni > epsilon[4]] ) > 0 ) {
        prev_A = A
        prev_c = c
        prev_s = s
        prev_ni = ni
        q = dsmm( x, c( A, c, s, ni ) )
        for( j in 1:m ) {
            h = A[j] * ds( x, c[j], s[j], ni[j] )
            w = h / q
            A[j] = sum( w ) / length( x )
            ord = order( x )
            xo = x[ord]
            ho = w[ord] / sum( w )
            c[j] = wmedian( xo, ho ) + 1e-6
            z = log( ( x - c[j] )^2 )
            zbar = sum( z * w ) / sum( w )
            b = sum( ( z - zbar )^2 * w ) / sum( w ) - trigamma( 0.5 )
            ni[j] = ( 1 + sqrt( 1 + 4 * b ) ) / b
            s[j] = exp( zbar - log( ni[j] ) + digamma( ni[j] / 2 ) -
                        digamma( 0.5 ) )
            if( debug == TRUE ) {
                cat( A[j], " ", c[j], " ", s[j], " ", ni[j], " " )
            }
        }
        if( debug == TRUE ) {
            cat( "\n" )
        }
        d_A = abs( A - prev_A )
        d_c = abs( c - prev_c )
        d_s = abs( s - prev_s )
        d_ni = abs( ni - prev_ni )
        steps = steps + 1
        if( collect.history == TRUE ) {
            history[[steps+1]] = c( A, c, s, ni )
        }
        if( length( d_A[ is.na(d_A) ] ) + length( d_c[ is.na(d_c) ] ) +
            length( d_s[ is.na(d_s) ] ) + length( d_ni[is.na(d_ni)] ) > 0 ) {
            break
        }
    }
    l = list( p = c( A, c, s, ni ), steps = steps )
    if( collect.history == TRUE ) {
        l$history = history
    }
    return( l )
}
# Coefficients for digamma function approximation, containing the first
# eight non-zero members of the asymptotic expression for digamma(x).
# Taken from Wikipedia (see "Computation and approximation"):
# https://en.wikipedia.org/w/index.php?title=Digamma_function&oldid=708779689
digamma_approx_coefs = c( 1/2, 1/12, 0, -1/120, 0, 1/252, 0, -1/240, 0,
                          1/660, 0, -691/32760, 0, 1/12 )

smm_fit_em_GNL08 <- function( x, p, epsilon = c( 1e-6, 1e-6, 1e-6, 1e-6 ),
                              collect.history = FALSE, debug = FALSE,
                              min.sigma = 1e-256, min.ni = 1e-256,
                              max.df = 1000, max.steps = Inf,
                              polyroot.solution = "jenkins_taub",
                              convergence = abs_convergence,
                              unif.component = FALSE ) {
    m = length(p)/4
    A = p[1:m]
    c = p[(m+1):(2*m)]
    s = p[(2*m+1):(3*m)]
    ni = p[(3*m+1):(4*m)]
    prev_A = rep( Inf, m )
    prev_c = rep( Inf, m )
    prev_s = rep( Inf, m )
    prev_ni = rep( Inf, m )
    steps = 0
    history = list()
    if( collect.history ) {
        history[[1]] = p
    }
    while( steps < max.steps &&
           ( steps == 0 || !convergence( c( A, c, s, ni ),
                                         c( prev_A, prev_c, prev_s, prev_ni ),
                                         epsilon ) ) ) {
        prev_A = A
        prev_c = c
        prev_s = s
        prev_ni = ni
        q = dsmm( x, c( A, c, s, ni ) )
        if( unif.component ) {
            # Allows additional component with uniform distribution for
            # the modelling of outliers as suggested in:
            # Cousineau, D. & Chartier, S.
            # Outliers detection and treatment: a review
            # International Journal of Psychological Research,
            # 2010, 3, 58-67
            # https://revistas.usb.edu.co/index.php/IJPR/article/view/844
            q = q + ( 1 - sum( A ) ) * dunif( x, min(x), max(x) )
        }
        for( j in 1:m ) {
            h = A[j] * ds( x, c[j], s[j], ni[j] )
            z = h / q
            u = ( ni[j] + 1 ) / ( ni[j] + ( ( x - c[j] ) / s[j] )^2 )
            A[j] = sum( z ) / length( x )
            c[j] = sum( z * u * x ) / sum( z * u )
            s[j] = sqrt( sum( z * u * ( x - c[j] )^2 ) / sum( z ) )
            # Solution of Eqn. 17 is implemented via digamma function
            # approximation using asymptotic expression of digamma(x).
            # Jenkins-Traub (implemented in R's polyroot() function) or
            # Newton-Raphson (implemented here) algorithm is used to find
            # the roots of the polynomial. For Jenkins-Traub, a positive
            # real root (should be single) is chosen as a solution.
            cl = length( digamma_approx_coefs )
            p = sum( rep( 2 / ( ni[j] + 1 ), cl )^(1:cl) *
                     digamma_approx_coefs )
            polynome = c( sum( z * ( log( u ) - u ) ) / sum( z ) - p + 1,
                          digamma_approx_coefs )
            roots = switch( polyroot.solution,
                            jenkins_taub = polyroot( polynome ),
                            newton_raphson = polyroot_NR( polynome,
                                                          init = 2/ni[j] ),
                            NaN )
            ni[j] = 2 / switch( polyroot.solution,
                                jenkins_taub = Re(roots[abs(Im(roots)) < 1e-10 &
                                                        Re(roots) > 1e-10]),
                                newton_raphson = roots,
                                NaN )
            if( ni[j] > max.df ) {
                ni[j] = max.df
            }
            if( debug ) {
                cat( A[j], " ", c[j], " ", s[j], " ", ni[j], " " )
            }
        }
        if( debug ) {
            cat( "\n" )
        }
        steps = steps + 1
        if( collect.history ) {
            history[[steps+1]] = c( A, c, s, ni )
        }
        if( length( A[ is.na(A) ] ) + length( c[ is.na(c) ] ) +
            length( s[ is.na(s) ] ) + length( ni[is.na(ni)] ) +
            length( s[s <= min.sigma] ) + length( ni[ni <= min.ni] ) > 0 ) {
            A = A * NaN
            c = c * NaN
            s = s * NaN
            ni = ni * NaN
            break
        }
    }
    l = list( p = c( A, c, s, ni ), steps = steps )
    if( collect.history ) {
        l$history = history
    }
    return( l )
}
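# The truncated asymptotic series behind digamma_approx_coefs can be
# compared against R's digamma() (illustrative; digamma_approx() is
# defined further down in this file):
#   digamma_approx( c( 2, 5, 10 ) )
#   digamma( c( 2, 5, 10 ) )   # agreement is already close at x = 2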
# Greedy EM algorithm for Student's t-distribution mixture fitting.
# Implemented according to:
# Chen, S.; Wang, H. & Luo, B.
# Greedy EM Algorithm for Robust T-Mixture Modeling
# Third International Conference on Image and Graphics (ICIG'04),
# Institute of Electrical & Electronics Engineers (IEEE), 2004, 548--551
smm_fit_em_CWL04 <- function( x, p, collect.history = FALSE, debug = FALSE,
                              ... ) {
    bic_prev = Inf
    prev_p = p
    m = length(p) / 4
    run = TRUE
    history = list()
    while( run ) {
        if( debug ) {
            cat( "Starting EM with", m, "components\n" )
        }
        fit = smm_fit_em( x, p, ... )
        p = fit$p
        bic_now = bic( x, p, llsmm )
        if( debug ) {
            cat( "Achieving fit with BIC =", bic_now, "\n" )
        }
        if( bic_prev > bic_now ) {
            bic_prev = bic_now
            kldivs = numeric( m )
            for( i in 1:m ) {
                kldivs[i] = kldiv( x, p, i )
            }
            split = which.max( kldivs )
            if( debug ) {
                cat( "Splitting component", split, "\n" )
            }
            if( collect.history ) {
                history[[m]] = p
            }
            s = smm_split_component( p[(0:3)*m+split] )
            prev_p = p
            p = c( p[0*m+sort( c( 1:m, split ) )],
                   p[1*m+sort( c( 1:m, split ) )],
                   p[2*m+sort( c( 1:m, split ) )],
                   p[3*m+sort( c( 1:m, split ) )] )
            m = m + 1
            p[0*m+split+0:1] = s[1:2]
            p[1*m+split+0:1] = s[3:4]
            p[2*m+split+0:1] = s[5:6]
            p[3*m+split+0:1] = s[7:8]
        } else {
            run = FALSE
            p = prev_p
            if( debug ) {
                cat( "Stopping on convergence criterion\n" )
            }
        }
    }
    l = list( p = p )
    if( collect.history ) {
        l$history = history
    }
    return( l )
}

# Fits the distribution of observations with t-distribution (Student's
# distribution) mixture model. Implemented according to the Batch
# Approximation Algorithm, as given in Fig. 2 in:
# Aeschliman, C.; Park, J. & Kak, A. C.
# A Novel Parameter Estimation Algorithm for the Multivariate t-Distribution
# and Its Application to Computer Vision
# European Conference on Computer Vision 2010, 2010
# https://engineering.purdue.edu/RVL/Publications/Aeschliman2010ANovel.pdf
s_fit_primitive <- function( x ) {
    xbar = median( x )
    z = log( ( x - xbar )^2 )
    zbar = sum( z ) / length( x )
    b = sum( ( z - zbar )^2 ) / length( x ) - trigamma( 0.5 )
    ni = ( 1 + sqrt( 1 + 4 * b ) ) / b
    alpha = exp( zbar - log( ni ) + digamma( ni / 2 ) - digamma( 0.5 ) )
    return( c( xbar, alpha, ni ) )
}

mk_fit_images <- function( h, l, prefix = "img_" ) {
    maxstrlen = ceiling( log( length( l ) ) / log( 10 ) )
    for( i in 1:length( l ) ) {
        fname = paste( prefix,
                       sprintf( paste( "%0", maxstrlen, "d", sep = "" ), i ),
                       ".png", sep = "" )
        png( filename = fname )
        plot( h )
        lines( h$mids,
               sum( h$counts ) * dgmm( h$mids, l[[i]] )/sum( h$density ),
               col = "red", lwd = 2 )
        dev.off()
    }
}

gmm_init_vector_kmeans <- function( x, m ) {
    start = numeric( 3 * m )
    if( min(x) == max(x) ) {
        start[1:m] = 1/m
        start[(m+1):(2*m)] = min(x) + (1:m)*(max(x)-min(x))/(m+1)
        start[(2*m+1):(3*m)] = (max(x)-min(x))/(m+1)/6
    } else {
        k = kmeans( x, m )
        start[1:m] = k$size / length( x )
        start[(m+1):(2*m)] = k$centers
        start[(2*m+1):(3*m)] = sqrt( k$withinss / k$size )
    }
    return( start )
}

gmm_init_vector_quantile <- function( x, m, w = numeric() ) {
    if( length(x) != length(w) ) {
        w = x * 0 + 1
    }
    w = w[order(x)]
    x = x[order(x)]
    start = numeric( 3 * m )
    start[1:m] = 1/m
    n = 1
    wsum = 0
    for( i in 1:length(x) ) {
        wsum = wsum + w[i]
        if( wsum > n * sum(w)/(m+1) ) {
            start[m+n] = x[i]
            n = n + 1
        }
    }
    start[(2*m+1):(3*m)] = sqrt( sum( (x-mean(x))^2 ) / length(x) )
    return( start )
}

cmm_init_vector_kmeans <- function( x, m, iter.cauchy = 20 ) {
    start = numeric( 3 * m )
    if( min(x) == max(x) ) {
        start[1:m] = 1/m
        start[(m+1):(2*m)] = min(x) + (1:m)*(max(x)-min(x))/(m+1)
        start[(2*m+1):(3*m)] = 1
    } else {
        k = kmeans( x, m )
        start[1:m] = k$size / length( x )
        start[(m+1):(2*m)] = 0
        start[(2*m+1):(3*m)] = 1
        for( n in 1:m ) {
            for( i in 1:iter.cauchy ) {
                u = (x[k$cluster == n] - start[m+n]) / start[2*m+n]
                e0k = sum( 1 / (1 + u^2) ) / k$size[n]
                e1k = sum( u / (1 + u^2 ) ) / k$size[n]
                start[m+n] = start[m+n] + start[2*m+n] * e1k / e0k
                start[2*m+n] = start[2*m+n] * sqrt( 1/e0k - 1 )
            }
        }
    }
    return( start )
}
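# s_fit_primitive() provides the quick single-component estimates that
# seed smm_init_vector_kmeans() below (illustrative sketch):
#   set.seed( 42 )
#   s_fit_primitive( rt( 1000, 5 ) )   # roughly: centre 0, scale 1, df 5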
smm_init_vector <- function( x, n ) {
    start = numeric( 4 * n )
    start[1:n] = 1/n
    start[(n+1):(2*n)] = min(x) + (1:n)*(max(x)-min(x))/(n+1)
    start[(2*n+1):(3*n)] = 1
    start[(3*n+1):(4*n)] = 1
    return( start )
}

smm_init_vector_kmeans <- function( x, m ) {
    start = numeric( 4 * m )
    if( min(x) == max(x) ) {
        start[1:m] = 1/m
        start[(m+1):(2*m)] = min(x) + (1:m)*(max(x)-min(x))/(m+1)
        start[(2*m+1):(3*m)] = 1
        start[(3*m+1):(4*m)] = 1
    } else {
        k = kmeans( x, m )
        start[1:m] = k$size / length( x )
        for( i in 1:m ) {
            p = s_fit_primitive( x[k$cluster==i] )
            start[1*m+i] = p[1]
            start[2*m+i] = p[2]
            start[3*m+i] = p[3]
        }
    }
    return( start )
}

gmm_merge_components <- function( x, p, i, j ) {
    P = matrix( p, ncol = 3 )
    A = P[i,1] + P[j,1]
    # Performing an iteration of EM to find new mean and sd
    q = vector( "numeric", length( x ) ) * 0
    for( k in 1:nrow( P ) ) {
        q = q + P[k,1] * dnorm( x, P[k,2], P[k,3] )
    }
    h = ( P[i,1] * dnorm( x, P[i,2], P[i,3] ) +
          P[j,1] * dnorm( x, P[j,2], P[j,3] ) ) / q
    mu = sum( x * h ) / sum( h )
    sigma = sqrt( sum( h * ( x - mu ) ^ 2 ) / sum( h ) )
    P[i,] = c( A, mu, sigma )
    return( as.vector( P[setdiff( 1:nrow( P ), j ),] ) )
}

# Splits a component of Student's t-distribution mixture. Implemented
# according to Eqns. 30--36 of:
# Chen, S.-B. & Luo, B.
# Robust t-mixture modelling with SMEM algorithm
# Proceedings of 2004 International Conference on Machine Learning and
# Cybernetics (IEEE Cat. No.04EX826),
# Institute of Electrical & Electronics Engineers (IEEE), 2004, 6, 3689--3694
smm_split_component <- function( p, alpha = 0.5, beta = 0.5, u = 0.5 ) {
    A = c( alpha, 1 - alpha ) * p[1]
    c = p[2] + c( -sqrt( A[2]/A[1] ), sqrt( A[1]/A[2] ) ) * u * sqrt( p[3] )
    s = p[1] * p[3] * (1 - (p[4] - 2) * u^2 / p[4]) * c( beta, 1 - beta ) / A
    return( c( A, c, s, p[4], p[4] ) )
}

plot_circular_hist <- function( x, breaks = 72, ball = 0.5, ... ) {
    xx = numeric( breaks * 3 + 1 )
    yy = numeric( breaks * 3 + 1 ) * 0
    xx[(1:breaks)*3] = 1:breaks * 2*pi / breaks
    for( i in 1:breaks ) {
        xx[((i-1)*3+1):((i-1)*3+2)] = (i-1) * 2*pi / breaks
        yy[((i-1)*3+2):(i*3)] = length( x[x >= 360 / breaks * (i-1) &
                                          x < 360 / breaks * i] )
    }
    yy = (yy / max(yy)) * (1-ball) + ball
    plot( yy * cos( xx ), yy * sin( xx ), type = "l", asp = 1,
          ann = FALSE, axes = FALSE, ... )
    lines( ball * cos( seq( 0, 2*pi, pi/180 ) ),
           ball * sin( seq( 0, 2*pi, pi/180 ) ) )
}
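# Component splitting and merging are the complementary moves used by the
# greedy drivers above (illustrative sketch; values are examples only):
#   smm_split_component( c( 1, 0, 1, 5 ) )   # one t component -> two
#   x = rgmm( 500, c( 0.5, 0.5, 0, 0.5, 1, 1 ) )
#   gmm_merge_components( x, c( 0.5, 0.5, 0, 0.5, 1, 1 ), 1, 2 )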
deg2rad <- function( x ) {
    return( x * pi / 180 )
}

rad2deg <- function( x ) {
    return( x * 180 / pi )
}

kmeans_circular <- function( x, centers, iter.max = 10 ) {
    centers = sort( deg2rad( centers ) )
    n = length( centers )
    x = deg2rad( x )
    for( i in 1:iter.max ) {
        cluster = numeric( length( x ) ) * 0
        for( j in 2:(n-1) ) {
            cluster[x >= (centers[j] + centers[j-1])/2 &
                    x < (centers[j+1] + centers[j])/2] = j
        }
        midpoint = (centers[n] - 2*pi + centers[1])/2
        if( midpoint < 0 ) {
            cluster[x < (centers[1] + centers[2])/2] = 1
            cluster[x >= midpoint + 2*pi] = 1
            cluster[x < midpoint + 2*pi &
                    x >= (centers[n-1] + centers[n])/2] = n
            x[x >= midpoint + 2*pi] = x[x >= midpoint + 2*pi] - 2*pi
        } else {
            cluster[x >= (centers[n-1] + centers[n])/2] = n
            cluster[x < midpoint] = n
            cluster[x < (centers[1] + centers[2])/2 & x >= midpoint] = 1
            x[x < midpoint] = x[x < midpoint] + 2*pi
        }
        for( j in 1:n ) {
            centers[j] = sum( x[cluster == j] ) / length( cluster[cluster == j] )
        }
        centers[centers<0] = centers[centers<0] + 2*pi
        centers[centers>2*pi] = centers[centers>2*pi] - 2*pi
        x[x < 0] = x[x < 0] + 2*pi
        x[x > 2*pi] = x[x > 2*pi] - 2*pi
        centers = sort( centers )
    }
    return( rad2deg( centers ) )
}

# Weighted median function, implemented according to Wikipedia:
# https://en.wikipedia.org/w/index.php?title=Weighted_median&oldid=690896947
wmedian <- function( x, w, start = 1, end = length( x ) ) {
    # base case for single element
    if( start == end ) {
        return( x[start] )
    }
    # base case for two elements
    # make sure we return lower median
    if( end - start == 1 ) {
        if( w[start] >= w[end] ) {
            return( x[start] )
        } else {
            return( x[end] )
        }
    }
    # partition around center pivot
    q = round( ( start + end ) / 2 )
    w_left = sum( w[start:(q-1)] )
    w_right = sum( w[(q+1):end] )
    if( w_left < 0.5 && w_right < 0.5 ) {
        return( x[q] )
    }
    if( w_left > w_right ) {
        w[q] = w[q] + w_right
        return( wmedian( x, w, start, q ) )
    } else {
        w[q] = w[q] + w_left
        return( wmedian( x, w, q, end ) )
    }
}

digamma_approx <- function( x ) {
    cl = length( digamma_approx_coefs )
    ret = numeric( length( x ) )
    for( i in 1:length(x) ) {
        ret[i] = log( x[i] ) -
                 sum( rep( 1/x[i], cl )^(1:cl) * digamma_approx_coefs )
    }
    return( ret )
}
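# wmedian() expects observations sorted by x and weights summing to one
# (illustrative):
#   wmedian( c( 1, 2, 3, 4 ), c( 0.1, 0.2, 0.3, 0.4 ) )   # returns 3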
# Kullback--Leibler divergence, using Dirac's delta function, implemented
# according to:
# Chen, S.; Wang, H. & Luo, B.
# Greedy EM Algorithm for Robust T-Mixture Modeling
# Third International Conference on Image and Graphics (ICIG'04),
# Institute of Electrical & Electronics Engineers (IEEE), 2004, 548-551
kldiv <- function( x, p, k ) {
    m = length( p ) / 4
    A = p[k]
    c = p[m+k]
    s = p[2*m+k]
    ni = p[3*m+k]
    z = A * ds( x, c, s, ni ) / dsmm( x, p )
    kld = 0
    for( i in unique(x) ) {
        pk = z[x==i] / sum( z )
        fk = ds( i, c, s, ni )
        kld = kld + pk * log( pk / fk )
    }
    return( kld )
}

bhattacharyya_dist <- function( mu1, mu2, sigma1, sigma2 ) {
    return( log( sum( c( sigma1, sigma2 )^2 / c( sigma2, sigma1 )^2, 2 ) / 4 ) / 4 +
            ( mu1 - mu2 )^2 / ( 4 * ( sigma1^2 + sigma2^2 ) ) )
}

abs_convergence <- function( p_now, p_prev, epsilon = 1e-6 ) {
    if( length( epsilon ) > 1 && length( epsilon ) < length( p_now ) ) {
        n = length( p_now ) / length( epsilon )
        epsilon_now = numeric( 0 )
        for( i in 1:length( epsilon ) ) {
            epsilon_now = c( epsilon_now, rep( epsilon[i], n ) )
        }
        epsilon = epsilon_now
    }
    has_converged = all( abs( p_now - p_prev ) <= epsilon )
    if( is.na( has_converged ) ) {
        has_converged = TRUE
    }
    return( has_converged )
}

ratio_convergence <- function( p_now, p_prev, epsilon = 1e-6 ) {
    if( length( epsilon ) > 1 && length( epsilon ) < length( p_now ) ) {
        n = length( p_now ) / length( epsilon )
        epsilon_now = numeric( 0 )
        for( i in 1:length( epsilon ) ) {
            epsilon_now = c( epsilon_now, rep( epsilon[i], n ) )
        }
        epsilon = epsilon_now
    }
    has_converged = all( abs( p_now - p_prev ) / p_prev <= epsilon )
    if( is.na( has_converged ) ) {
        has_converged = TRUE
    }
    return( has_converged )
}

plot_density <- function( x, model, density_f, width, height, cuts = 400,
                          main = "", filename = NULL, obs_good = c(),
                          obs_bad = c(), scale_density = FALSE ) {
    png( filename, width = width, height = height )
    h = hist( x, cuts, main = main,
              xlim = c( min( c( x, obs_bad ) ), max( c( x, obs_bad ) ) ) )
    xmids = seq( min( c( x, obs_bad ) ), max( c( x, obs_bad ) ),
                 h$mids[2] - h$mids[1] )
    density = do.call( density_f, list( xmids, model ) )
    if( scale_density == TRUE ) {
        density = density / max( density ) * max( h$counts )
    } else {
        density = length(x) / sum( h$density ) * density
    }
    lines( xmids, density, lwd = 2, col = "green" )
    if( length( obs_good ) > 0 ) {
        rug( obs_good, lwd = 2, col = "green" )
    }
    if( length( obs_bad ) > 0 ) {
        rug( obs_bad, lwd = 2, col = "red" )
    }
    dev.off()
}

MixtureFitting/src/
MixtureFitting/src/MixtureFitting.c
#include <R.h>
#include <Rinternals.h>
#include <Rmath.h>
#include <math.h>
#include <stdlib.h>

double bessi0( double x ) {
    return bessel_i( x, 0, 1 );
}

double deg2rad( double x ) {
    return x * M_PI / 180;
}

double rad2deg( double x ) {
    return x * 180 / M_PI;
}

void dgmm( double *x, int *xlength, double *p, int *plength, double *ret ) {
    int m = *plength / 3;
    double mu[m];
    double factor[m];
    double divisor[m];
    double sqrtdblpi = sqrt( 2 * M_PI );
    for (int i = 0; i < m; i++) {
        double A = p[i];
        mu[i] = p[m+i];
        double sigma = p[2*m+i];
        factor[i] = A / (sigma * sqrtdblpi);
        divisor[i] = 2 * sigma * sigma;
    }
    for (int i = 0; i < *xlength; i++) {
        ret[i] = 0.0;
        for (int j = 0; j < m; j++) {
            double diff = x[i] - mu[j];
            ret[i] = ret[i] + factor[j] * exp( -(diff * diff) / divisor[j] );
        }
    }
}

void llgmm( double *x, int *xlength, double *p, int *plength, double *ret ) {
    double * dgmms = calloc( *xlength, sizeof( double ) );
    if (!dgmms) error( "cannot allocate memory" );
    dgmm( x, xlength, p, plength, dgmms );
    *ret = 0.0;
    for (int i = 0; i < *xlength; i++)
        *ret = *ret + log( dgmms[i] );
    free( dgmms );
}

void gmm_fit_em( double *x, int *xlength, double *p, int *plength, double *w,
                 double *epsilon, int *debug, double *ret, int *steps ) {
    int m = *plength / 3;
    double A[m];
    double mu[m];
    double sigma[m];
    for (int i = 0; i < m; i++) {
        A[i] = p[i];
        mu[i] = p[m+i];
        sigma[i] = p[2*m+i];
    }
    double wsum = 0;
    for (int i = 0; i < *xlength; i++)
        wsum += w[i];
    double sqrtdblpi = sqrt( 2 * M_PI );
    double * q = calloc( *xlength, sizeof( double ) );
    if (!q) error( "cannot allocate memory" );
    int run = 1;
    *steps = 0;
    while (run == 1) {
        int is_nan = 0;
        run = 0;
        for (int i = 0; i < *xlength; i++) {
            q[i] = 0.0;
            for (int j = 0; j < m; j++) {
                double diff = x[i] - mu[j];
                q[i] = q[i] + A[j] / (sigma[j] * sqrtdblpi) *
                       exp( -(diff * diff) / (2 * sigma[j] * sigma[j]) );
            }
        }
        for (int j = 0; j < m; j++) {
            double sumh = 0.0;
            double sumhprod = 0.0;
            double factor = A[j] / (sigma[j] * sqrtdblpi);
            double twosqsigma = 2 * sigma[j] * sigma[j];
            for (int i = 0; i < *xlength; i++) {
                double diff = x[i] - mu[j];
                double sqdiff = diff * diff;
                double weight = w[i] * factor * exp( -sqdiff / twosqsigma ) / q[i];
                sumh = sumh + weight;
                sumhprod = sumhprod + weight * x[i];
            }
            double prev_A = A[j];
            A[j] = sumh / wsum;
            double prev_mu = mu[j];
            mu[j] = sumhprod / sumh;
            double sumhdiff = 0.0;
            for (int i = 0; i < *xlength; i++) {
                double diff = x[i] - mu[j];
                double sqdiff = diff * diff;
                double weight = w[i] * factor * exp( -sqdiff / twosqsigma ) / q[i];
                sumhdiff = sumhdiff + weight * sqdiff;
            }
            double prev_sigma = sigma[j];
            sigma[j] = sqrt( sumhdiff / sumh );
            if (*debug > 0)
                Rprintf( "%f %f %f ", A[j], mu[j], sigma[j] );
            if (fabs( A[j] - prev_A ) > epsilon[0] ||
                fabs( mu[j] - prev_mu ) > epsilon[1] ||
                fabs( sigma[j] - prev_sigma ) > epsilon[2])
                run = 1;
            if (isnan(sigma[j]))
                is_nan = 1;
        }
        if (is_nan == 1)
            run = 0;
        if (*debug > 0)
            Rprintf( "\n" );
        *steps = *steps + 1;
    }
    for (int i = 0; i < m; i++) {
        ret[i] = A[i];
        ret[m+i] = mu[i];
        ret[2*m+i] = sigma[i];
    }
    free( q );
}
void gmm_init_vector( double *x, int *xlength, int *m, double *ret ) {
    double min = x[0];
    double max = x[0];
    for (int i = 1; i < *xlength; i++) {
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
    }
    for (int i = 0; i < *m; i++) {
        ret[i] = 1.0/(*m);
        ret[*m+i] = min + (i+1)*(max-min)/(*m+1);
        ret[2*(*m)+i] = (max-min)/(*m+1)/6;
    }
}

int main( int argc, char *argv[], char *env[] ) {
    return( 0 );
}

void dcmm( double *x, int *xlength, double *p, int *plength, double *ret ) {
    int m = *plength / 3;
    for (int i = 0; i < *xlength; i++) {
        ret[i] = 0.0;
        for (int j = 0; j < m; j++) {
            double normdiff = ( x[i] - p[j+m] ) / p[j+2*m];
            ret[i] = ret[i] + p[j] / ( M_PI * p[j+2*m] * ( 1 + normdiff*normdiff ) );
        }
    }
}

void llcmm( double *x, int *xlength, double *p, int *plength, double *ret ) {
    double * dcmms = calloc( *xlength, sizeof( double ) );
    if (!dcmms) error( "cannot allocate memory" );
    dcmm( x, xlength, p, plength, dcmms );
    *ret = 0.0;
    for (int i = 0; i < *xlength; i++)
        *ret = *ret + log( dcmms[i] );
    free( dcmms );
}

void cmm_fit_em( double *x, int *xlength, double *p, int *plength,
                 double *epsilon, int *itercauchy, int *debug,
                 double *ret, int *steps ) {
    int m = *plength / 3;
    double A[m];
    double c[m];
    double s[m];
    for (int i = 0; i < m; i++) {
        A[i] = p[i];
        c[i] = p[m+i];
        s[i] = p[2*m+i];
    }
    double * q = calloc( *xlength, sizeof( double ) );
    if (!q) error( "cannot allocate memory" );
    double * h = calloc( *xlength, sizeof( double ) );
    if (!h) error( "cannot allocate memory" );
    int run = 1;
    *steps = 0;
    while (run == 1) {
        int is_nan = 0;
        run = 0;
        for (int i = 0; i < *xlength; i++) {
            q[i] = 0.0;
            for (int j = 0; j < m; j++) {
                double diff = x[i] - c[j];
                q[i] = q[i] + A[j] * s[j] / ( M_PI * ( diff*diff + s[j]*s[j] ) );
            }
        }
        for (int j = 0; j < m; j++) {
            double sumh = 0.0;
            double factor = A[j] * s[j] / M_PI;
            double sqs = s[j] * s[j];
            for (int i = 0; i < *xlength; i++) {
                double diff = x[i] - c[j];
                h[i] = factor / ( diff*diff + sqs ) / q[i];
                sumh = sumh + h[i];
            }
            double prev_A = A[j];
            A[j] = sumh / *xlength;
            double prev_c = c[j];
            double prev_s = s[j];
            int is_converged = 0;
            int k = 0;
            while (is_converged == 0 && k < *itercauchy) {
                double hdiv = 0.0;
                double hprod = 0.0;
                for (int i = 0; i < *xlength; i++) {
                    double chi = (x[i] - c[j]) / s[j];
                    double sqchi = chi * chi;
                    hdiv = hdiv + h[i] / (1+sqchi);
                    hprod = hprod + h[i] * chi/(1+sqchi);
                }
                double e0k = hdiv / sumh;
                double e1k = hprod / sumh;
                c[j] = c[j] + s[j] * e1k / e0k;
                s[j] = s[j] * sqrt( 1 / e0k - 1 );
                if (fabs( c[j] - prev_c ) < 1e-6 && fabs( s[j] - prev_s ) < 1e-6)
                    is_converged = 1;
                k++;
            }
            if (*debug > 0)
                Rprintf( "%f %f %f ", A[j], c[j], s[j] );
            if (fabs( A[j] - prev_A ) > epsilon[0] ||
                fabs( c[j] - prev_c ) > epsilon[1] ||
                fabs( s[j] - prev_s ) > epsilon[2])
                run = 1;
            if (isnan(s[j]))
                is_nan = 1;
        }
        if (is_nan == 1)
            run = 0;
        if (*debug > 0)
            Rprintf( "\n" );
        *steps = *steps + 1;
    }
    for (int i = 0; i < m; i++) {
        ret[i] = A[i];
        ret[m+i] = c[i];
        ret[2*m+i] = s[i];
    }
    free( q );
    free( h );
}
m; j++) { double diff = x[i] - c[j]; q[i] = q[i] + A[j] * s[j] / ( M_PI * ( diff*diff + s[j]*s[j] ) ); } } for (int j = 0; j < m; j++) { double sumh = 0.0; double factor = A[j] * s[j] / M_PI; double sqs = s[j] * s[j]; for (int i = 0; i < *xlength; i++) { double diff = x[i] - c[j]; h[i] = factor / ( diff*diff + sqs ) / q[i]; sumh = sumh + h[i]; } double prev_A = A[j]; A[j] = sumh / *xlength; double prev_c = c[j]; double prev_s = s[j]; int is_converged = 0; int k = 0; while (is_converged == 0 && k < *itercauchy) { double hdiv = 0.0; double hprod = 0.0; for (int i = 0; i < *xlength; i++) { double chi = (x[i] - c[j]) / s[j]; double sqchi = chi * chi; hdiv = hdiv + h[i] / (1+sqchi); hprod = hprod + h[i] * chi/(1+sqchi); } double e0k = hdiv / sumh; double e1k = hprod / sumh; c[j] = c[j] + s[j] * e1k / e0k; s[j] = s[j] * sqrt( 1 / e0k - 1 ); if (fabs( c[j] - prev_c ) < 1e-6 && fabs( s[j] - prev_s ) < 1e-6) is_converged = 1; k++; } if (*debug > 0) Rprintf( "%f %f %f ", A[j], c[j], s[j] ); if (fabs( A[j] - prev_A ) > epsilon[0] || fabs( c[j] - prev_c ) > epsilon[1] || fabs( s[j] - prev_s ) > epsilon[2]) run = 1; if (isnan(s[j])) is_nan = 1; } if (is_nan == 1) run = 0; if (*debug > 0) Rprintf( "\n" ); *steps = *steps + 1; } for (int i = 0; i < m; i++) { ret[i] = A[i]; ret[m+i] = c[i]; ret[2*m+i] = s[i]; } free( q ); free( h ); } void cmm_init_vector( double *x, int *xlength, int *m, double *ret ) { double min = x[0]; double max = x[0]; for (int i = 1; i < *xlength; i++) { if (x[i] < min) min = x[i]; if (x[i] > max) max = x[i]; } for (int i = 0; i < *m; i++) { ret[i] = 1.0/(*m); ret[*m+i] = min + (i+1)*(max-min)/(*m+1); ret[2*(*m)+i] = 1; } } void dvmm( double *x, int *xlength, double *p, int *plength, double *ret ) { int m = *plength / 3; double denom[m]; for (int j = 0; j < m; j++) denom[j] = 2 * M_PI * bessi0( p[2*m+j] ); for (int i = 0; i < *xlength; i++) { ret[i] = 0.0; for (int j = 0; j < m; j++) { double diffcos = cos( deg2rad( x[i] - p[m+j] ) ); ret[i] = ret[i] + p[j] * exp( p[2*m+j] * diffcos ) / denom[j]; } } } void llvmm( double *x, int *xlength, double *p, int *plength, double *ret ) { double * dvmms = calloc( *xlength, sizeof( double ) ); if (!dvmms) error( "cannot allocate memory" ); dvmm( x, xlength, p, plength, dvmms ); *ret = 0.0; for (int i = 0; i < *xlength; i++) *ret = *ret + log( dvmms[i] ); free( dvmms ); } void vmm_fit_em_by_diff( double *x, int *xlength, double *p, int *plength, double *epsilon, int *debug, double *ret, int *steps ) { int m = *plength / 3; double A[m]; double mu[m]; double k[m]; double denom[m]; for (int i = 0; i < m; i++) { A[i] = p[i]; mu[i] = p[m+i]; k[i] = p[2*m+i]; denom[i] = 2.0 * M_PI * bessi0( k[i] ); } double * q = calloc( *xlength, sizeof( double ) ); if (!q) error( "cannot allocate memory" ); double * h = calloc( *xlength, sizeof( double ) ); if (!h) error( "cannot allocate memory" ); int run = 1; *steps = 0; while (run == 1) { int is_nan = 0; run = 0; for (int i = 0; i < *xlength; i++) { q[i] = 0.0; for (int j = 0; j < m; j++) { double diffcos = cos( deg2rad( x[i] - mu[j] ) ); q[i] = q[i] + A[j] * exp( k[j] * diffcos ) / denom[j]; } } for (int j = 0; j < m; j++) { double sumh = 0.0; double sumsin = 0.0; double sumcos = 0.0; for (int i = 0; i < *xlength; i++) { double diffcos = cos( deg2rad( x[i] - mu[j] ) ); h[i] = A[j] * exp( k[j] * diffcos ) / ( denom[j] * q[i] ); sumh = sumh + h[i]; sumsin = sumsin + sin( deg2rad(x[i]) ) * h[i]; sumcos = sumcos + cos( deg2rad(x[i]) ) * h[i]; } double prev_A = A[j]; A[j] = sumh / *xlength; double prev_mu = mu[j]; 
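/* M-step update for the j-th von Mises component follows: the new mean
   direction mu[j] is the atan2 of the responsibility-weighted sine and
   cosine sums, and the new concentration k[j] is recovered from the mean
   resultant length Rbar via the approximation
   k = (2*Rbar - Rbar^3) / (1 - Rbar^2) (Banerjee et al., 2003). */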
mu[j] = rad2deg( atan2( sumsin, sumcos ) ); double Rbar = sqrt( sumsin*sumsin + sumcos*sumcos ) / sumh; double prev_k = k[j]; k[j] = ( 2.0 * Rbar - Rbar*Rbar*Rbar ) / ( 1.0 - Rbar*Rbar ); denom[j] = 2.0 * M_PI * bessi0( k[j] ); if (*debug > 0) Rprintf( "%f %f %f ", A[j], mu[j], k[j] ); if (fabs( A[j] - prev_A ) > epsilon[0] || fabs( mu[j] - prev_mu ) > epsilon[1] || fabs( k[j] - prev_k ) > epsilon[2]) run = 1; if (isnan(k[j])) is_nan = 1; } if (is_nan == 1) run = 0; if (*debug > 0) Rprintf( "\n" ); *steps = *steps + 1; } for (int i = 0; i < m; i++) { ret[i] = A[i]; ret[m+i] = mu[i]; ret[2*m+i] = k[i]; } free( q ); free( h ); } void vmm_fit_em_by_ll( double *x, int *xlength, double *p, int *plength, double *epsilon, int *debug, double *ret, int *steps ) { int m = *plength / 3; double A[m]; double mu[m]; double k[m]; double denom[m]; double prev_llog; llvmm( x, xlength, p, plength, &prev_llog ); for (int i = 0; i < m; i++) { A[i] = p[i]; mu[i] = p[m+i]; k[i] = p[2*m+i]; denom[i] = 2.0 * M_PI * bessi0( k[i] ); } double * q = calloc( *xlength, sizeof( double ) ); if (!q) error( "cannot allocate memory" ); double * h = calloc( *xlength, sizeof( double ) ); if (!h) error( "cannot allocate memory" ); int run = 1; *steps = 0; while (run == 1) { int is_nan = 0; for (int i = 0; i < *xlength; i++) { q[i] = 0.0; for (int j = 0; j < m; j++) { double diffcos = cos( deg2rad( x[i] - mu[j] ) ); q[i] = q[i] + A[j] * exp( k[j] * diffcos ) / denom[j]; } } double tp[3*m]; for (int j = 0; j < m; j++) { double sumh = 0.0; double sumsin = 0.0; double sumcos = 0.0; for (int i = 0; i < *xlength; i++) { double diffcos = cos( deg2rad( x[i] - mu[j] ) ); h[i] = A[j] * exp( k[j] * diffcos ) / ( denom[j] * q[i] ); sumh = sumh + h[i]; sumsin = sumsin + sin( deg2rad(x[i]) ) * h[i]; sumcos = sumcos + cos( deg2rad(x[i]) ) * h[i]; } tp[j] = sumh / *xlength; tp[m+j] = rad2deg( atan2( sumsin, sumcos ) ); double Rbar = sqrt( sumsin*sumsin + sumcos*sumcos ) / sumh; tp[2*m+j] = ( 2.0 * Rbar - Rbar*Rbar*Rbar ) / ( 1.0 - Rbar*Rbar ); denom[j] = 2.0 * M_PI * bessi0( tp[2*m+j] ); if (*debug > 0) Rprintf( "%f %f %f ", tp[j], tp[m+j], tp[2*m+j] ); if (isnan( tp[2*m+j] )) is_nan = 1; } double llog; llvmm( x, xlength, tp, plength, &llog ); if (is_nan == 1 || llog - prev_llog < *epsilon) { run = 0; } else { for (int j = 0; j < m; j++) { A[j] = tp[j]; mu[j] = tp[m+j]; k[j] = tp[2*m+j]; } } prev_llog = llog; if (*debug > 0) Rprintf( "\n" ); *steps = *steps + 1; } for (int i = 0; i < m; i++) { ret[i] = A[i]; ret[m+i] = mu[i]; ret[2*m+i] = k[i]; } free( q ); free( h ); } void vmm_init_vector( int *m, double *ret ) { for (int i = 0; i < *m; i++) { ret[i] = 1.0/(*m); ret[*m+i] = 360/(*m) * i; ret[2*(*m)+i] = (((double)*m)/(12*180))*(((double)*m)/(12*180)); } } void polyroot_NR( double *p, int *plength, double *init, double *epsilon, int *debug, double *ret ) { double x = *init; int steps = 0; double d[*plength-1]; for (int i = 0; i < *plength-1; i++) d[i] = p[i+1] * (i+1); int run = 1; while (run == 1) { double powers[*plength]; powers[0] = 1; for (int i = 1; i < *plength; i++) powers[i] = powers[i-1] * x; double numerator = 0.0; double denominator = 0.0; for (int i = 0; i < *plength; i++) { numerator = numerator + p[i] * powers[i]; if (i < *plength-1) denominator = denominator + d[i] * powers[i]; } double diff = numerator / denominator; x = x - diff; steps++; if (fabs( diff ) < *epsilon) run = 0; } if (*debug > 0) Rprintf( "Convergence reached after %u iteration(s)\n", steps ); *ret = x; } 
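The remainder of the archive holds the NAMESPACE and the help pages for the exported wrappers that drive the C routines above. As a quick orientation, here is a minimal R sketch of the intended call sequence; the seed, simulated sample and component count are illustrative only and not part of the package:

library( MixtureFitting )

set.seed( 4242 )
# Simulated two-component Gaussian sample: 60% N(0, 1) and 40% N(5, 0.5).
x = c( rnorm( 600, 0, 1 ), rnorm( 400, 5, 0.5 ) )

init = gmm_init_vector( x, 2 )                    # equispaced initialization
gf = gmm_fit_em( x, init, implementation = "C" )  # EM driven by the C code above

gf$p                  # fitted c( A1, A2, mu1, mu2, sigma1, sigma2 )
llgmm( x, gf$p )      # log-likelihood of the fitted mixture
bic( x, gf$p, llgmm ) # Bayesian Information Criterion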
MixtureFitting/NAMESPACE0000644000176200001440000000427315011041673014457 0ustar liggesusers#========================================================================= # Global functions or variables #========================================================================= importFrom("grDevices", "dev.off", "png") importFrom("graphics", "hist", "lines", "plot", "rug") importFrom("stats", "coef", "dcauchy", "dnorm", "dt", "dunif", "kmeans", "median", "nls", "optim", "rcauchy", "resid", "rnorm", "runif", "var", "vcov") importFrom("utils", "head") #========================================================================= # MixtureFitting #========================================================================= useDynLib(MixtureFitting) export(dgmm) export(dvmm) export(dcmm) export(llgmm) export(llvmm) export(llcmm) export(gmm_fit_em) export(vmm_fit_em) export(vmm_fit_em_by_diff) export(vmm_fit_em_by_ll) export(cmm_fit_em) export(gmm_init_vector) export(vmm_init_vector) export(cmm_init_vector) export(polyroot_NR) #========================================================================= # Functions in R only #========================================================================= export(ds) export(dsmm) export(rgmm) export(rvmm) export(rcmm) export(llgmm_conservative) export(llsmm) export(llgmm_opposite) export(llvmm_opposite) export(bic) export(gmm_size_probability) export(gmm_size_probability_nls) export(gmm_fit_kmeans) export(gmm_intersections) export(cmm_intersections) export(ssd_gradient) export(pssd_gradient) export(gradient_descent) export(ssd) export(pssd) export(simplex) export(rsimplex_start) export(gmm_fit_hwhm) export(gmm_fit_hwhm_spline_deriv) export(cmm_fit_hwhm_spline_deriv) export(smm_fit_em) export(smm_fit_em_APK10) export(smm_fit_em_GNL08) export(smm_fit_em_CWL04) export(s_fit_primitive) export(mk_fit_images) export(gmm_init_vector_kmeans) export(gmm_init_vector_quantile) export(cmm_init_vector_kmeans) export(smm_init_vector) export(smm_init_vector_kmeans) export(gmm_merge_components) export(smm_split_component) export(plot_circular_hist) export(kmeans_circular) export(wmedian) export(digamma_approx) export(kldiv) export(bhattacharyya_dist) export(abs_convergence) export(ratio_convergence) export(plot_density) MixtureFitting/man/0000755000176200001440000000000015011047162014004 5ustar liggesusersMixtureFitting/man/rsimplex_start.Rd0000644000176200001440000000101715011040013017342 0ustar liggesusers\name{rsimplex_start} \alias{rsimplex_start} \docType{methods} \title{Nelder-Mead's Simplex Method for Function Minimization.} \description{ Generate initial simplices for simplex(). } \usage{ rsimplex_start( seed, n, lower, upper ) } \arguments{ \item{seed}{seed for random number generator} \item{n}{number of simplices} \item{lower}{vector with lower bounds of each dimension} \item{upper}{vector with upper bounds of each dimension} } \value{A list with \emph{n} simplices.} \author{Andrius Merkys} MixtureFitting/man/abs_convergence.Rd0000644000176200001440000000100515010317261017413 0ustar liggesusers\name{abs_convergence} \alias{abs_convergence} \docType{methods} \title{Absolute Convergence Check.} \description{ Compare two values to tell whether an optimization process has converged.
} \usage{ abs_convergence( p_now, p_prev, epsilon = 1e-6 ) } \arguments{ \item{p_now}{function value of \emph{i}-th iteration.} \item{p_prev}{function value of \emph{i-1}-th iteration.} \item{epsilon}{convergence criterion} } \value{TRUE if deemed to have converged, FALSE otherwise} \author{Andrius Merkys} MixtureFitting/man/gmm_init_vector_kmeans.Rd0000644000176200001440000000162115010645030021013 0ustar liggesusers\name{gmm_init_vector_kmeans} \alias{gmm_init_vector_kmeans} \docType{methods} \title{Estimate Gaussian Mixture parameters using Expectation Maximization.} \description{ Estimate an initialization vector for Gaussian mixture fitting using k-means. R implementation of k-means in kmeans() is used to find data point assignment to clusters. } \usage{ gmm_init_vector_kmeans( x, m ) } \arguments{ \item{x}{data vector} \item{m}{number of mixture components} } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/llcmm.Rd0000644000176200001440000000156414623643241015415 0ustar liggesusers\name{llcmm} \alias{llcmm} \docType{methods} \title{Log-likelihood for Cauchy Mixture} \description{ Calculates log-likelihood for a given data vector using a Cauchy mixture distribution. } \usage{ llcmm( x, p, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, gamma1, gamma2, ..., gamma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and gamma\emph{i} is the Cauchy scale of \emph{i}-th component. } \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{log-likelihood} \author{Andrius Merkys} MixtureFitting/man/pssd_gradient.Rd0000644000176200001440000000147515010636163017134 0ustar liggesusers\name{pssd_gradient} \alias{pssd_gradient} \docType{methods} \title{Penalized Sum of Squared Differences Using Gaussian Mixture Distribution} \description{ Gradient (derivative) function of pssd(). } \usage{ pssd_gradient( x, y, p ) } \arguments{ \item{x}{data vector} \item{y}{response vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } } \value{Gradient values measured at \emph{x}.} \author{Andrius Merkys} MixtureFitting/man/digamma_approx.Rd0000644000176200001440000000113014623643241017266 0ustar liggesusers\name{digamma_approx} \alias{digamma_approx} \docType{methods} \title{Calculate Approximate Value of The Digamma Function.} \description{ Calculates an approximate value of the digamma function using the first eight non-zero terms of the asymptotic expansion of digamma(x). Implemented according to Wikipedia. } \usage{ digamma_approx( x ) } \arguments{ \item{x}{data vector} } \value{ Digamma function value.
} \references{ Users of Wikipedia. Digamma function. \url{https://en.wikipedia.org/w/index.php?title=Digamma_function&oldid=708779689} } \author{Andrius Merkys} MixtureFitting/man/ds.Rd0000644000176200001440000000064214623651112014707 0ustar liggesusers\name{ds} \alias{ds} \docType{methods} \title{Density of The Student's t Model} \description{ Density function for the Student's t Model. Wrapper around R's dt(), supporting center and concentration parameters. } \usage{ ds( x, c, s, ni ) } \arguments{ \item{x}{data vector} \item{c}{center} \item{s}{concentration} \item{ni}{degrees of freedom} } \value{A vector.} \author{Andrius Merkys} MixtureFitting/man/s_fit_primitive.Rd0000644000176200001440000000162114623643241017477 0ustar liggesusers\name{s_fit_primitive} \alias{s_fit_primitive} \docType{methods} \title{Estimate Student's t distribution parameters using Batch Approximation Algorithm.} \description{ Estimates parameters for univariate Student's t distribution parameters using Batch Approximation Algorithm, according to Fig. 2 of Aeschliman et al. (2010). } \usage{ s_fit_primitive( x ) } \arguments{ \item{x}{data vector} } \value{ Vector \code{c( mu, k, ni )}, where mu is the center, k is the concentration and ni is the degrees of freedom of the distribution. } \references{ Aeschliman, C.; Park, J. & Kak, A. C. A Novel Parameter Estimation Algorithm for the Multivariate t-Distribution and Its Application to Computer Vision European Conference on Computer Vision 2010, 2010 \url{https://engineering.purdue.edu/RVL/Publications/Aeschliman2010ANovel.pdf} } \author{Andrius Merkys} MixtureFitting/man/gmm_init_vector.Rd0000644000176200001440000000202315010645030017452 0ustar liggesusers\name{gmm_init_vector} \alias{gmm_init_vector} \docType{methods} \title{Estimate Gaussian Mixture parameters using Expectation Maximization.} \description{ Estimate an initialization vector for Gaussian mixture fitting via Expectation Maximization. Proportions and scales are set to equal, centers are equispaced through the whole domain of input sample. } \usage{ gmm_init_vector( x, n, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{n}{number of mixture components} \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/dcmm.Rd0000644000176200001440000000152314623643241015224 0ustar liggesusers\name{dcmm} \alias{dcmm} \docType{methods} \title{Density of The Cauchy Mixture Distribution} \description{ Density function for the Cauchy mixture distribution. } \usage{ dcmm( x, p, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, gamma1, gamma2, ..., gamma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, gamma\emph{i} is the Cauchy scale of \emph{i}-th component. } \item{implementation}{ flag to switch between C (default) and R implementations. 
} } \value{A vector.} \author{Andrius Merkys} MixtureFitting/man/llsmm.Rd0000644000176200001440000000153014623643241015426 0ustar liggesusers\name{llsmm} \alias{llsmm} \docType{methods} \title{Log-likelihood for Student's t Mixture} \description{ Calculates log-likelihood for a given data vector using a Student's t mixture distribution. } \usage{ llsmm( x, p ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 4*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } } \value{log-likelihood} \author{Andrius Merkys} MixtureFitting/man/plot_circular_hist.Rd0000644000176200001440000000065115011037731020170 0ustar liggesusers\name{plot_circular_hist} \alias{plot_circular_hist} \docType{methods} \title{Mixture Distribution Modeling} \description{ Plot a circular histogram. } \usage{ plot_circular_hist( x, breaks = 72, ball = 0.5, ... ) } \arguments{ \item{x}{data vector} \item{breaks}{number of breaks in histogram} \item{ball}{radius of the drawn circle} \item{...}{parameters passed to plot()} } \author{Andrius Merkys} MixtureFitting/man/smm_fit_em_CWL04.Rd0000644000176200001440000000322415010645030017263 0ustar liggesusers\name{smm_fit_em_CWL04} \alias{smm_fit_em_CWL04} \docType{methods} \title{Greedily estimate Student's t Mixture parameters using Expectation Maximization.} \description{ Estimates (greedily) parameters for univariate Student's t mixture using Expectation Maximization algorithm, implemented according to Chen et al. (2004). The algorithm relies upon smm_fit_em_GNL08() to estimate mixture parameters iteratively. } \usage{ smm_fit_em_CWL04( x, p, collect.history = FALSE, debug = FALSE, ... ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 4*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } \item{collect.history}{ logical. If set to TRUE, a list of parameter values of all iterations is returned. } \item{debug}{ flag to turn the debug prints on/off. } \item{...}{ parameters passed to smm_fit_em_GNL08(). } } \value{ A list. } \references{ Chen, S.; Wang, H. & Luo, B. Greedy EM Algorithm for Robust T-Mixture Modeling Third International Conference on Image and Graphics (ICIG'04), Institute of Electrical & Electronics Engineers (IEEE), 2004, 548--551 } \author{Andrius Merkys} MixtureFitting/man/wmedian.Rd0000644000176200001440000000104614623643241015730 0ustar liggesusers\name{wmedian} \alias{wmedian} \docType{methods} \title{Calculate Weighted Median.} \description{ Calculates the weighted median. } \usage{ wmedian( x, w, start = 1, end = length( x ) ) } \arguments{ \item{x}{sample vector} \item{w}{weights vector} \item{start}{start index (default: 1)} \item{end}{end index (default: last index in \emph{x})} } \value{Median} \references{ Users of Wikipedia. Weighted median.
\url{https://en.wikipedia.org/w/index.php?title=Weighted_median&oldid=690896947} } \author{Andrius Merkys} MixtureFitting/man/gmm_size_probability.Rd0000644000176200001440000000117215010625014020503 0ustar liggesusers\name{gmm_size_probability} \alias{gmm_size_probability} \docType{methods} \title{The Gaussian Mixture Distribution} \description{ Calculates the posterior probability of a Gaussian mixture with \emph{n} components. Internally, it attempts to maximize the log-likelihood of the data by calling optim() and returns the list as received from optim(). } \usage{ gmm_size_probability( x, n, method = "SANN" ) } \arguments{ \item{x}{data vector} \item{n}{number of mixture components} \item{method}{optimization method passed to optim()} } \value{ List representing the converged optim() run. } \author{Andrius Merkys} MixtureFitting/man/gmm_size_probability_nls.Rd0000644000176200001440000000127715010625617021376 0ustar liggesusers\name{gmm_size_probability_nls} \alias{gmm_size_probability_nls} \docType{methods} \title{The Gaussian Mixture Distribution} \description{ Calculates the posterior probability of a Gaussian mixture with \emph{n} components. Internally, it bins the data vector and calls nls() to optimize the mixture fit. Returns the list of the same form as received from optim(). } \usage{ gmm_size_probability_nls( x, n, bins = 100, trace = FALSE ) } \arguments{ \item{x}{data vector} \item{n}{number of mixture components} \item{bins}{number of bins} \item{trace}{should debug trace be printed?} } \value{ List of the same form as received from optim(). } \author{Andrius Merkys} MixtureFitting/man/dgmm.Rd0000644000176200001440000000225015010637570015225 0ustar liggesusers\name{dgmm} \alias{dgmm} \docType{methods} \title{The Gaussian Mixture Distribution} \description{ Density function for the Gaussian mixture distribution. } \usage{ dgmm( x, p, normalise_proportions = FALSE, restrict_sigmas = FALSE, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } \item{normalise_proportions}{ if TRUE, make component proportions sum up to 1 by dividing each one of them by their sum (R implementation only). } \item{restrict_sigmas}{ if TRUE, skip components with scales less than or equal to zero (R implementation only). } \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{A vector.} \author{Andrius Merkys} MixtureFitting/man/ratio_convergence.Rd0000644000176200001440000000122615010644052017770 0ustar liggesusers\name{ratio_convergence} \alias{ratio_convergence} \docType{methods} \title{Ratio Convergence Check.} \description{ Compare two values to tell whether an optimization process has converged. The absolute difference between the values of two subsequent iterations is divided by the value of the previous iteration and compared to the epsilon value.
} \usage{ ratio_convergence( p_now, p_prev, epsilon = 1e-6 ) } \arguments{ \item{p_now}{function value of \emph{i}-th iteration.} \item{p_prev}{function value of \emph{i-1}-th iteration.} \item{epsilon}{convergence criterion} } \value{TRUE if deemed to have converged, FALSE otherwise} \author{Andrius Merkys} MixtureFitting/man/gmm_fit_em.Rd0000644000176200001440000000341415010645030016375 0ustar liggesusers\name{gmm_fit_em} \alias{gmm_fit_em} \docType{methods} \title{Estimate Gaussian Mixture parameters using Expectation Maximization.} \description{ Estimates parameters for Gaussian mixture using Expectation Maximization algorithm. } \usage{ gmm_fit_em( x, p, w = numeric(), epsilon = c( 0.000001, 0.000001, 0.000001 ), debug = FALSE, implementation = "C", ... ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and sigma\emph{i} is the scale of \emph{i}-th component. } \item{w}{ weights of the data points; must have the same length as the data vector. If not given, or if the lengths differ, equal weights are assumed. } \item{epsilon}{ tolerance threshold for convergence. Structure of epsilon is epsilon = c( epsilon_A, epsilon_mu, epsilon_sigma ), where epsilon_A is threshold for component proportions, epsilon_mu is threshold for component centers and epsilon_sigma is threshold for component scales. } \item{debug}{ flag to turn the debug prints on/off. } \item{implementation}{ flag to switch between C (default) and R implementations. } \item{...}{ additional arguments passed to gmm_fit_em_R() when R implementation is used. } } \value{ A list; its element \code{p} holds the vector of fitted mixture parameters, whose structure is the same as that of the input parameter p. } \author{Andrius Merkys} MixtureFitting/man/cmm_intersections.Rd0000644000176200001440000000137414623643241020035 0ustar liggesusers\name{cmm_intersections} \alias{cmm_intersections} \docType{methods} \title{Intersections of Two Cauchy Distributions} \description{ Finds intersections of two Cauchy distributions by finding roots of a quadratic equation. } \usage{ cmm_intersections( p ) } \arguments{ \item{p}{ parameter vector of 6 parameters. Structure of p vector is p = c( A1, A2, mu1, mu2, gamma1, gamma2 ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, gamma\emph{i} is the Cauchy scale of \emph{i}-th component. } } \value{ A vector of x values of intersections (zero, one or two). Returns NaN if both distributions are identical. } \author{Andrius Merkys} MixtureFitting/man/kldiv.Rd0000644000176200001440000000241114623643241015412 0ustar liggesusers\name{kldiv} \alias{kldiv} \docType{methods} \title{Kullback--Leibler Divergence of \emph{i}th Student's t Mixture component.} \description{ Measures Kullback--Leibler divergence of \emph{i}th Student's t Mixture component using Dirac's delta function. Implemented according to Chen et al. (2004). } \usage{ kldiv( x, p, k ) } \arguments{ \item{x}{data vector} \item{p}{ vector of Student's t mixture parameters.
Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where \emph{n} is number of mixture components, A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } \item{k}{ number of the component. } } \value{ Kullback--Leibler divergence as double. } \references{ Chen, S.; Wang, H. & Luo, B. Greedy EM Algorithm for Robust T-Mixture Modeling Third International Conference on Image and Graphics (ICIG'04), Institute of Electrical & Electronics Engineers (IEEE), 2004, 548--551 } \author{Andrius Merkys} MixtureFitting/man/vmm_fit_em_by_diff.Rd0000644000176200001440000000354015010645030020076 0ustar liggesusers\name{vmm_fit_em_by_diff} \alias{vmm_fit_em_by_diff} \docType{methods} \title{Estimate von Mises Mixture parameters using Expectation Maximization.} \description{ Estimates parameters for univariate von Mises mixture using Expectation Maximization algorithm. In this version stopping criterion is the difference between parameters in the subsequent iterations. } \usage{ vmm_fit_em_by_diff( x, p, epsilon = c( 0.000001, 0.000001, 0.000001 ), debug = FALSE, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and k\emph{i} is the concentration of \emph{i}-th component. } \item{epsilon}{ tolerance threshold for convergence. Structure of epsilon is epsilon = c( epsilon_A, epsilon_mu, epsilon_k ), where epsilon_A is threshold for component proportions, epsilon_mu is threshold for component centers and epsilon_k is threshold for component concentrations. } \item{debug}{ flag to turn the debug prints on/off. } \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{ Vector of mixture parameters, whose structure is the same as of input parameter's p. } \references{ Banerjee et al. Expectation Maximization for Clustering on Hyperspheres (2003), manuscript, accessible on: \url{https://web.archive.org/web/20130120061240/http://www.lans.ece.utexas.edu/~abanerjee/papers/05/banerjee05a.pdf} } \author{Andrius Merkys} MixtureFitting/man/cmm_init_vector.Rd0000644000176200001440000000204515010645046017461 0ustar liggesusers\name{cmm_init_vector} \alias{cmm_init_vector} \docType{methods} \title{Estimate Cauchy Mixture parameters using Expectation Maximization.} \description{ Estimate an initialization vector for Cauchy mixture fitting via Expectation Maximization. Proportions are set to equal, centers are equispaced through the whole domain of input sample, and scales are set to 1. } \usage{ cmm_init_vector( x, m, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{m}{number of mixture components} \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. 
Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, gamma1, gamma2, ..., gamma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and gamma\emph{i} is the Cauchy scale of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/dsmm.Rd0000644000176200001440000000145214623643241015245 0ustar liggesusers\name{dsmm} \alias{dsmm} \docType{methods} \title{Density of The Student's t Mixture Model} \description{ Density function for the Student's t Mixture Model. } \usage{ dsmm( x, p ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 4*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } } \value{A vector.} \author{Andrius Merkys} MixtureFitting/man/vmm_init_vector.Rd0000644000176200001440000000202415010645046017501 0ustar liggesusers\name{vmm_init_vector} \alias{vmm_init_vector} \docType{methods} \title{Estimate von Mises Mixture parameters using Expectation Maximization.} \description{ Estimate an initialization vector for von Mises mixture fitting via Expectation Maximization. Proportions are set to equal, centers are equispaced through the whole domain of input sample, and concentrations are set to (m/(12*180))^2. } \usage{ vmm_init_vector( m, implementation = "C" ) } \arguments{ \item{m}{number of mixture components} \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and k\emph{i} is the concentration of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/polyroot_NR.Rd0000644000176200001440000000152715010643647016577 0ustar liggesusers\name{polyroot_NR} \alias{polyroot_NR} \docType{methods} \title{Find one real polynomial root using Newton--Raphson method.} \description{ Finds one real polynomial root using Newton--Raphson method, implemented according to Wikipedia. } \usage{ polyroot_NR( p, init = 0, epsilon = 1e-6, debug = FALSE, implementation = "C" ) } \arguments{ \item{p}{vector of polynomial coefficients.} \item{init}{initial value.} \item{epsilon}{tolerance threshold for convergence.} \item{debug}{flag to turn the debug prints on/off.} \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{ Real polynomial root. } \references{ Users of Wikipedia. Newton's method. \url{https://en.wikipedia.org/w/index.php?title=Newton\%27s_method&oldid=710342140} } \author{Andrius Merkys} MixtureFitting/man/llgmm_opposite.Rd0000644000176200001440000000151514623643241017337 0ustar liggesusers\name{llgmm_opposite} \alias{llgmm_opposite} \docType{methods} \title{Opposite Log-likelihood for Gaussian Mixture} \description{ Calculates opposite log-likelihood for a given data vector using a Gaussian mixture distribution. 
} \usage{ llgmm_opposite( x, p ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and sigma\emph{i} is the scale of \emph{i}-th component. } } \value{opposite log-likelihood (negated log-likelihood value)} \author{Andrius Merkys} MixtureFitting/man/rcmm.Rd0000644000176200001440000000135014623643241015240 0ustar liggesusers\name{rcmm} \alias{rcmm} \docType{methods} \title{Random Sample of The Cauchy Mixture Distribution} \description{ Generates a random sample of the Cauchy mixture distribution. } \usage{ rcmm( n, p ) } \arguments{ \item{n}{sample size} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, gamma1, gamma2, ..., gamma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, gamma\emph{i} is the Cauchy scale of \emph{i}-th component. } } \value{A vector.} \author{Andrius Merkys} MixtureFitting/man/vmm_fit_em.Rd0000644000176200001440000000332615010644756016434 0ustar liggesusers\name{vmm_fit_em} \alias{vmm_fit_em} \docType{methods} \title{Estimate von Mises Mixture parameters using Expectation Maximization.} \description{ Estimates parameters for univariate von Mises mixture using Expectation Maximization algorithm. } \usage{ vmm_fit_em( x, p, epsilon = c( 0.000001, 0.000001, 0.000001 ), debug = FALSE, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and k\emph{i} is the concentration of \emph{i}-th component. } \item{epsilon}{ tolerance threshold for convergence. Structure of epsilon is epsilon = c( epsilon_A, epsilon_mu, epsilon_k ), where epsilon_A is threshold for component proportions, epsilon_mu is threshold for component centers and epsilon_k is threshold for component concentrations. } \item{debug}{ flag to turn the debug prints on/off. } \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{ A list; its element \code{p} holds the vector of fitted mixture parameters, whose structure is the same as that of the input parameter p. } \references{ Banerjee et al. Expectation Maximization for Clustering on Hyperspheres (2003), manuscript, accessible on: \url{https://web.archive.org/web/20130120061240/http://www.lans.ece.utexas.edu/~abanerjee/papers/05/banerjee05a.pdf} } \author{Andrius Merkys} MixtureFitting/man/ssd_gradient.Rd0000644000176200001440000000145715010635337016756 0ustar liggesusers\name{ssd_gradient} \alias{ssd_gradient} \docType{methods} \title{Sum of Squared Differences Using Gaussian Mixture Distribution} \description{ Gradient (derivative) function of ssd(). } \usage{ ssd_gradient( x, y, p ) } \arguments{ \item{x}{data vector} \item{y}{response vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components.
Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } } \value{Gradient values measured at \emph{x}.} \author{Andrius Merkys} MixtureFitting/man/llgmm.Rd0000644000176200001440000000156114623643241015416 0ustar liggesusers\name{llgmm} \alias{llgmm} \docType{methods} \title{Log-likelihood for Gaussian Mixture} \description{ Calculates log-likelihood for a given data vector using a Gaussian mixture distribution. } \usage{ llgmm( x, p, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and sigma\emph{i} is the scale of \emph{i}-th component. } \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{log-likelihood} \author{Andrius Merkys} MixtureFitting/man/smm_fit_em_GNL08.Rd0000644000176200001440000000614415010645030017264 0ustar liggesusers\name{smm_fit_em_GNL08} \alias{smm_fit_em_GNL08} \docType{methods} \title{Estimate Student's t Mixture parameters using Expectation Maximization.} \description{ Estimates parameters for univariate Student's t mixture using Expectation Maximization algorithm, according to Eqns. 12--17 of Gerogiannis et al. (2009). } \usage{ smm_fit_em_GNL08( x, p, epsilon = c( 1e-6, 1e-6, 1e-6, 1e-6 ), collect.history = FALSE, debug = FALSE, min.sigma = 1e-256, min.ni = 1e-256, max.df = 1000, max.steps = Inf, polyroot.solution = 'jenkins_taub', convergence = abs_convergence, unif.component = FALSE ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 4*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } \item{epsilon}{ tolerance threshold for convergence. Structure of epsilon is epsilon = c( epsilon_A, epsilon_mu, epsilon_k, epsilon_ni ), where epsilon_A is threshold for component proportions, epsilon_mu is threshold for component centers, epsilon_k is threshold for component concentrations and epsilon_ni is threshold for component degrees of freedom. } \item{collect.history}{ logical. If set to TRUE, a list of parameter values of all iterations is returned. } \item{debug}{ flag to turn the debug prints on/off. } \item{min.sigma}{minimum value of sigma} \item{min.ni}{minimum value of degrees of freedom} \item{max.df}{maximum value of degrees of freedom} \item{max.steps}{maximum number of steps, may be infinity} \item{polyroot.solution}{ polyroot finding method used to approximate digamma function. Possible values are 'jenkins_taub' and 'newton_raphson'. } \item{convergence}{ function to use for convergence checking. Must accept function values of the last two iterations and return TRUE or FALSE. 
} \item{unif.component}{ should a uniform component for outliers be added, as suggested by Cousineau & Chartier (2010)? } } \value{ A list. } \references{ Gerogiannis, D.; Nikou, C. & Likas, A. The mixtures of Student's t-distributions as a robust framework for rigid registration. Image and Vision Computing, Elsevier BV, 2009, 27, 1285--1294 \url{https://www.cs.uoi.gr/~arly/papers/imavis09.pdf} Cousineau, D. & Chartier, S. Outliers detection and treatment: a review. International Journal of Psychological Research, 2010, 3, 58--67 \url{https://revistas.usb.edu.co/index.php/IJPR/article/view/844} } \author{Andrius Merkys} MixtureFitting/man/gmm_init_vector_quantile.Rd0000644000176200001440000000171115010645030021357 0ustar liggesusers\name{gmm_init_vector_quantile} \alias{gmm_init_vector_quantile} \docType{methods} \title{Estimate Gaussian Mixture parameters using Expectation Maximization.} \description{ Estimate an initialization vector for Gaussian mixture fitting using (weighted) quantiles. Proportions and scales are set to equal, centers are placed at equispaced quantiles. } \usage{ gmm_init_vector_quantile( x, m, w = numeric() ) } \arguments{ \item{x}{data vector} \item{m}{number of mixture components} \item{w}{weight vector} } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/bic.Rd0000644000176200001440000000077014623643241015044 0ustar liggesusers\name{bic} \alias{bic} \docType{methods} \title{Bayesian Information Criterion (BIC)} \description{ Calculates Bayesian Information Criterion (BIC) for any type of mixture model. Log-likelihood function has to be provided. } \usage{ bic( x, p, llf ) } \arguments{ \item{x}{data vector} \item{p}{vector of mixture model parameters} \item{llf}{function calculating log-likelihood, called as llf( x, p )} } \value{ Bayesian Information Criterion value. } \author{Andrius Merkys} MixtureFitting/man/gmm_fit_hwhm_spline_deriv.Rd0000644000176200001440000000173615011041673021513 0ustar liggesusers\name{gmm_fit_hwhm_spline_deriv} \alias{gmm_fit_hwhm_spline_deriv} \docType{methods} \title{Estimate Gaussian Mixture Parameters Using Derivatives and Half-Width at Half-Maximum Method.} \description{ Estimate Gaussian mixture parameters using derivatives and half-width at half-maximum (HWHM) method. The method smooths the histogram before attempting to locate the modes. Then it describes them using HWHM. } \usage{ gmm_fit_hwhm_spline_deriv( x, y ) } \arguments{ \item{x}{data vector} \item{y}{response vector for \emph{x}} } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. 
} \author{Andrius Merkys} MixtureFitting/man/smm_fit_em.Rd0000644000176200001440000000211415010645030016415 0ustar liggesusers\name{smm_fit_em} \alias{smm_fit_em} \docType{methods} \title{Estimate Student's t Mixture parameters using Expectation Maximization.} \description{ Estimates parameters for Student's t mixture using Expectation Maximization algorithm. Calls smm_fit_em_APK10(). } \usage{ smm_fit_em( x, p, ... ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 4*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } \item{...}{ additional arguments passed to smm_fit_em_GNL08(). } } \value{ A list; its element \code{p} holds the vector of fitted mixture parameters, whose structure is the same as that of the input parameter p. } \author{Andrius Merkys} MixtureFitting/man/plot_density.Rd0000644000176200001440000000175315011047162017016 0ustar liggesusers\name{plot_density} \alias{plot_density} \docType{methods} \title{Mixture Distribution Modeling} \description{ Draw a PNG histogram with a mixture density on top of it. } \usage{ plot_density( x, model, density_f, width, height, cuts = 400, main = "", filename = NULL, obs_good = c(), obs_bad = c(), scale_density = FALSE ) } \arguments{ \item{x}{data vector} \item{cuts}{number of breaks in histogram} \item{main}{main title of the plot} \item{model}{model passed to density_f()} \item{density_f}{probability density function} \item{filename}{name of the file to write} \item{width}{image width, passed to png()} \item{height}{image height, passed to png()} \item{obs_good}{vector of values to mark with rug() in green color} \item{obs_bad}{vector of values to mark with rug() in red color} \item{scale_density}{should probability density be scaled?} } \author{Andrius Merkys} MixtureFitting/man/gmm_intersections.Rd0000644000176200001440000000137114623643241020036 0ustar liggesusers\name{gmm_intersections} \alias{gmm_intersections} \docType{methods} \title{Intersections of Two Gaussian Distributions} \description{ Finds intersections of two Gaussian distributions by finding roots of a quadratic equation. } \usage{ gmm_intersections( p ) } \arguments{ \item{p}{ parameter vector of 6 parameters. Structure of p vector is p = c( A1, A2, mu1, mu2, sigma1, sigma2 ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } } \value{ A vector of x values of intersections (zero, one or two). Returns NaN if both distributions are identical. } \author{Andrius Merkys} MixtureFitting/man/ssd.Rd0000644000176200001440000000165015010606753015074 0ustar liggesusers\name{ssd} \alias{ssd} \docType{methods} \title{Sum of Squared Differences Using Gaussian Mixture Distribution} \description{ Given two vectors of same length and a Gaussian mixture, calculate the sum of squared differences (SSD) between the first vector and Gaussian mixture densities measured at points from second vector. } \usage{ ssd( x, y, p ) } \arguments{ \item{x}{data vector} \item{y}{response vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components.
Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } } \value{Sum of squared differences.} \author{Andrius Merkys} MixtureFitting/man/smm_init_vector_kmeans.Rd0000644000176200001440000000177415010645046021047 0ustar liggesusers\name{smm_init_vector_kmeans} \alias{smm_init_vector_kmeans} \docType{methods} \title{Estimate Student's t Mixture parameters using Expectation Maximization.} \description{ Estimate an initialization vector for Student's t mixture fitting via Expectation Maximization. R implementation of k-means in kmeans() is used to find data point assignment to clusters. s_fit_primitive() is then used to estimate component parameters for each cluster. } \usage{ smm_init_vector_kmeans( x, m ) } \arguments{ \item{x}{data vector} \item{m}{number of mixture components} } \value{ Parameter vector of 4*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/kmeans_circular.Rd0000644000176200001440000000100714623646153017450 0ustar liggesusers\name{kmeans_circular} \alias{kmeans_circular} \docType{methods} \title{K-Means Clustering for Points on Circle} \description{ Perform k-means clustering on angular data (in degrees). } \usage{ kmeans_circular( x, centers, iter.max = 10 ) } \arguments{ \item{x}{data vector} \item{centers}{vector of initial centers (in degrees)} \item{iter.max}{maximum number of iterations} } \value{ Vector of the same length as \emph{centers} defining cluster centers (in degrees). } \author{Andrius Merkys} MixtureFitting/man/rvmm.Rd0000644000176200001440000000154214623643241015266 0ustar liggesusers\name{rvmm} \alias{rvmm} \docType{methods} \title{Random Sample of the von Mises Mixture Model.} \description{ Generates a random sample of the von Mises Mixture Model. } \usage{ rvmm( n, p ) } \arguments{ \item{n}{sample size} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and k\emph{i} is the concentration of \emph{i}-th component. } } \value{A vector.} \references{ Best & Fisher. Efficient Simulation of the von Mises Distribution. Journal of the RSS, Series C, 1979, 28, 152-157. } \author{Andrius Merkys} MixtureFitting/man/cmm_init_vector_kmeans.Rd0000644000176200001440000000240715010645030021012 0ustar liggesusers\name{cmm_init_vector_kmeans} \alias{cmm_init_vector_kmeans} \docType{methods} \title{Estimate Cauchy Mixture parameters using Expectation Maximization.} \description{ Estimate an initialization vector for Cauchy mixture fitting using k-means. R implementation of k-means in kmeans() is used to find data point assignment to clusters. Then several iterations of Cauchy mixture fitting (per Nahy 2006) are used to derive mixture parameters.
} \usage{ cmm_init_vector_kmeans( x, m, iter.cauchy = 20 ) } \arguments{ \item{x}{data vector} \item{m}{number of mixture components} \item{iter.cauchy}{number of iterations to fit a single Cauchy component.} } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, gamma1, gamma2, ..., gamma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and gamma\emph{i} is the Cauchy scale of \emph{i}-th component. } \references{ Ferenc Nahy. Parameter Estimation of the Cauchy Distribution in Information Theory Approach (2006). Journal of Universal Computer Science } \author{Andrius Merkys} MixtureFitting/man/dcgmm.Rd0000644000176200001440000000235514623643241015377 0ustar liggesusers\name{dcgmm} \alias{dcgmm} \docType{methods} \title{Density of The Cauchy-Gaussian Distribution} \description{ Density function for the Cauchy-Gaussian distribution, according to Eqn. 2 of Swami (2000). } \usage{ dcgmm( x, p ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 5*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, e1, e2, ..., e\emph{n}, mu1, mu2, ..., mu\emph{n}, gamma1, gamma2, ..., gamma\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, e\emph{i} is the proportion of Cauchy subcomponent of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, gamma\emph{i} is the Cauchy concentration of \emph{i}-th component and sigma\emph{i} is the Gaussian standard deviation of \emph{i}-th component. } } \value{A vector.} \references{ Swami, A. Non-Gaussian mixture models for detection and estimation in heavy-tailed noise 2000 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (Cat. No.00CH37100), 2000, 6, 3802-3805 } \author{Andrius Merkys} MixtureFitting/man/smm_split_component.Rd0000644000176200001440000000274615010312452020371 0ustar liggesusers\name{smm_split_component} \alias{smm_split_component} \docType{methods} \title{Split a component of Student's t-distribution in two.} \description{ Splits a component of Student's t-distribution mixture. Implemented according to Eqns. 30--36 of Chen et al. (2004). } \usage{ smm_split_component( p, alpha = 0.5, beta = 0.5, u = 0.5 ) } \arguments{ \item{p}{ vector of Student's t mixture parameters. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where \emph{n} is number of mixture components, A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } \item{alpha}{split proportion for component proportions} \item{beta}{split proportion for component concentrations} \item{u}{split proportion for component centers} } \value{ Vector of parameters for resulting two-component mixture, whose structure is the same as of input parameter's p. } \references{ Chen, S.-B. & Luo, B. Robust t-mixture modelling with SMEM algorithm Proceedings of 2004 International Conference on Machine Learning and Cybernetics (IEEE Cat. 
No.04EX826), Institute of Electrical & Electronics Engineers (IEEE), 2004, 6, 3689--3694 } \author{Andrius Merkys} MixtureFitting/man/llvmm_opposite.Rd0000644000176200001440000000150714623643241017357 0ustar liggesusers\name{llvmm_opposite} \alias{llvmm_opposite} \docType{methods} \title{Opposite Log-likelihood for von Mises Mixture} \description{ Calculates opposite log-likelihood for a given data vector using a von Mises mixture distribution. } \usage{ llvmm_opposite( x, p ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and k\emph{i} is the concentration of \emph{i}-th component. } } \value{opposite log-likelihood (negated log-likelihood value)} \author{Andrius Merkys} MixtureFitting/man/gmm_fit_kmeans.Rd0000644000176200001440000000135714623643241017271 0ustar liggesusers\name{gmm_fit_kmeans} \alias{gmm_fit_kmeans} \docType{methods} \title{Estimate Gaussian Mixture parameters from kmeans.} \description{ Estimates parameters for Gaussian mixture using kmeans. } \usage{ gmm_fit_kmeans( x, n ) } \arguments{ \item{x}{data vector} \item{n}{number of mixture components} } \value{ Vector of 3*\emph{n} mixture parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/llvmm.Rd0000644000176200001440000000155314623643241015436 0ustar liggesusers\name{llvmm} \alias{llvmm} \docType{methods} \title{Log-likelihood for von Mises Mixture} \description{ Calculates log-likelihood for a given data vector using a von Mises mixture distribution. } \usage{ llvmm( x, p, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and k\emph{i} is the concentration of \emph{i}-th component. } \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{log-likelihood} \author{Andrius Merkys} MixtureFitting/man/cmm_fit_em.Rd0000644000176200001440000000335715010645030016377 0ustar liggesusers\name{cmm_fit_em} \alias{cmm_fit_em} \docType{methods} \title{Estimate Cauchy Mixture parameters using Expectation Maximization.} \description{ Estimates parameters for Cauchy mixture using Expectation Maximization algorithm. } \usage{ cmm_fit_em( x, p, epsilon = c( 0.000001, 0.000001, 0.000001 ), iter.cauchy = 20, debug = FALSE, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components.
MixtureFitting/man/cmm_fit_em.Rd0000644000176200001440000000335715010645030016377 0ustar liggesusers\name{cmm_fit_em} \alias{cmm_fit_em} \docType{methods} \title{Estimate Cauchy Mixture parameters using Expectation Maximization.} \description{ Estimates parameters for a Cauchy mixture using the Expectation Maximization algorithm. } \usage{ cmm_fit_em( x, p, epsilon = c( 0.000001, 0.000001, 0.000001 ), iter.cauchy = 20, debug = FALSE, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, gamma1, gamma2, ..., gamma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and gamma\emph{i} is the Cauchy scale of \emph{i}-th component. } \item{epsilon}{ tolerance threshold for convergence. Structure of epsilon is epsilon = c( epsilon_A, epsilon_mu, epsilon_gamma ), where epsilon_A is threshold for component proportions, epsilon_mu is threshold for component centers and epsilon_gamma is threshold for component Cauchy scales. } \item{iter.cauchy}{ number of iterations to fit a single Cauchy component. } \item{debug}{ flag to turn the debug prints on/off. } \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{ Vector of mixture parameters, whose structure is the same as that of the input parameter p. } \references{ Ferenc Nagy. Parameter Estimation of the Cauchy Distribution in Information Theory Approach (2006). Journal of Universal Computer Science } \author{Andrius Merkys}
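% The usage sketch below is illustrative: the simulated two-component Cauchy sample and the k-means-based initialization are assumptions for demonstration.
\examples{
# Minimal sketch: fit a two-component Cauchy mixture to simulated data,
# initializing the parameter vector with cmm_init_vector_kmeans().
set.seed( 42 )
x <- c( rcauchy( 500, -5, 0.5 ), rcauchy( 500, 5, 0.5 ) )
p0 <- cmm_init_vector_kmeans( x, 2 )
cf <- cmm_fit_em( x, p0 )
cf
}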
MixtureFitting/man/mk_fit_images.Rd0000644000176200001440000000072315011037720017072 0ustar liggesusers\name{mk_fit_images} \alias{mk_fit_images} \docType{methods} \title{Draw Mixture Fitting Progress Images} \description{ Draws a PNG histogram with a mixture density on top of it for each iteration of the mixture optimization process. } \usage{ mk_fit_images( h, l, prefix = "img_" ) } \arguments{ \item{h}{histogram object, as returned from hist()} \item{l}{list containing model vectors} \item{prefix}{prefix of the file names to write} } \author{Andrius Merkys} MixtureFitting/man/pssd.Rd0000644000176200001440000000204115010636107015253 0ustar liggesusers\name{pssd} \alias{pssd} \docType{methods} \title{Penalized Sum of Squared Differences Using Gaussian Mixture Distribution} \description{ Given two vectors of the same length and a Gaussian mixture, calculates the penalized sum of squared differences (SSD) between the response vector and the Gaussian mixture density evaluated at the corresponding points of the data vector. Penalties are included for proportions and scales that are less than or equal to 0. } \usage{ pssd( x, y, p ) } \arguments{ \item{x}{data vector} \item{y}{response vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } } \value{Penalized sum of squared differences.} \author{Andrius Merkys} MixtureFitting/man/smm_fit_em_APK10.Rd0000644000176200001440000000370315010645046017255 0ustar liggesusers\name{smm_fit_em_APK10} \alias{smm_fit_em_APK10} \docType{methods} \title{Estimate Student's t Mixture parameters using Expectation Maximization.} \description{ Estimates parameters for a univariate Student's t mixture using the Expectation Maximization algorithm, according to Fig. 2 of Aeschliman et al. (2010). } \usage{ smm_fit_em_APK10( x, p, epsilon = c( 1e-6, 1e-6, 1e-6, 1e-6 ), collect.history = FALSE, debug = FALSE ) } \arguments{ \item{x}{data vector} \item{p}{ initialization vector of 4*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } \item{epsilon}{ tolerance threshold for convergence. Structure of epsilon is epsilon = c( epsilon_A, epsilon_mu, epsilon_k, epsilon_ni ), where epsilon_A is threshold for component proportions, epsilon_mu is threshold for component centers, epsilon_k is threshold for component concentrations and epsilon_ni is threshold for component degrees of freedom. } \item{collect.history}{ flag to turn accumulation of estimation history on/off. } \item{debug}{ flag to turn the debug prints on/off. } } \value{ A list. } \references{ Aeschliman, C.; Park, J. & Kak, A. C. A Novel Parameter Estimation Algorithm for the Multivariate t-Distribution and Its Application to Computer Vision European Conference on Computer Vision 2010, 2010 \url{https://engineering.purdue.edu/RVL/Publications/Aeschliman2010ANovel.pdf} } \author{Andrius Merkys} MixtureFitting/man/gradient_descent.Rd0000644000176200001440000000120315010613277017576 0ustar liggesusers\name{gradient_descent} \alias{gradient_descent} \docType{methods} \title{Gradient Descent} \description{ A simple implementation of the gradient descent method. Given a derivative function, it follows the function's decrease until the convergence criterion is met. } \usage{ gradient_descent( gradfn, start, gamma = 0.1, ..., epsilon = 0.01 ) } \arguments{ \item{gradfn}{derivative function} \item{start}{starting value} \item{gamma}{learning rate} \item{...}{additional arguments passed to the derivative function} \item{epsilon}{convergence threshold for absolute squared difference} } \value{Argument value at which the convergence criterion is met.} \author{Andrius Merkys}
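% The usage sketch below is illustrative: the quadratic objective is an assumption for demonstration.
\examples{
# Minimal sketch: minimize f(x) = (x - 2)^2 by following its derivative
# f'(x) = 2 * (x - 2); the descent should converge near x = 2.
gradfn <- function( x ) 2 * ( x - 2 )
gradient_descent( gradfn, 0 )
}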
Expectation Maximization for Clustering on Hyperspheres (2003), manuscript, accessible at: \url{https://web.archive.org/web/20130120061240/http://www.lans.ece.utexas.edu/~abanerjee/papers/05/banerjee05a.pdf} } \author{Andrius Merkys} MixtureFitting/man/rgmm.Rd0000644000176200001440000000134514623643241015250 0ustar liggesusers\name{rgmm} \alias{rgmm} \docType{methods} \title{Random Sample of the Gaussian Mixture Distribution} \description{ Generates a random sample of the Gaussian mixture distribution. } \usage{ rgmm( n, p ) } \arguments{ \item{n}{number of observations to generate} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } } \value{A vector.} \author{Andrius Merkys} MixtureFitting/man/dvmm.Rd0000644000176200001440000000147614623643241015256 0ustar liggesusers\name{dvmm} \alias{dvmm} \docType{methods} \title{Density of The von Mises Mixture Model.} \description{ Density function for the von Mises Mixture Model. } \usage{ dvmm( x, p, implementation = "C" ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and k\emph{i} is the concentration of \emph{i}-th component. } \item{implementation}{ flag to switch between C (default) and R implementations. } } \value{A vector.} \author{Andrius Merkys} MixtureFitting/man/llgmm_conservative.Rd0000644000176200001440000000163515010613435020201 0ustar liggesusers\name{llgmm_conservative} \alias{llgmm_conservative} \docType{methods} \title{Log-likelihood for Gaussian Mixture} \description{ Calculates log-likelihood for a given data vector using a Gaussian mixture distribution. This is a straightforward implementation, different from llgmm() in that it does not detect and shortcut edge cases. } \usage{ llgmm_conservative( x, p ) } \arguments{ \item{x}{data vector} \item{p}{ parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and sigma\emph{i} is the scale of \emph{i}-th component. } } \value{log-likelihood} \author{Andrius Merkys}
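% The usage sketch below is illustrative: the simulated sample and the two-component parameter vector are assumptions for demonstration.
\examples{
# Minimal sketch: log-likelihood of a bimodal sample under a two-component
# Gaussian mixture with equal proportions, means 0 and 5 and unit scales.
set.seed( 42 )
x <- c( rnorm( 50, 0, 1 ), rnorm( 50, 5, 1 ) )
llgmm_conservative( x, c( 0.5, 0.5, 0, 5, 1, 1 ) )
}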
MixtureFitting/man/cmm_fit_hwhm_spline_deriv.Rd0000644000176200001440000000174215011041673021504 0ustar liggesusers\name{cmm_fit_hwhm_spline_deriv} \alias{cmm_fit_hwhm_spline_deriv} \docType{methods} \title{Estimate Cauchy Mixture Parameters Using Derivatives and Half-Width at Half-Maximum Method.} \description{ Estimates Cauchy mixture parameters using derivatives and the half-width at half-maximum (HWHM) method. The method smooths the histogram before attempting to locate the modes, then describes them using HWHM. } \usage{ cmm_fit_hwhm_spline_deriv( x, y ) } \arguments{ \item{x}{data vector} \item{y}{response vector for \emph{x}} } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, gamma1, gamma2, ..., gamma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component and gamma\emph{i} is the Cauchy scale of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/smm_init_vector.Rd0000644000176200001440000000204315010645030017473 0ustar liggesusers\name{smm_init_vector} \alias{smm_init_vector} \docType{methods} \title{Generate an Initialization Vector for Student's t Mixture Fitting.} \description{ Generates an initialization vector for fitting a Student's t mixture via Expectation Maximization. Proportions are set to be equal, centers are equispaced over the whole domain of the input sample, and concentrations and degrees of freedom are set to 1. } \usage{ smm_init_vector( x, n ) } \arguments{ \item{x}{data vector} \item{n}{number of mixture components} } \value{ Parameter vector of 4*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, k1, k2, ..., k\emph{n}, ni1, ni2, ..., ni\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, k\emph{i} is the concentration of \emph{i}-th component and ni\emph{i} is the degrees of freedom of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/gmm_fit_hwhm.Rd0000644000176200001440000000167215010324011016744 0ustar liggesusers\name{gmm_fit_hwhm} \alias{gmm_fit_hwhm} \docType{methods} \title{Estimate Gaussian Mixture Parameters Using Half-Width at Half-Maximum Method.} \description{ Estimates Gaussian mixture parameters using the half-width at half-maximum (HWHM) method. Given a histogram, the method attempts to locate the most prominent modes and describe them using HWHM. } \usage{ gmm_fit_hwhm( x, y, n ) } \arguments{ \item{x}{data vector} \item{y}{response vector for \emph{x}} \item{n}{number of mixture components} } \value{ Parameter vector of 3*\emph{n} parameters, where \emph{n} is number of mixture components. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the location of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } \author{Andrius Merkys} MixtureFitting/man/bhattacharyya_dist.Rd0000644000176200001440000000121315010652654020146 0ustar liggesusers\name{bhattacharyya_dist} \alias{bhattacharyya_dist} \docType{methods} \title{Bhattacharyya distance for univariate Gaussian distributions.} \description{ Computes the Bhattacharyya distance between two univariate Gaussian distributions. } \usage{ bhattacharyya_dist( mu1, mu2, sigma1, sigma2 ) } \arguments{ \item{mu1}{mean of the first Gaussian distribution.} \item{mu2}{mean of the second Gaussian distribution.} \item{sigma1}{standard deviation of the first Gaussian distribution.} \item{sigma2}{standard deviation of the second Gaussian distribution.} } \value{ Bhattacharyya distance as a double. } \author{Andrius Merkys} MixtureFitting/man/simplex.Rd0000644000176200001440000000216715011044315015757 0ustar liggesusers\name{simplex} \alias{simplex} \docType{methods} \title{Nelder-Mead's Simplex Method for Function Minimization.} \description{ Minimizes a function using the Nelder-Mead simplex method. } \usage{ simplex( fn, start, ..., epsilon = 0.000001, alpha = 1, gamma = 2, rho = 0.5, delta = 0.5, trace = FALSE ) } \arguments{ \item{fn}{function to minimize; has to accept the argument vector as its first parameter} \item{start}{start vector} \item{...}{other parameters passed to the minimized function} \item{epsilon}{convergence criterion} \item{alpha}{reflection coefficient} \item{gamma}{expansion coefficient} \item{rho}{contraction coefficient} \item{delta}{shrink coefficient} \item{trace}{should debug trace be printed?} } \value{Vector yielding the minimum value of the minimized function} \references{ Nelder, J. A. & Mead, R. A Simplex Method For Function Minimization. The Computer Journal, 1965, 308-313. Users of Wikipedia. Nelder-Mead method. \url{https://en.wikipedia.org/w/index.php?title=Nelder\%E2\%80\%93Mead_method&oldid=1287347131} } \author{Andrius Merkys}
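% The usage sketch below is illustrative: the quadratic objective is an assumption for demonstration.
\examples{
# Minimal sketch: minimize a shifted quadratic whose minimum is at c(1, -1).
fn <- function( v ) ( v[1] - 1 )^2 + ( v[2] + 1 )^2
simplex( fn, c( 0, 0 ) )
}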
MixtureFitting/man/gmm_merge_components.Rd0000644000176200001440000000257314623643241020516 0ustar liggesusers\name{gmm_merge_components} \alias{gmm_merge_components} \docType{methods} \title{Merge two Gaussian components into one.} \description{ Merges \emph{i}th and \emph{j}th components of a Gaussian mixture model. Implemented in the same vein as \code{\link[fpc]{mergeparameters}} of \code{fpc}. } \usage{ gmm_merge_components( x, p, i, j ) } \arguments{ \item{x}{data vector} \item{p}{ vector of Gaussian mixture parameters. Structure of p vector is p = c( A1, A2, ..., A\emph{n}, mu1, mu2, ..., mu\emph{n}, sigma1, sigma2, ..., sigma\emph{n} ), where \emph{n} is number of mixture components, A\emph{i} is the proportion of \emph{i}-th component, mu\emph{i} is the center of \emph{i}-th component, sigma\emph{i} is the scale of \emph{i}-th component. } \item{i}{ index of the first component to be merged. Component with this index will be replaced by the merged one in the output. } \item{j}{ index of the second component to be merged. Component with this index will be removed in the output. } } \value{ Vector of mixture parameters, whose structure is the same as that of the input parameter p. } \references{ Hennig, C. Methods for merging Gaussian mixture components Advances in Data Analysis and Classification, Springer Nature, 2010, 4, 3-34 } \author{Andrius Merkys} MixtureFitting/DESCRIPTION0000644000176200001440000000155015014061676014751 0ustar liggesusersPackage: MixtureFitting Version: 0.6.1 Date: 2025-05-20 Title: Fitting of Univariate Mixture Distributions to Data using Various Approaches Authors@R: person(given = "Andrius", family = "Merkys", role = c("aut", "cre"), email = "andrius.merkys@gmail.com") Depends: R (>= 2.0.1) Description: Methods for fitting mixture distributions to univariate data using expectation maximization, HWHM and other methods. Supports Gaussian, Cauchy, Student's t and von Mises mixtures. For more details see Merkys (2018). License: GPL-2 NeedsCompilation: yes Packaged: 2025-05-20 08:53:17 UTC; andrius Author: Andrius Merkys [aut, cre] Maintainer: Andrius Merkys <andrius.merkys@gmail.com> Repository: CRAN Date/Publication: 2025-05-23 12:02:06 UTC