This document is written with R Markdown and contains the computations for a small experiment about algorithmic transparency policy. The reference paper is available here

The first chunk loads the packages we will need and includes the main variable recoding.

Elementary Statistics

We will use ggplot2

# Build one composite score per construct as the plain average of its items.

# Trust (Confiance): average of five C_ items.
reco_a$Confiance <- with(
  reco_a,
  (C_Bienveillance + C_Competence + C_Donnees.navigation + C_Ethique +
     C_Infos.perso) / 5
)

# Satisfaction: average of the five S_ items.
reco_a$Satisfaction <- with(
  reco_a,
  (S_Interesse.par.produits.reco + S_Promoter.score +
     S_Recos.repondent.aux.besoins + S_Satisfaction.globale +
     S_Tentation.d.achat) / 5
)

# Understanding: average of the four U_ items.
reco_a$Understanding <- with(
  reco_a,
  (U_Comment + U_Interets + U_pourquoi + U_Technique) / 4
)

# Descriptive statistics of the three scores with numSummary() (Rcmdr):
# mean, standard deviation and coefficient of variation.
numSummary(
  reco_a[c("Confiance", "Satisfaction", "Understanding")],
  statistics = c("mean", "sd", "cv")
)
##                   mean        sd        cv  n
## Confiance     3.788462 0.8264391 0.2181463 52
## Satisfaction  4.126923 0.7441577 0.1803178 52
## Understanding 4.495192 0.8945510 0.1990017 52
# Empirical density of the trust score, shown first on its own ...
g1 <- ggplot(data = reco_a, aes(x = Confiance)) +
  geom_density(fill = "pink") +
  theme_minimal()
g1

# ... then overlaid (red curve) with the normal density that has the same
# mean and standard deviation as the sample.
g1 <- g1 +
  stat_function(
    fun = dnorm,
    color = "red",
    args = list(mean = mean(reco_a$Confiance), sd = sd(reco_a$Confiance))
  )
g1

# Distribution of the trust score within each transparency condition.
g2 <- ggplot(reco_a, aes(x = Transparence_Nominale, y = Confiance)) +
  geom_violin()
g2

# Stack the three scores into long format (columns `variable` and `value`)
# so that their densities can be overlaid on one plot.
Dependant <- reco_a[, c("Confiance", "Satisfaction", "Understanding")]
Dependant <- melt(Dependant)
## No id variables; using all as measure variables
ggplot(Dependant, aes(x = value, fill = variable)) +
  geom_density(alpha = 0.2)

Analyse bivariée

t test

The elementary bivariate analysis is a comparison of two groups, and it is better to complement it with a test of the difference. For a detailed presentation see https://uc-r.github.io/t_test

# Plot the mean of C_Ethique per gender with standard-error bars
# (plotMeans comes with Rcmdr); a factor copy of Sexe is the grouping variable.
reco_a$Sexe2 <- as.factor(reco_a$Sexe)
with(
  reco_a,
  plotMeans(C_Ethique, Sexe2, error.bars = "se", connect = TRUE)
)

# Two-sided Welch t-test (unequal variances) of C_Ethique between genders.
t.test(
  C_Ethique ~ Sexe,
  data = reco_a,
  alternative = "two.sided",
  conf.level = 0.95,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  C_Ethique by Sexe
## t = 2.0714, df = 45.122, p-value = 0.04407
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0125981 0.8964928
## sample estimates:
##  mean in group Feminin mean in group Masculin 
##               3.500000               3.045455

ANOVA

ANOVA is a kind of generalization of the 2-group comparisons

for details : https://bioinformatics-core-shared-training.github.io/linear-models-r/ANOVA.html

# One-way ANOVA of the Satisfaction score across transparency conditions.
AnovaModel.1 <- aov(
  Satisfaction ~ Transparence_Nominale,
  data = reco_a
)
summary(AnovaModel.1)
##                       Df Sum Sq Mean Sq F value Pr(>F)  
## Transparence_Nominale  2  4.302  2.1512   4.403 0.0174 *
## Residuals             49 23.940  0.4886                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Mean and standard deviation of Satisfaction within each transparency group.
with(
  reco_a,
  numSummary(
    Satisfaction,
    groups = Transparence_Nominale,
    statistics = c("mean", "sd")
  )
)
##               mean        sd data:n
## 1-Aucune  4.047059 0.4718549     17
## 2-Moderee 4.500000 0.7798944     18
## 3-Forte   3.811765 0.7920561     17

The test is significant at the 5% level (95% confidence), so we can analyse the differences. The following plot compares the means of the three variables across the three treatments and gives confidence intervals.

from http://www.sthda.com/french/wiki/ggplot2-barres-d-erreur-guide-de-demarrage-rapide-logiciel-r-et-visualisation-de-donnees

#preparing the data
# Mean, standard deviation and group size of each score per transparency
# condition. The three aggregate() calls share the same formula and data, so
# they summarise the same observations.
means <- aggregate(cbind(Confiance, Satisfaction, Understanding) ~ Transparence_Nominale, data = reco_a, FUN = "mean")

Sd <- aggregate(cbind(Confiance, Satisfaction, Understanding) ~ Transparence_Nominale, data = reco_a, FUN = "sd")
Nobs <- aggregate(cbind(Confiance, Satisfaction, Understanding) ~ Transparence_Nominale, data = reco_a, FUN = "length")

# Long format: one row per (condition, score) pair.
meansM <- melt(means, id = c("Transparence_Nominale"))
SdM <- melt(Sd, id = c("Transparence_Nominale"))
NobsM <- melt(Nobs, id = c("Transparence_Nominale"))

# The summaries are bound column-wise below, so their rows must line up.
stopifnot(
  identical(
    meansM[c("Transparence_Nominale", "variable")],
    SdM[c("Transparence_Nominale", "variable")]
  ),
  identical(
    meansM[c("Transparence_Nominale", "variable")],
    NobsM[c("Transparence_Nominale", "variable")]
  )
)

SdM$sd <- SdM$value
SdM <- subset(SdM, select = c(sd))
meansM <- cbind(meansM, SdM)
meansM$n <- NobsM$value

# Half-width of the 95% confidence interval of each mean (Student t). The
# error bars previously showed +/- 1 standard deviation, which is not the
# confidence interval announced in the accompanying text.
meansM$ci <- qt(0.975, df = meansM$n - 1) * meansM$sd / sqrt(meansM$n)

#making the chart

# Dodged bars: one bar per score within each transparency condition.
g <- ggplot(meansM, aes(x = Transparence_Nominale, y = value, fill = variable))
g <- g + geom_bar(stat = "identity", color = "black", position = position_dodge())
g

# Add the 95% confidence intervals, labels, theme and fill colours.
g <- g +
  geom_errorbar(
    aes(ymin = value - ci, ymax = value + ci),
    width = .2,
    position = position_dodge(.9)
  ) +
  labs(title = "Response to transparency", x = "Transparence_Nominale", y = "valeur") +
  theme_classic() +
  scale_fill_manual(values = c('#999999', '#E69F00', '#c62e0d'))

g

Correlations

for a definition of correlation see : http://www.r-tutor.com/elementary-statistics/numerical-measures/correlation-coefficient

In the following chart we represent the correlation between Confiance and Satisfaction as a scatter plot with a fitted linear function (y = ax + b). Its confidence interval is shown as the grey area. We add a non-linear smoother (loess) in red.

# Scatter plot of Satisfaction against Confiance with a linear fit (default
# colour) and a loess smoother in red. The red curve was previously fitted
# with method = 'gam', although the accompanying text describes it as loess.
ggplot(reco_a, aes(x = Confiance, y = Satisfaction)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_smooth(method = "loess", color = "red")

# Correlations between the three scores on complete observations.
# "complete.obs" is spelled out instead of relying on partial matching of
# the abbreviated value "complete".
M <- cor(
  reco_a[, c("Confiance", "Satisfaction", "Understanding")],
  use = "complete.obs"
)
M
##               Confiance Satisfaction Understanding
## Confiance     1.0000000    0.5979953     0.7107255
## Satisfaction  0.5979953    1.0000000     0.4339238
## Understanding 0.7107255    0.4339238     1.0000000
# Lower-triangle colour map of M, rows/columns ordered by hierarchical clustering.
corrplot(M, method = "color", type = "lower", order = "hclust")

When we need to compute a large number of correlations, it is useful to visualize the whole matrix. In this case the rows and columns are rearranged with the clustering function “hclust”.

read also https://rstudio-pubs-static.s3.amazonaws.com/240657_5157ff98e8204c358b2118fa69162e18.html

# Item-level correlation matrix: every trust (C_), satisfaction (S_) and
# understanding (U_) item plus Transparence_Numerique, rounded to 2 decimals.
Mcorr <- subset(
  reco_a,
  select = c(
    C_Bienveillance, C_Competence, C_Donnees.navigation, C_Ethique,
    C_Infos.perso, C_Securite,
    S_Interesse.par.produits.reco, S_Promoter.score,
    S_Recos.repondent.aux.besoins, S_Satisfaction.globale,
    S_Tentation.d.achat,
    Transparence_Numerique,
    U_Comment, U_Interets, U_pourquoi, U_Technique
  )
)
M <- round(cor(Mcorr), 2)
M
##                               C_Bienveillance C_Competence
## C_Bienveillance                          1.00         0.66
## C_Competence                             0.66         1.00
## C_Donnees.navigation                     0.59         0.76
## C_Ethique                                0.67         0.64
## C_Infos.perso                            0.59         0.70
## C_Securite                               0.47         0.59
## S_Interesse.par.produits.reco            0.26         0.32
## S_Promoter.score                         0.48         0.49
## S_Recos.repondent.aux.besoins            0.35         0.50
## S_Satisfaction.globale                   0.49         0.51
## S_Tentation.d.achat                      0.37         0.45
## Transparence_Numerique                  -0.21        -0.14
## U_Comment                                0.52         0.59
## U_Interets                               0.53         0.53
## U_pourquoi                               0.46         0.54
## U_Technique                              0.50         0.62
##                               C_Donnees.navigation C_Ethique C_Infos.perso
## C_Bienveillance                               0.59      0.67          0.59
## C_Competence                                  0.76      0.64          0.70
## C_Donnees.navigation                          1.00      0.63          0.86
## C_Ethique                                     0.63      1.00          0.59
## C_Infos.perso                                 0.86      0.59          1.00
## C_Securite                                    0.50      0.56          0.41
## S_Interesse.par.produits.reco                 0.37      0.36          0.23
## S_Promoter.score                              0.47      0.61          0.51
## S_Recos.repondent.aux.besoins                 0.47      0.54          0.44
## S_Satisfaction.globale                        0.58      0.63          0.50
## S_Tentation.d.achat                           0.40      0.38          0.34
## Transparence_Numerique                       -0.11     -0.39         -0.05
## U_Comment                                     0.69      0.38          0.65
## U_Interets                                    0.69      0.41          0.68
## U_pourquoi                                    0.64      0.32          0.59
## U_Technique                                   0.65      0.41          0.57
##                               C_Securite S_Interesse.par.produits.reco
## C_Bienveillance                     0.47                          0.26
## C_Competence                        0.59                          0.32
## C_Donnees.navigation                0.50                          0.37
## C_Ethique                           0.56                          0.36
## C_Infos.perso                       0.41                          0.23
## C_Securite                          1.00                          0.45
## S_Interesse.par.produits.reco       0.45                          1.00
## S_Promoter.score                    0.47                          0.49
## S_Recos.repondent.aux.besoins       0.51                          0.64
## S_Satisfaction.globale              0.49                          0.59
## S_Tentation.d.achat                 0.60                          0.71
## Transparence_Numerique             -0.12                         -0.26
## U_Comment                           0.31                          0.36
## U_Interets                          0.38                          0.38
## U_pourquoi                          0.30                          0.29
## U_Technique                         0.50                          0.35
##                               S_Promoter.score
## C_Bienveillance                           0.48
## C_Competence                              0.49
## C_Donnees.navigation                      0.47
## C_Ethique                                 0.61
## C_Infos.perso                             0.51
## C_Securite                                0.47
## S_Interesse.par.produits.reco             0.49
## S_Promoter.score                          1.00
## S_Recos.repondent.aux.besoins             0.69
## S_Satisfaction.globale                    0.58
## S_Tentation.d.achat                       0.68
## Transparence_Numerique                   -0.09
## U_Comment                                 0.28
## U_Interets                                0.43
## U_pourquoi                                0.32
## U_Technique                               0.20
##                               S_Recos.repondent.aux.besoins
## C_Bienveillance                                        0.35
## C_Competence                                           0.50
## C_Donnees.navigation                                   0.47
## C_Ethique                                              0.54
## C_Infos.perso                                          0.44
## C_Securite                                             0.51
## S_Interesse.par.produits.reco                          0.64
## S_Promoter.score                                       0.69
## S_Recos.repondent.aux.besoins                          1.00
## S_Satisfaction.globale                                 0.61
## S_Tentation.d.achat                                    0.66
## Transparence_Numerique                                -0.28
## U_Comment                                              0.36
## U_Interets                                             0.44
## U_pourquoi                                             0.36
## U_Technique                                            0.38
##                               S_Satisfaction.globale S_Tentation.d.achat
## C_Bienveillance                                 0.49                0.37
## C_Competence                                    0.51                0.45
## C_Donnees.navigation                            0.58                0.40
## C_Ethique                                       0.63                0.38
## C_Infos.perso                                   0.50                0.34
## C_Securite                                      0.49                0.60
## S_Interesse.par.produits.reco                   0.59                0.71
## S_Promoter.score                                0.58                0.68
## S_Recos.repondent.aux.besoins                   0.61                0.66
## S_Satisfaction.globale                          1.00                0.64
## S_Tentation.d.achat                             0.64                1.00
## Transparence_Numerique                         -0.18                0.05
## U_Comment                                       0.31                0.29
## U_Interets                                      0.38                0.35
## U_pourquoi                                      0.34                0.29
## U_Technique                                     0.30                0.34
##                               Transparence_Numerique U_Comment U_Interets
## C_Bienveillance                                -0.21      0.52       0.53
## C_Competence                                   -0.14      0.59       0.53
## C_Donnees.navigation                           -0.11      0.69       0.69
## C_Ethique                                      -0.39      0.38       0.41
## C_Infos.perso                                  -0.05      0.65       0.68
## C_Securite                                     -0.12      0.31       0.38
## S_Interesse.par.produits.reco                  -0.26      0.36       0.38
## S_Promoter.score                               -0.09      0.28       0.43
## S_Recos.repondent.aux.besoins                  -0.28      0.36       0.44
## S_Satisfaction.globale                         -0.18      0.31       0.38
## S_Tentation.d.achat                             0.05      0.29       0.35
## Transparence_Numerique                          1.00     -0.04      -0.11
## U_Comment                                      -0.04      1.00       0.80
## U_Interets                                     -0.11      0.80       1.00
## U_pourquoi                                     -0.07      0.83       0.77
## U_Technique                                    -0.03      0.85       0.69
##                               U_pourquoi U_Technique
## C_Bienveillance                     0.46        0.50
## C_Competence                        0.54        0.62
## C_Donnees.navigation                0.64        0.65
## C_Ethique                           0.32        0.41
## C_Infos.perso                       0.59        0.57
## C_Securite                          0.30        0.50
## S_Interesse.par.produits.reco       0.29        0.35
## S_Promoter.score                    0.32        0.20
## S_Recos.repondent.aux.besoins       0.36        0.38
## S_Satisfaction.globale              0.34        0.30
## S_Tentation.d.achat                 0.29        0.34
## Transparence_Numerique             -0.07       -0.03
## U_Comment                           0.83        0.85
## U_Interets                          0.77        0.69
## U_pourquoi                          1.00        0.72
## U_Technique                         0.72        1.00
# Colour map of the item correlation matrix (lower triangle only), with the
# items reordered by hierarchical clustering.
corrplot(
  M,
  method = "color",
  type = "lower",
  order = "hclust"
)

Factor analysis and measurement reliability

Done with Rcmdr (factanal), with 3 factors and a promax rotation, which allows the factors to be correlated.

more is here http://www.karlin.mff.cuni.cz/~maciak/NMST539/cvicenie8.html

or http://www.di.fc.ul.pt/~jpn/r/factoranalysis/factoranalysis.html

to do better use the package psych :

# Factor analysis (factanal) of the 15 items: three factors, promax rotation,
# no factor scores requested.
.FA <- factanal(
  ~ C_Bienveillance + C_Competence + C_Donnees.navigation + C_Ethique +
    C_Infos.perso + C_Securite +
    S_Interesse.par.produits.reco + S_Promoter.score +
    S_Recos.repondent.aux.besoins + S_Satisfaction.globale +
    S_Tentation.d.achat +
    U_Comment + U_Interets + U_pourquoi + U_Technique,
  factors = 3,
  rotation = "promax",
  scores = "none",
  data = reco_a
)
print(.FA)
## 
## Call:
## factanal(x = ~C_Bienveillance + C_Competence + C_Donnees.navigation +     C_Ethique + C_Infos.perso + C_Securite + S_Interesse.par.produits.reco +     S_Promoter.score + S_Recos.repondent.aux.besoins + S_Satisfaction.globale +     S_Tentation.d.achat + U_Comment + U_Interets + U_pourquoi +     U_Technique, factors = 3, data = reco_a, scores = "none",     rotation = "promax")
## 
## Uniquenesses:
##               C_Bienveillance                  C_Competence 
##                         0.490                         0.317 
##          C_Donnees.navigation                     C_Ethique 
##                         0.154                         0.385 
##                 C_Infos.perso                    C_Securite 
##                         0.174                         0.521 
## S_Interesse.par.produits.reco              S_Promoter.score 
##                         0.307                         0.369 
## S_Recos.repondent.aux.besoins        S_Satisfaction.globale 
##                         0.352                         0.358 
##           S_Tentation.d.achat                     U_Comment 
##                         0.226                         0.048 
##                    U_Interets                    U_pourquoi 
##                         0.285                         0.277 
##                   U_Technique 
##                         0.245 
## 
## Loadings:
##                               Factor1 Factor2 Factor3
## C_Bienveillance                0.614           0.137 
## C_Competence                   0.704           0.157 
## C_Donnees.navigation           0.823  -0.101   0.239 
## C_Ethique                      0.765   0.130  -0.142 
## C_Infos.perso                  0.932  -0.236   0.180 
## C_Securite                     0.310   0.464         
## S_Interesse.par.produits.reco -0.388   0.973   0.216 
## S_Promoter.score               0.360   0.580  -0.174 
## S_Recos.repondent.aux.besoins          0.736         
## S_Satisfaction.globale         0.401   0.541  -0.143 
## S_Tentation.d.achat           -0.136   0.962         
## U_Comment                                      0.945 
## U_Interets                     0.224           0.655 
## U_pourquoi                     0.103           0.776 
## U_Technique                                    0.791 
## 
##                Factor1 Factor2 Factor3
## SS loadings      3.633   3.357   2.803
## Proportion Var   0.242   0.224   0.187
## Cumulative Var   0.242   0.466   0.653
## 
## Factor Correlations:
##         Factor1 Factor2 Factor3
## Factor1   1.000  -0.364   0.585
## Factor2  -0.364   1.000  -0.671
## Factor3   0.585  -0.671   1.000
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 81.51 on 63 degrees of freedom.
## The p-value is 0.0584

As we obtain a confirmation of the three-dimensional structure of the response (Understanding, Trust, Satisfaction), we can evaluate the reliability and validity of the scales by computing Cronbach's alpha and analysing the within-scale and between-scale correlations.

A Sem model is better of course ( see the last part) to test a Confirmatory Factor Analysis

For Confiance (trust), alpha is excellent (more than 0.90). If the security item is eliminated, there is no loss of information (alpha stays at about 0.91). We could therefore shorten the scale without decreasing its reliability.

# Alpha reliability of the six trust items, computed from their covariance
# matrix on complete observations.
reliability(
  cov(
    reco_a[, c(
      "C_Bienveillance", "C_Competence", "C_Donnees.navigation",
      "C_Ethique", "C_Infos.perso", "C_Securite"
    )],
    use = "complete.obs"
  )
)
## Alpha reliability =  0.9005 
## Standardized alpha =  0.9058 
## 
## Reliability deleting each item in turn:
##                       Alpha Std.Alpha r(item, total)
## C_Bienveillance      0.8866    0.8929         0.7053
## C_Competence         0.8740    0.8773         0.8218
## C_Donnees.navigation 0.8678    0.8774         0.8235
## C_Ethique            0.8835    0.8885         0.7428
## C_Infos.perso        0.8775    0.8858         0.7678
## C_Securite           0.9057    0.9104         0.5801

The correlations between each item and its scale score are well above the correlations between concepts, so the concepts are well discriminated, which supports the validity of the three-dimensional concept of consumer response.

# Correlations between the three composite scores, rounded to two decimals.
M <- round(
  cor(subset(reco_a, select = c("Confiance", "Understanding", "Satisfaction"))),
  2
)
M
##               Confiance Understanding Satisfaction
## Confiance          1.00          0.71         0.60
## Understanding      0.71          1.00         0.43
## Satisfaction       0.60          0.43         1.00

model testing

for a quick but complete presentation http://www.sthda.com/english/articles/40-regression-analysis/168-multiple-linear-regression-in-r/

Here we test two models to detect a possible interaction effect (and hence a moderating variable). The second one adds an interaction term that tests the moderating effect of Sexe on the effect of Transparence_Nominale. The two models are then compared through an ANOVA test (see: https://bookdown.org/ndphillips/YaRrr/comparing-regression-models-with-anova.html).

# Additive model: trust explained by gender and transparency condition.
LinearModel.2 <- lm(
  Confiance ~ Sexe + Transparence_Nominale,
  data = reco_a
)
summary(LinearModel.2)
## 
## Call:
## lm(formula = Confiance ~ Sexe + Transparence_Nominale, data = reco_a)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.56883 -0.58208  0.06084  0.48776  1.37818 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      3.8218     0.2064  18.514   <2e-16 ***
## SexeMasculin                    -0.2530     0.2161  -1.171   0.2475    
## Transparence_Nominale2-Moderee   0.4877     0.2598   1.877   0.0666 .  
## Transparence_Nominale3-Forte    -0.2910     0.2637  -1.103   0.2754    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.768 on 48 degrees of freedom
## Multiple R-squared:  0.1872, Adjusted R-squared:  0.1364 
## F-statistic: 3.685 on 3 and 48 DF,  p-value: 0.01814
# Same model plus the gender x transparency interaction. The formula is kept
# as written so that the echoed Call stays the same.
LinearModel.3 <- lm(
  Confiance ~ Sexe + Transparence_Nominale + Transparence_Nominale * Sexe,
  data = reco_a
)
summary(LinearModel.3)
## 
## Call:
## lm(formula = Confiance ~ Sexe + Transparence_Nominale + Transparence_Nominale * 
##     Sexe, data = reco_a)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54286 -0.56714  0.08312  0.47857  1.36000 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                                  3.84000    0.24801  15.483
## SexeMasculin                                -0.29714    0.38649  -0.769
## Transparence_Nominale2-Moderee               0.45091    0.34267   1.316
## Transparence_Nominale3-Forte                -0.30667    0.36035  -0.851
## SexeMasculin:Transparence_Nominale2-Moderee  0.09195    0.54145   0.170
## SexeMasculin:Transparence_Nominale3-Forte    0.03881    0.54278   0.072
##                                             Pr(>|t|)    
## (Intercept)                                   <2e-16 ***
## SexeMasculin                                   0.446    
## Transparence_Nominale2-Moderee                 0.195    
## Transparence_Nominale3-Forte                   0.399    
## SexeMasculin:Transparence_Nominale2-Moderee    0.866    
## SexeMasculin:Transparence_Nominale3-Forte      0.943    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7843 on 46 degrees of freedom
## Multiple R-squared:  0.1877, Adjusted R-squared:  0.09944 
## F-statistic: 2.126 on 5 and 46 DF,  p-value: 0.07909
# F-test comparing the additive model with the interaction model.
anova(
  LinearModel.2,
  LinearModel.3
)
## Analysis of Variance Table
## 
## Model 1: Confiance ~ Sexe + Transparence_Nominale
## Model 2: Confiance ~ Sexe + Transparence_Nominale + Transparence_Nominale * 
##     Sexe
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     48 28.312                           
## 2     46 28.294  2   0.01792 0.0146 0.9855
# Illustration: mean of each score per transparency condition and gender,
# reshaped to long format and drawn as one panel per score.
level <- aggregate(
  cbind(Confiance, Satisfaction, Understanding) ~ Transparence_Nominale + Sexe,
  data = reco_a,
  FUN = "mean"
)
level <- melt(level)
## Using Transparence_Nominale, Sexe as id variables
p1 <- ggplot(
  data = level,
  aes(x = Transparence_Nominale, y = value, group = Sexe, color = Sexe)
) +
  geom_point(size = 2) +
  geom_line(stat = "identity", size = 2) +
  facet_grid(. ~ variable)
p1

To improve the procedure, a classic approach is “Process”, implemented in the processr package: https://rpubs.com/markhw/processr.

It is useful (and it is a growing trend) to represent the results of a regression model graphically. The jtools package is useful for this purpose, with its plot_summs function. See:

# Summary table of the interaction model via summ().
summ(LinearModel.3)
Observations 52
Dependent variable Confiance
Type OLS linear regression
F(5,46) 2.13
R² 0.19
Adj. R² 0.10
Est. S.E. t val. p
(Intercept) 3.84 0.25 15.48 0.00
SexeMasculin -0.30 0.39 -0.77 0.45
Transparence_Nominale2-Moderee 0.45 0.34 1.32 0.19
Transparence_Nominale3-Forte -0.31 0.36 -0.85 0.40
SexeMasculin:Transparence_Nominale2-Moderee 0.09 0.54 0.17 0.87
SexeMasculin:Transparence_Nominale3-Forte 0.04 0.54 0.07 0.94
Standard errors: OLS
# Plot of the interaction model's coefficients via plot_summs(), called with scale = TRUE.
plot_summs(LinearModel.3, scale = TRUE)

Clustering

kmeans

Look at this bank customer segmentation example: https://rpubs.com/cpatinof/clusteringMktngDataCaseStudy

## K-means clustering with 3 clusters of sizes 17, 9, 26
## 
## Cluster means:
##   Confiance Understanding Satisfaction
## 1  3.388235      4.117647     4.070588
## 2  2.888889      3.277778     3.066667
## 3  4.361538      5.163462     4.530769
## 
## Clustering vector:
##  [1] 1 3 3 3 1 1 3 3 3 1 1 3 3 1 1 2 2 3 3 3 3 2 3 1 3 3 1 3 3 3 3 2 3 1 1
## [36] 3 1 3 2 3 1 2 1 2 3 3 1 1 2 3 2 1
## 
## Within cluster sum of squares by cluster:
## [1] 12.162647  6.644444 24.749712
##  (between_SS / total_SS =  58.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
## Using group as id variables

hierarchical clustering

This method comes from biology and was developed to classify species:

https://cran.r-project.org/web/packages/dendextend/vignettes/Cluster_Analysis.html

# Ward Hierarchical Clustering
# `x` is the data to cluster; it is created outside this chunk.
# The number of groups is set once so that the cut and the rectangles drawn
# on the dendrogram always agree (earlier comments said 5 while k was 3).
n_clusters <- 3
d <- dist(x, method = "euclidean") # distance matrix
# NOTE(review): per the hclust documentation, "ward.D" applied to unsquared
# distances does not implement Ward's criterion ("ward.D2" does). Left
# unchanged because switching may change the partition — confirm intent.
fit <- hclust(d, method = "ward.D")
plot(fit) # display dendrogram
HCgroups <- cutree(fit, k = n_clusters) # cut tree into n_clusters clusters
# draw dendrogram with red borders around the n_clusters clusters
rect.hclust(fit, k = n_clusters, border = "red")

### Convergence test

Do the two methods give the same results?

Just cross-tabulate the two partitions and compute the chi² test.

# Put the two partitions side by side, one row per observation, as factors.
# The k-means memberships are taken from the "cluster" component of the
# fitted object (its first component).
KMcluster <- data.frame(KMcluster = kmeans[["cluster"]])
typo <- cbind(KMcluster, HCgroups)
typo$KMcluster <- as.factor(typo$KMcluster)
typo$HCgroups <- as.factor(typo$HCgroups)

# Contingency table: hierarchical groups (rows) by k-means clusters (columns).
.Table <- xtabs(~ HCgroups + KMcluster, data = typo)
cat("\nFrequency table:\n")
## 
## Frequency table:
  # Show the cross-tabulation of the two partitions.
  print(.Table)
##         KMcluster
## HCgroups  1  2  3
##        1 17  0  0
##        2  0 27  3
##        3  0  0  5
  # NOTE(review): the column totals above (17, 27, 8) differ from the k-means
  # cluster sizes printed earlier (17, 9, 26); presumably k-means was re-run
  # without a fixed seed — confirm and regenerate the outputs together.
  # Pearson chi-square test of independence, without continuity correction.
  .Test <- chisq.test(.Table, correct=FALSE)
  print(.Test)
## 
##  Pearson's Chi-squared test
## 
## data:  .Table
## X-squared = 81.25, df = 4, p-value < 2.2e-16
  # Expected counts under independence.
  # NOTE(review): several expected counts printed below are under 5 (one is
  # under 1), so the chi-square approximation is questionable here — consider
  # an exact or simulated p-value.
  cat("\nExpected counts:\n")
## 
## Expected counts:
  print(.Test$expected)
##         KMcluster
## HCgroups        1         2         3
##        1 5.557692  8.826923 2.6153846
##        2 9.807692 15.576923 4.6153846
##        3 1.634615  2.596154 0.7692308
  # Contribution of each cell to the statistic: squared Pearson residuals.
  cat("\nChi-square components:\n")
## 
## Chi-square components:
  print(round(.Test$residuals^2, 2)) 
##         KMcluster
## HCgroups     1     2     3
##        1 23.56  8.83  2.62
##        2  9.81  8.38  0.57
##        3  1.63  2.60 23.27

Structural equation modeling

we use Lavaan

http://lavaan.ugent.be/

## lavaan 0.6-5 ended normally after 42 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of free parameters                         33
##                                                       
##   Number of observations                            52
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                               173.278
##   Degrees of freedom                               102
##   P-value (Chi-square)                           0.000
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Confiance =~                                                          
##     C_Bienveillanc    1.000                               0.667    0.689
##     C_Competence      0.941    0.171    5.510    0.000    0.628    0.823
##     C_Donnes.nvgtn    1.449    0.238    6.089    0.000    0.966    0.923
##     C_Ethique         0.815    0.172    4.725    0.000    0.543    0.697
##     C_Infos.perso     1.371    0.237    5.798    0.000    0.914    0.871
##   Satisfaction =~                                                       
##     S_Intrss.pr.p.    1.000                               0.608    0.745
##     S_Promoter.scr    1.009    0.181    5.587    0.000    0.613    0.781
##     S_Rcs.rpndnt..    1.165    0.198    5.883    0.000    0.708    0.820
##     S_Stsfctn.glbl    1.004    0.183    5.472    0.000    0.610    0.766
##     S_Tenttn.d.cht    1.462    0.243    6.016    0.000    0.888    0.838
##   Understanding =~                                                      
##     U_Comment         1.000                               1.011    0.962
##     U_Interets        0.691    0.071    9.745    0.000    0.698    0.842
##     U_pourquoi        0.773    0.075   10.353    0.000    0.781    0.861
##     U_Technique       0.936    0.089   10.541    0.000    0.946    0.866
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Confiance ~                                                           
##     Understanding     0.494    0.099    4.987    0.000    0.749    0.749
##   Satisfaction ~                                                        
##     Confiance         0.566    0.157    3.599    0.000    0.621    0.621
##   Confiance ~                                                           
##     Tr1               0.285    0.142    2.007    0.045    0.427    0.203
##     Tr2              -0.154    0.139   -1.108    0.268   -0.232   -0.109
##   Tr1 ~                                                                 
##     Tr2               0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .C_Bienveillanc    0.491    0.103    4.783    0.000    0.491    0.525
##    .C_Competence      0.188    0.043    4.351    0.000    0.188    0.323
##    .C_Donnes.nvgtn    0.161    0.053    3.037    0.002    0.161    0.147
##    .C_Ethique         0.311    0.065    4.768    0.000    0.311    0.514
##    .C_Infos.perso     0.267    0.067    3.959    0.000    0.267    0.242
##    .S_Intrss.pr.p.    0.295    0.067    4.393    0.000    0.295    0.444
##    .S_Promoter.scr    0.241    0.057    4.214    0.000    0.241    0.391
##    .S_Rcs.rpndnt..    0.244    0.062    3.930    0.000    0.244    0.328
##    .S_Stsfctn.glbl    0.263    0.061    4.297    0.000    0.263    0.414
##    .S_Tenttn.d.cht    0.335    0.089    3.756    0.000    0.335    0.298
##    .U_Comment         0.083    0.040    2.106    0.035    0.083    0.075
##    .U_Interets        0.200    0.045    4.468    0.000    0.200    0.291
##    .U_pourquoi        0.213    0.049    4.343    0.000    0.213    0.259
##    .U_Technique       0.297    0.069    4.299    0.000    0.297    0.249
##    .Tr1               0.226    0.044    5.099    0.000    0.226    1.000
##    .Confiance         0.172    0.066    2.605    0.009    0.386    0.386
##    .Satisfaction      0.227    0.078    2.897    0.004    0.615    0.615
##     Understanding     1.021    0.219    4.664    0.000    1.000    1.000
## lavaan 0.6-5 ended normally after 109 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of free parameters                         96
##                                                       
##   Number of observations per group:                   
##     Masculin                                        22
##     Feminin                                         30
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                               351.125
##   Degrees of freedom                               204
##   P-value (Chi-square)                           0.000
##   Test statistic for each group:
##     Masculin                                   214.370
##     Feminin                                    136.756
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard errors                             Standard
## 
## 
## Group 1 [Masculin]:
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Confiance =~                                                          
##     C_Bienveillanc    1.000                               0.565    0.616
##     C_Competence      1.035    0.340    3.043    0.002    0.585    0.800
##     C_Donnes.nvgtn    1.631    0.489    3.337    0.001    0.922    0.922
##     C_Ethique         0.778    0.326    2.384    0.017    0.440    0.581
##     C_Infos.perso     1.491    0.454    3.287    0.001    0.843    0.899
##   Satisfaction =~                                                       
##     S_Intrss.pr.p.    1.000                               0.595    0.755
##     S_Promoter.scr    0.953    0.248    3.838    0.000    0.567    0.808
##     S_Rcs.rpndnt..    1.114    0.278    4.002    0.000    0.663    0.841
##     S_Stsfctn.glbl    0.733    0.228    3.209    0.001    0.436    0.687
##     S_Tenttn.d.cht    1.363    0.339    4.018    0.000    0.811    0.844
##   Understanding =~                                                      
##     U_Comment         1.000                               1.137    0.976
##     U_Interets        0.607    0.068    8.897    0.000    0.691    0.906
##     U_pourquoi        0.727    0.088    8.263    0.000    0.827    0.891
##     U_Technique       1.042    0.079   13.127    0.000    1.185    0.968
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Confiance ~                                                           
##     Understanding     0.376    0.128    2.935    0.003    0.756    0.756
##   Satisfaction ~                                                        
##     Confiance         0.523    0.279    1.874    0.061    0.497    0.497
##   Confiance ~                                                           
##     Tr1               0.269    0.192    1.401    0.161    0.476    0.222
##     Tr2              -0.012    0.171   -0.069    0.945   -0.021   -0.010
##   Tr1 ~                                                                 
##     Tr2               0.000                               0.000    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .C_Bienveillanc    3.282    0.212   15.451    0.000    3.282    3.577
##    .C_Competence      4.189    0.177   23.690    0.000    4.189    5.728
##    .C_Donnes.nvgtn    3.413    0.250   13.670    0.000    3.413    3.416
##    .C_Ethique         2.982    0.174   17.140    0.000    2.982    3.938
##    .C_Infos.perso     3.742    0.233   16.077    0.000    3.742    3.991
##    .S_Intrss.pr.p.    4.048    0.174   23.246    0.000    4.048    5.137
##    .S_Promoter.scr    3.914    0.156   25.145    0.000    3.914    5.582
##    .S_Rcs.rpndnt..    4.180    0.176   23.812    0.000    4.180    5.302
##    .S_Stsfctn.glbl    4.014    0.140   28.765    0.000    4.014    6.321
##    .S_Tenttn.d.cht    3.806    0.214   17.785    0.000    3.806    3.961
##    .U_Comment         4.227    0.248   17.018    0.000    4.227    3.628
##    .U_Interets        4.682    0.162   28.820    0.000    4.682    6.145
##    .U_pourquoi        4.955    0.198   25.036    0.000    4.955    5.338
##    .U_Technique       4.045    0.261   15.504    0.000    4.045    3.305
##    .Tr1               0.318    0.099    3.204    0.001    0.318    0.683
##    .Confiance         0.000                               0.000    0.000
##    .Satisfaction      0.000                               0.000    0.000
##     Understanding     0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .C_Bienveillanc    0.523    0.165    3.177    0.001    0.523    0.621
##    .C_Competence      0.193    0.066    2.902    0.004    0.193    0.361
##    .C_Donnes.nvgtn    0.149    0.077    1.944    0.052    0.149    0.149
##    .C_Ethique         0.380    0.119    3.200    0.001    0.380    0.663
##    .C_Infos.perso     0.169    0.074    2.284    0.022    0.169    0.193
##    .S_Intrss.pr.p.    0.267    0.094    2.833    0.005    0.267    0.430
##    .S_Promoter.scr    0.170    0.065    2.622    0.009    0.170    0.346
##    .S_Rcs.rpndnt..    0.182    0.075    2.424    0.015    0.182    0.293
##    .S_Stsfctn.glbl    0.213    0.071    2.993    0.003    0.213    0.528
##    .S_Tenttn.d.cht    0.266    0.111    2.401    0.016    0.266    0.288
##    .U_Comment         0.064    0.039    1.649    0.099    0.064    0.047
##    .U_Interets        0.104    0.035    2.964    0.003    0.104    0.178
##    .U_pourquoi        0.178    0.059    3.025    0.002    0.178    0.207
##    .U_Technique       0.094    0.046    2.015    0.044    0.094    0.062
##    .Tr1               0.217    0.065    3.317    0.001    0.217    1.000
##    .Confiance         0.121    0.081    1.500    0.134    0.379    0.379
##    .Satisfaction      0.267    0.136    1.963    0.050    0.753    0.753
##     Understanding     1.294    0.410    3.154    0.002    1.000    1.000
## 
## 
## Group 2 [Feminin]:
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Confiance =~                                                          
##     C_Bienveillanc    1.000                               0.703    0.707
##     C_Competence      0.924    0.210    4.406    0.000    0.649    0.831
##     C_Donnes.nvgtn    1.452    0.292    4.970    0.000    1.021    0.944
##     C_Ethique         0.805    0.200    4.037    0.000    0.566    0.760
##     C_Infos.perso     1.358    0.298    4.563    0.000    0.954    0.861
##   Satisfaction =~                                                       
##     S_Intrss.pr.p.    1.000                               0.636    0.760
##     S_Promoter.scr    0.980    0.236    4.149    0.000    0.623    0.745
##     S_Rcs.rpndnt..    1.130    0.254    4.443    0.000    0.719    0.791
##     S_Stsfctn.glbl    1.115    0.242    4.604    0.000    0.709    0.817
##     S_Tenttn.d.cht    1.531    0.314    4.881    0.000    0.974    0.861
##   Understanding =~                                                      
##     U_Comment         1.000                               0.910    0.956
##     U_Interets        0.763    0.122    6.259    0.000    0.695    0.797
##     U_pourquoi        0.830    0.115    7.187    0.000    0.755    0.847
##     U_Technique       0.832    0.141    5.891    0.000    0.757    0.774
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Confiance ~                                                           
##     Understanding     0.582    0.149    3.904    0.000    0.753    0.753
##   Satisfaction ~                                                        
##     Confiance         0.602    0.202    2.983    0.003    0.665    0.665
##   Confiance ~                                                           
##     Tr1               0.279    0.189    1.475    0.140    0.397    0.191
##     Tr2              -0.230    0.196   -1.174    0.241   -0.327   -0.150
##   Tr1 ~                                                                 
##     Tr2               0.000                               0.000    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .C_Bienveillanc    3.600    0.199   18.061    0.000    3.600    3.623
##    .C_Competence      4.436    0.162   27.430    0.000    4.436    5.676
##    .C_Donnes.nvgtn    3.685    0.231   15.972    0.000    3.685    3.410
##    .C_Ethique         3.473    0.151   22.956    0.000    3.473    4.665
##    .C_Infos.perso     4.188    0.231   18.105    0.000    4.188    3.777
##    .S_Intrss.pr.p.    4.113    0.161   25.611    0.000    4.113    4.919
##    .S_Promoter.scr    4.114    0.160   25.651    0.000    4.114    4.917
##    .S_Rcs.rpndnt..    4.411    0.175   25.187    0.000    4.411    4.856
##    .S_Stsfctn.glbl    4.378    0.168   26.074    0.000    4.378    5.044
##    .S_Tenttn.d.cht    3.836    0.220   17.437    0.000    3.836    3.394
##    .U_Comment         4.400    0.174   25.310    0.000    4.400    4.621
##    .U_Interets        4.800    0.159   30.157    0.000    4.800    5.506
##    .U_pourquoi        4.933    0.163   30.295    0.000    4.933    5.531
##    .U_Technique       3.900    0.179   21.840    0.000    3.900    3.987
##    .Tr1               0.367    0.088    4.168    0.000    0.367    0.761
##    .Confiance         0.000                               0.000    0.000
##    .Satisfaction      0.000                               0.000    0.000
##     Understanding     0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .C_Bienveillanc    0.494    0.135    3.651    0.000    0.494    0.500
##    .C_Competence      0.189    0.056    3.364    0.001    0.189    0.310
##    .C_Donnes.nvgtn    0.126    0.062    2.024    0.043    0.126    0.108
##    .C_Ethique         0.234    0.066    3.566    0.000    0.234    0.422
##    .C_Infos.perso     0.319    0.099    3.208    0.001    0.319    0.259
##    .S_Intrss.pr.p.    0.295    0.089    3.325    0.001    0.295    0.422
##    .S_Promoter.scr    0.312    0.092    3.376    0.001    0.312    0.445
##    .S_Rcs.rpndnt..    0.308    0.096    3.201    0.001    0.308    0.374
##    .S_Stsfctn.glbl    0.251    0.082    3.066    0.002    0.251    0.333
##    .S_Tenttn.d.cht    0.329    0.121    2.713    0.007    0.329    0.258
##    .U_Comment         0.078    0.054    1.455    0.146    0.078    0.086
##    .U_Interets        0.277    0.081    3.443    0.001    0.277    0.365
##    .U_pourquoi        0.225    0.070    3.209    0.001    0.225    0.283
##    .U_Technique       0.383    0.109    3.510    0.000    0.383    0.401
##    .Tr1               0.232    0.060    3.873    0.000    0.232    1.000
##    .Confiance         0.184    0.090    2.041    0.041    0.373    0.373
##    .Satisfaction      0.226    0.100    2.249    0.024    0.558    0.558
##     Understanding     0.829    0.238    3.475    0.001    1.000    1.000