# Global knitr chunk options: show code and output, silence package noise.
knitr::opts_chunk$set(echo = TRUE, include=TRUE, message=FALSE, warning=FALSE)
library(tidyverse) # a swiss knife for data manipulation
library(udpipe) # syntactic and lexical annotations, tokenization
library(flextable) # table rendering
library(ggwordcloud) # word-cloud plotting
library(cowplot) # plot composition (plot_grid)
library(quanteda) # a complete text mining package
library(quanteda.textmodels)
library(quanteda.textstats)
library(quanteda.textplots)
library(quanteda.dictionaries)
library(syuzhet) # sentiment analysis
library(lubridate) # date processing
library(igraph) # network representations
library(FactoMineR) # multiple correspondence analysis (MCA)
library(factoextra) # FactoMineR visualisation helpers
theme_set(theme_minimal())
# Start the wall-clock timer read back at the end of the script.
# (The duplicated library(udpipe) call was removed.)
t1 <- Sys.time()
User Experience project from the DITP "Services publics+" programme.
Each comment relates to a lived experience (e.g. renewing a passport) at a certain time, with a specific administration, in a certain place.
The score has been computed with a transformer model; we will examine it further in session 6. It is the probability that the comment is positive.
# Read the file and optionally sample it to reduce computation (see the end).
# We put the content of the csv file in a dataframe and keep only the
# columns used in this analysis.
df <- read_csv("./Data/data_original_12062022_normalise_sentiment_transformers_lemna_words_adj.csv")%>%
  select (id, date_ecrit, titre, description, ressenti, pays,intitule, canaux_typologie_1, transformer_sentiment_score)
# Clean recurring defects of the raw text.
# mutate() creates or modifies columns. str_replace_all() is used throughout
# so that EVERY occurrence is fixed — str_replace() only fixed the first one.
df<-df %>%
  mutate(description=str_replace_all(description,"\\.(?=[A-Za-z])", ". "), # add a space after a dot glued to a word
         description=str_replace_all(description,"_", " "), # remove all underscores
         description=str_replace_all(description,"(\u20AC)", "euro") ) # spell out the euro sign
#%>% sample_n(1000) to avoid too long intermediary computing.
head(df, 5) #we display the first 5 rows of the data frame
## # A tibble: 5 x 9
## id date_ecrit titre descr~1 resse~2 pays intit~3 canau~4 trans~5
## <dbl> <date> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 280177 2019-03-22 Suivi de doss~ J'ai a~ Négatif Fran~ CAF Téléph~ 0.373
## 2 280174 2019-03-22 Délai de répo~ J'ai e~ Négatif Fran~ CAF Compte~ 0.0579
## 3 280171 2019-03-26 RSA et Prime ~ J’ai f~ Négatif Fran~ CAF Démarc~ 0.484
## 4 280169 2019-03-27 Nouvel alloca~ J'ai d~ Neutre Fran~ <NA> <NA> 0.116
## 5 280166 2019-03-28 CAF : Un manq~ J'habi~ Négatif Fran~ CAF Démarc~ 0.295
## # ... with abbreviated variable names 1: description, 2: ressenti, 3: intitule,
## # 4: canaux_typologie_1, 5: transformer_sentiment_score
We plot the transformer sentiment distribution, as a density and as a
cumulative distribution. The plot_grid() function, offered by cowplot,
provides a simple interface for arranging plots into a grid and adding
labels to them.
# Density of the transformer sentiment score.
g1 <- ggplot(df, aes(x = transformer_sentiment_score)) +
  geom_density(fill = "pink", alpha = .5)
# Empirical cumulative distribution of the same score, on [0, 1].
g2 <- ggplot(df, aes(x = transformer_sentiment_score)) +
  stat_ecdf(geom = "step", pad = FALSE) +
  xlim(0, 1)
# cowplot: arrange both panels in a grid with automatic labels.
plot_grid(g1, g2, labels = "AUTO")
ggsave("./Images/sentiment1.jpeg", width = 28, height = 20, units = "cm")
Compare with the "smileys" (the declared feeling, *ressenti*).
# Transformer score versus the declared feeling ("ressenti") scale.
g2<-ggplot(df, aes(y=transformer_sentiment_score, x=ressenti)) +
geom_violin(fill="pink", alpha=.5)+
ylim(0,1)+
geom_smooth()+ # NOTE(review): x is discrete here, so geom_smooth() likely draws nothing — confirm
labs(x=NULL, y = "Score transformer")
g2
ggsave("./Images/Sentiment2.jpeg", width = 28, height = 20, units = "cm")
Each experience is relative to different channels. As this variable is encoded as a list of multiple choices, we need to rebuild a table with as many columns as there are modalities of the channel variable.
## Multiple-choice question recoding: one 0/1 indicator column per channel.
# canaux_typologie_1 stores up to 10 comma-separated choices; split it,
# stack it in long form, then pivot back to a wide indicator table keyed by id.
typo<-as.data.frame(str_split_fixed(df$canaux_typologie_1, ",", 10))%>%
  mutate(across(where(is.character), str_trim)) %>%
  cbind(df$id) %>%
  rename(id=11) %>%
  pivot_longer(-id, names_to = "valeur", values_to="variable") %>%
  filter(variable!="") %>%
  group_by(id, variable) %>%
  summarise(valeur=1) %>%
  ungroup() %>%
  # id_cols is passed by name: the positional form is ambiguous and
  # deprecated in recent tidyr versions.
  pivot_wider(id_cols = id, names_from="variable", values_from="valeur")%>%
  replace(is.na(.), 0)
head(typo)
## # A tibble: 6 x 12
## # Groups: id [6]
## id Démar~1 Télép~2 E-mai~3 Accueil Compt~4 Courr~5 Tchat~6 Patro~7 Résea~8
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 268971 1 1 0 0 0 0 0 0 0
## 2 268976 0 1 1 0 0 0 0 0 0
## 3 268983 0 0 0 1 0 0 0 0 0
## 4 269003 0 1 0 0 0 0 0 0 0
## 5 269006 0 0 0 1 0 0 0 0 0
## 6 269007 1 1 0 0 1 0 0 0 0
## # ... with 2 more variables: `Visio/Video` <dbl>, `,` <dbl>, and abbreviated
## # variable names 1: `Démarche en ligne`, 2: Téléphone, 3: `E-mail`,
## # 4: `Compte ou espace personnel`, 5: Courrier,
## # 6: `Tchat (messagerie instantanée)`, 7: Patrouille, 8: `Réseaux Sociaux`
# Count the number of rows, i.e. the number of experiences with at least
# one recorded channel.
n_c<-nrow(typo)
n_c
## [1] 15839
More data processing for the ggplot.
# Penetration rate of each channel: the share of experiences mentioning it.
# (The stray "," modality produced by the split is dropped.)
foo <- typo %>%
  pivot_longer(-id, names_to = "variable", values_to = "value") %>%
  group_by(variable) %>%
  summarise(Penetration = mean(value)) %>%
  filter(variable != ",")
ggplot(foo, aes(x = reorder(variable, Penetration), y = Penetration)) +
  geom_bar(stat = "identity", fill = "firebrick") +
  coord_flip() +
  labs(x = NULL, y = "taux de pénétration")
ggsave("./Images/channel.jpeg", width = 28, height = 20, units = "cm")
A deeper exploration of channel co-occurrence.
# Co-occurrence matrix of the channel indicators.
# NOTE(review): select(2:11) drops id but keeps only 10 of the 11 indicator
# columns (the head() output above shows 12 columns including a stray ",") —
# confirm the intended selection.
foo1 <- as.data.frame(typo)%>%
select(2:11)%>%
as.matrix()
out <- crossprod(foo1) # Same as: t(X) %*% X — joint occurrence counts
#diag(out) <- 0 # (b/c you don't count co-occurrences of an aspect with itself)
# Long format: one row per (channel, channel) pair, for the heatmap.
out<- as.data.frame(out) %>%
rownames_to_column(var="Channels")%>%
pivot_longer(-Channels,names_to = "Channelsb", values_to = "frequency")
# Heatmap of co-occurrence frequencies (log10 scale for contrast).
ggplot(data = out, aes(x=Channels, y=Channelsb, fill=log10(frequency))) +
geom_tile()+
scale_fill_gradient(low = "Gold", high = "Firebrick", name="Frequency")+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
labs(x=NULL, y=NULL)
# Compare observed co-occurrences to a theoretical independence baseline:
# Y holds each channel's penetration rate (from `foo` above), so
# (Y %*% t(Y)) * n is the expected joint count if channels were independent.
Y<-foo %>%
column_to_rownames(var="variable")%>%
as.matrix()
n=nrow(typo)
Z<-Y%*%t(Y) %>%as.data.frame()%>%
rownames_to_column(var="Channels")%>%
pivot_longer(-Channels,names_to = "Channelsb", values_to = "frequencyT")%>%
mutate(frequencyT=frequencyT*n)
# Cell-wise chi2 contribution; the diagonal is zeroed out since a channel's
# co-occurrence with itself is not informative.
w<- merge(out,Z) %>%
mutate(chi2=(frequency-frequencyT)^2/frequencyT)%>%
mutate(chi2=ifelse(Channels==Channelsb,0,chi2))
# Heatmap of the deviation from independence.
ggplot(data = w, aes(x=Channels, y=Channelsb, fill=chi2)) +
geom_tile()+
scale_fill_gradient(low = "Gold", high = "Firebrick", name="Chi2")+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
labs(x=NULL, y=NULL)
Each experience concerns one administration. Here is the list of the most frequently mentioned administrations.
# Frequency of each administration ("intitule") with more than 50 reviews.
# A dead assignment (`foo <- typo %>% select(-id)`) was removed: foo was
# overwritten further down before any use.
t<-as.data.frame(table(df$intitule)) %>%
  filter(Freq>50)
ggplot(t, aes(x=reorder(Var1, Freq),y=Freq))+
  geom_bar(stat="identity",fill="firebrick")+
  coord_flip()+
  labs(x=NULL, y="n")
ggsave("./Images/channel_freq.jpeg", width = 28, height = 20, units = "cm")
Sentiment score comparison
# Mean transformer sentiment per administration, keeping those with more
# than 50 reviews. n() replaces the mutate(n = 1) + sum(n) workaround.
foo<-df%>%
  group_by(intitule)%>%
  summarise(score=mean(transformer_sentiment_score,na.rm=TRUE),
            n=n()) %>%
  filter(!is.na(intitule))%>%
  filter(n>50)
# Bars ordered by mean score; fill encodes (descending) review count.
ggplot(foo, aes(x=reorder(intitule,score), y=score ))+
  geom_bar(stat="identity",aes(fill=desc(n)))+
  coord_flip()
ggsave("./Images/channel_sent.jpeg", width = 28, height = 20, units = "cm")
# MCA on the channel indicators, with feeling and country as supplementary
# variables.
# NOTE(review): columns are picked by position — select(7,11:19,5) assumes a
# fixed column order of merge(df, typo); confirm if the csv schema changes.
foo<-merge(df,typo) %>%
select(7,11:19,5) %>%
mutate(across(where(is.numeric), as.character))
# quali.sup marks columns 1 and 11 as supplementary qualitative variables.
res.mca <- MCA(foo,quali.sup=c(1, 11))
fviz_mca_var(res.mca,
repel = TRUE, # Avoid text overlapping (slow)
ggtheme = theme_minimal(),
labelsize = 2)
ggsave("./Images/channel_MCA.jpeg", width = 28, height = 20, units = "cm")
Stylometry is the (old) set of techniques that process the text as a whole and give information on particular features of style.
First, the number of words per review.
# Number of words per review.
# str_count(x) with its default pattern counts characters, not words; count
# word boundaries explicitly so n_words matches its name and its later use
# as a per-word normalisation factor.
df$n_words<-str_count(df$description, boundary("word"))
ggplot(df,aes(x=n_words))+
  geom_density()+
  scale_x_log10()
ggsave("./Images/count.jpeg", width = 28, height = 20, units = "cm")
Evolution of the number of reviews, size of reviews, and correlation between.
# Daily number of reviews and their mean length, plus their relationship.
foo <- df %>%
  group_by(date_ecrit) %>%
  summarise(n = n(), size_t = mean(n_words))
# Review volume over time (log scale).
ggplot(foo, aes(x = date_ecrit, y = n)) +
  geom_line() +
  scale_y_log10() +
  geom_smooth()
# Mean review length over time (log scale).
ggplot(foo, aes(x = date_ecrit, y = size_t)) +
  geom_point() +
  scale_y_log10() +
  geom_smooth()
# Daily volume versus mean length, log-log.
ggplot(foo, aes(x = n, y = size_t)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  geom_smooth()
We aggregate by month, using lubridate functions.
# Readability statistics per review (quanteda.textstats).
readability<-textstat_readability(df$description,
                                  measure = c("Flesch",
                                              "meanSentenceLength",
                                              "meanWordSyllables"))
# Keep the writing date (column 2 of df) next to the three scores.
foo<-cbind(df[,2],readability[,2:4])
# The original `foo$date` relied on `$` partial matching to reach
# `date_ecrit` — reference the column explicitly instead.
foo$date<-as.POSIXct(foo$date_ecrit)
# Monthly means of each readability measure.
foo1<-foo %>%
  dplyr::mutate(Year=year(date_ecrit), Month=month(date_ecrit), date=my(paste(Month, Year)))%>%
  select(-Month, -date_ecrit,-Year)%>%
  group_by(date) %>%
  summarise(Flesch=mean(Flesch, na.rm=TRUE),
            SentenceLength= mean(meanSentenceLength, na.rm=TRUE),
            WordSyllables= mean(meanWordSyllables, na.rm=TRUE))
## Warning: 1 failed to parse.
# Long format for faceting: one panel per readability measure.
foo2 <- foo1 %>%
  pivot_longer(-date, names_to = "Variable", values_to = "Score") %>%
  drop_na()
ggplot(foo2, aes(x = date, y = Score, group = Variable)) +
  geom_line(size = 1.2, aes(color = Variable), stat = "identity") +
  facet_wrap(vars(Variable), scale = "free", ncol = 1) +
  labs(title = "Experience readability", x = NULL, y = NULL)
# Lexical diversity (CTTR and Maas) per review.
# The stray `df$text` argument was removed: df has no `text` column, so a
# NULL was silently passed into textstat_lexdiv().
lexdiv<-tokens(df$description)%>%
  textstat_lexdiv(measure = c("CTTR", "Maas"), log.base = 10,
                  remove_numbers = TRUE,
                  remove_punct = TRUE,
                  remove_symbols = TRUE,
                  remove_hyphens = TRUE)
# Attach the lexical-diversity scores to the reviews.
# NOTE(review): lexdiv[,2:5] assumes four score columns after `document`;
# with only the two requested measures (CTTR, Maas) this may be out of
# bounds — confirm against the installed quanteda.textstats version.
foo<-cbind(df,lexdiv[,2:5])
# Monthly means of both indices, then long format for faceting.
foo1<-foo %>% mutate(Year=year(date_ecrit), Month=month(date_ecrit), date=my(paste(Month, Year)))%>%
group_by(date) %>%
summarise(CTTR=mean(CTTR, na.rm=TRUE),
Maas= mean(Maas, na.rm=TRUE)) %>%
pivot_longer(-date,names_to="Variable", values_to="Score")
ggplot(foo1,aes(x=date, y=Score, group=Variable))+
geom_line(size=1.2, aes(color=Variable), stat="identity")+
facet_wrap(vars(Variable), scale="free", ncol=1)+
labs(title = "Lexical diversity", x=NULL, y=NULL)
# Monthly means of both diversity indices, and their correlation.
foo1 <- foo %>%
  mutate(Year = year(date_ecrit),
         Month = month(date_ecrit),
         date = my(paste(Month, Year))) %>%
  group_by(date) %>%
  summarise(CTTR = mean(CTTR, na.rm = TRUE),
            Maas = mean(Maas, na.rm = TRUE))
cor(foo1$CTTR, foo1$Maas)
## [1] 0.5988366
# Scatter of the two indices, coloured by month, with a linear fit.
ggplot(foo1, aes(x = CTTR, y = Maas)) +
  geom_point(size = 1.2, aes(color = date), stat = "identity") +
  geom_smooth(method = "lm") +
  labs(title = "Lexical diversity", x = NULL, y = NULL)
A very simple approach: score each review against a word lexicon.
# Sentiment extraction with syuzhet's NRC emotion lexicon.
#library(syuzhet) # sentiment analysis (already loaded at the top)
# Parameters. NOTE(review): `method` and `lang` are defined but unused here —
# get_nrc_sentiment() below is called with a literal language string.
method <- "nrc"
lang <- "french"
# One text per review: title concatenated with the description.
phrase<-as.character(paste0(df$titre,". ",df$description))
# Extraction: 8 NRC emotion counts plus positive/negative counts per review.
emotions <- get_nrc_sentiment(phrase,language = "french")
Now we plot the evolution
# Split the NRC output: the 8 basic emotions vs the positive/negative counts.
emotion<-emotions[,1:8]
polarity<-subset(emotions,select=c(positive, negative))
# Attach polarity to the reviews and build a monthly date key.
# (The duplicated Year/Month/date mutate was removed.)
foo<-cbind(df,polarity)%>%
  mutate(Year=year(date_ecrit), Month=month(date_ecrit), date=my(paste(Month, Year)))
# Monthly means, with counts normalised by review length (n_words).
foo1<-foo %>%
  mutate(positive=positive/n_words,
         negative=negative/n_words)%>%
  group_by(date) %>%
  summarise(positive=mean(positive, na.rm=TRUE),
            negative= -mean(negative, na.rm=TRUE),
            valence=positive+negative,
            expressivity=positive-negative) %>%
  pivot_longer(-date,names_to="Variable", values_to="Score")
ggplot(foo1,aes(x=date, y=Score, group=Variable))+
  geom_line(size=1.2, aes(color=Variable), stat="identity")+
  labs(title = "Sentiment", x=NULL, y=NULL)+
  # Fixed " Red" (stray leading space), which is not a valid colour name.
  scale_colour_manual(values=c("Orange","Red", "Darkgreen","Grey"))
# LIWC-style pronoun usage over time, with the French LIWC dictionary.
#library("quanteda.dictionaries")
dict_liwc_french <- dictionary(file = "FrenchLIWCDictionary.dic",
                               format = "LIWC")
# Per-review rates for first/second/third-person and impersonal pronouns.
test<-liwcalike(df$description,dictionary = dict_liwc_french) %>%
  select(je,vous, il, ils, pronomimp)
foo<-cbind(df,test)
# Monthly means of each pronoun category.
foo1<-foo %>%
  mutate(Year=year(date_ecrit), Month=month(date_ecrit), date=my(paste(Month, Year)))%>%
  group_by(date) %>%
  summarise(je=mean(je, na.rm=TRUE),
            vous= mean(vous, na.rm=TRUE),
            il_s=mean(il+ils,na.rm=TRUE),
            pronomimp=mean(pronomimp,na.rm=TRUE)) %>%
  pivot_longer(-date,names_to="Variable", values_to="Score")
ggplot(foo1,aes(x=date, y=Score, group=Variable))+
  geom_line(size=1.2, aes(color=Variable), stat="identity")+
  # Title fixed: this plot shows pronoun usage, not sentiment (copy-paste).
  labs(title = "Pronoun usage", x=NULL, y=NULL)+
  # Fixed " Red" (stray leading space), which is not a valid colour name.
  scale_colour_manual(values=c("Orange","Red", "Darkgreen","Grey"))
Just build a list of words and give each of them a weight.
(We could improve the word list and test the availability of regex formulas.)
# Custom lexicon: a simple weighted word list about taxation.
my_text <- df$description
method <- "custom"
# Fixed " fiscaux" (leading space): with the space it could never match a
# token. All words carry the same unit weight.
custom_lexicon <- data.frame(word=c("impot", "impôt","impots", "impôts", "taxe","taxes", "fisc", "fiscal", "fiscales", "fiscaux", "fiscalité", "redevance"),
                             value=rep(1, 12))
custom_distrib <- get_sentiment(my_text, method = method, lexicon = custom_lexicon)
custom_distrib<-as.data.frame(custom_distrib)
# Histogram of the per-review scores (log counts).
ggplot(custom_distrib,aes(x=custom_distrib))+geom_histogram()+scale_y_log10()
# Monthly mean of the custom "fisc" score.
foo <- cbind(df, custom_distrib)
foo1 <- foo %>%
  mutate(Year = year(date_ecrit),
         Month = month(date_ecrit),
         date = my(paste(Month, Year))) %>%
  group_by(date) %>%
  summarise(custom_distrib = mean(custom_distrib, na.rm = TRUE))
ggplot(foo1, aes(x = date, y = custom_distrib)) +
  geom_line(size = 1.2, stat = "identity") +
  labs(title = "Fisc", x = NULL, y = NULL) +
  scale_colour_manual(values = c("Orange", " Red", "Darkgreen", "Grey"))
Annotations are applied at the level of words: lexical annotations (stem, lemma, WordNet) and syntactic annotations (PoS, syntactic dependencies).
The very first step. Useful to establish the vocabulary.
# Build a quanteda corpus and tokenize it (punctuation, symbols and numbers
# removed, French stopwords dropped).
#df$text<-str_replace(df$text, "\\w+", "J ") # TODO: find a fix for the apostrophe
# NOTE(review): recent quanteda versions use docid_field, not id_field —
# confirm this argument against the installed version.
corpus<-corpus(df,id_field = "id",text_field = "description")
foo<-tokens(corpus,remove_punct = TRUE, remove_symbols=TRUE, remove_numbers=TRUE)%>%
tokens_remove(stopwords("french"))
head(foo,5)
## Tokens consisting of 5 documents and 9 docvars.
## text1 :
## [1] "J'ai" "accouché" "février" "fille" "n'est" "encore"
## [7] "inscrite" "CAF" "Pourtant" "j'ai" "fait" "démarches"
## [ ... and 41 more ]
##
## text2 :
## [1] "J'ai" "envoyé" "messages" "attentes" "réponse"
## [6] "CAF" "demande" "date" "autre" "date"
## [11] "S'agissant" "questions"
## [ ... and 7 more ]
##
## text3 :
## [1] "J'ai" "fait" "demande" "RSA" "ligne" "trompé" "cases"
## [8] "m'a" "répondu" "n'avais" "droit" "RSA"
## [ ... and 53 more ]
##
## text4 :
## [1] "J'ai" "déménagé" "l'Hérault" "Gard" "mois" "janvier"
## [7] "dossier" "n'a" "toujours" "traité" "alors" "j'ai"
## [ ... and 11 more ]
##
## text5 :
## [1] "J'habite" "village" "bout" "département" "permanence"
## [6] "CAF" "après" "avoir" "subi" "l'annulation"
## [11] "rendez-vous" "permanence"
## [ ... and 147 more ]
# Flatten the tokens to one row per token occurrence (udpipe helper).
foo1 <- unlist_tokens(foo)
dim(foo1)
## [1] 869021 2
# Frequency and rank of each distinct token.
foo2 <- foo1 %>%
  group_by(token) %>%
  summarise(n = n()) %>%
  mutate(rank = rank(desc(n)))
dim(foo2)
## [1] 44512 3
# Zipf-style rank/frequency plot on log-log axes, with a linear fit.
ggplot(foo2, aes(x = rank, y = n)) +
  geom_point(alpha = .2) +
  geom_smooth(method = lm) +
  scale_x_log10() +
  scale_y_log10() +
  labs(title = "Zipf like")
# Word clouds from trimmed document-feature matrices.
# NOTE(review): passing remove/remove_punct directly to dfm() is deprecated
# in quanteda v3 (tokens() options are the modern route) — confirm the
# installed version still honours them.
dfmat1 <- dfm(foo,
remove = stopwords("french"), remove_punct = TRUE) %>%
dfm_trim(min_termfreq = 3)
textplot_wordcloud(dfmat1, max_words = 50)
# Word cloud restricted to CAF-related experiences.
dfmat2 <- dfm(corpus_subset(corpus, intitule == "CAF"),
remove = stopwords("french"), remove_punct = TRUE) %>%
dfm_trim(min_termfreq = 3)
textplot_wordcloud(dfmat2, max_words = 150)
# NOTE(review): textplot_wordcloud() draws with base graphics, so ggsave()
# here saves the last ggplot, not this word cloud — confirm intent.
ggsave("./Images/worcloud.jpeg", width = 28, height = 20, units = "cm")
looking for expressions
#on sélectionne les mots commençant par une majuscule
# Select tokens starting with a capital letter (candidate proper nouns).
toks_cap <- tokens_select(foo,
                          pattern = "^[A-Z]",
                          valuetype = "regex",
                          case_insensitive = FALSE,
                          padding = TRUE)
# Find multi-word collocations among them.
tstat_col_cap <- textstat_collocations(toks_cap, min_count = 3, tolower = FALSE)
#head(as.data.frame(tstat_col_cap),15)
# Compound tokens for strong collocations (association score lambda > 10).
# Fixes: `lamba` was a typo for `lambda`, and the data-frame subset needs a
# trailing comma to select rows — the original silently selected zero
# collocations, so no compounds were ever formed (see head() output below).
toks_comp <- tokens_compound(foo, pattern = tstat_col_cap[tstat_col_cap$lambda > 10, ],
                             case_insensitive = FALSE)
head(toks_comp)
## Tokens consisting of 6 documents and 9 docvars.
## text1 :
## [1] "J'ai" "accouché" "février" "fille" "n'est" "encore"
## [7] "inscrite" "CAF" "Pourtant" "j'ai" "fait" "démarches"
## [ ... and 41 more ]
##
## text2 :
## [1] "J'ai" "envoyé" "messages" "attentes" "réponse"
## [6] "CAF" "demande" "date" "autre" "date"
## [11] "S'agissant" "questions"
## [ ... and 7 more ]
##
## text3 :
## [1] "J'ai" "fait" "demande" "RSA" "ligne" "trompé" "cases"
## [8] "m'a" "répondu" "n'avais" "droit" "RSA"
## [ ... and 53 more ]
##
## text4 :
## [1] "J'ai" "déménagé" "l'Hérault" "Gard" "mois" "janvier"
## [7] "dossier" "n'a" "toujours" "traité" "alors" "j'ai"
## [ ... and 11 more ]
##
## text5 :
## [1] "J'habite" "village" "bout" "département" "permanence"
## [6] "CAF" "après" "avoir" "subi" "l'annulation"
## [11] "rendez-vous" "permanence"
## [ ... and 147 more ]
##
## text6 :
## [1] "J'avais" "rendez-vous" "CAF" "Gard" "Celui-ci"
## [6] "s'est" "très" "bien" "passé" "explications"
## [11] "claires" "concises"
## [ ... and 2 more ]
library(udpipe)
# Download the French model, then load it from the path the download
# reports, instead of a hard-coded file name that may not match the
# downloaded model version.
fr <- udpipe_download_model(language = "french")
udmodel_french <- udpipe_load_model(file = fr$file_model)
# Annotate every description (tokenization, lemma, PoS, dependencies);
# progress is traced every 1000 fragments. This step is long (see timings).
UD <- udpipe_annotate(udmodel_french, x=df$description, trace =1000,parallel.cores = 4)
## 2022-10-30 16:26:19 Annotating text fragment 1/16576
## 2022-10-30 16:29:31 Annotating text fragment 1001/16576
## 2022-10-30 16:33:04 Annotating text fragment 2001/16576
## 2022-10-30 16:36:20 Annotating text fragment 3001/16576
## 2022-10-30 16:39:08 Annotating text fragment 4001/16576
## 2022-10-30 16:41:43 Annotating text fragment 5001/16576
## 2022-10-30 16:44:15 Annotating text fragment 6001/16576
## 2022-10-30 16:46:45 Annotating text fragment 7001/16576
## 2022-10-30 16:48:55 Annotating text fragment 8001/16576
## 2022-10-30 16:51:08 Annotating text fragment 9001/16576
## 2022-10-30 16:53:29 Annotating text fragment 10001/16576
## 2022-10-30 16:55:42 Annotating text fragment 11001/16576
## 2022-10-30 16:58:07 Annotating text fragment 12001/16576
## 2022-10-30 17:01:17 Annotating text fragment 13001/16576
## 2022-10-30 17:03:50 Annotating text fragment 14001/16576
## 2022-10-30 17:06:15 Annotating text fragment 15001/16576
## 2022-10-30 17:08:33 Annotating text fragment 16001/16576
# Convert the annotation object to a data frame and cache it on disk,
# because the annotation step above is very slow.
UD <- as.data.frame(UD)
saveRDS(UD, "./Data/UD.rds")
Let's examine the content: lemmas per PoS tag.
# Reload the cached annotations and plot the frequency of each PoS tag.
UD <- readRDS("./Data/UD.rds")
foo <- UD %>%
  count(upos, name = "n") %>%
  ggplot(aes(x = reorder(upos, n), y = n)) +
  geom_bar(stat = "identity") +
  coord_flip()
foo
# Most frequent noun lemmas (more than 1000 occurrences).
# geom_point's `fill` is ignored for the default point shape; use `color`
# so the points are actually drawn in blue as intended.
foo<-UD %>%
  filter(upos=="NOUN") %>%
  group_by(lemma)%>%
  summarise(n=n()) %>%
  filter(n>1000)%>%
  ggplot(aes(x=reorder(lemma,n), y=n))+
  geom_point(size=1.5, color="blue3")+coord_flip()
foo
# Word clouds of the most frequent adjective and verb lemmas (> 300
# occurrences), one facet per PoS.
foo <- UD %>%
  filter(upos %in% c("ADJ", "VERB")) %>%
  group_by(lemma, upos) %>%
  summarise(n = n()) %>%
  filter(n > 300) %>%
  ggplot(aes(label = lemma, size = log(n), group = upos)) +
  geom_text_wordcloud(aes(color = upos)) +
  theme_minimal() +
  facet_wrap(vars(upos))
foo
A few dplyr gymnastics, and that is already session 2.
We now follow with the idea of pairing each noun with its modifying adjectives;
for the dataviz, igraph is the tool.
# Build noun–adjective pairs from the dependency parse.
# First, index every token by (doc, paragraph, sentence, token id) so that a
# dependency head can be looked up by its composite key.
foo1<-UD %>%
mutate(id=paste0(doc_id,paragraph_id,sentence_id,token_id))%>%
select(id, lemma)%>%
rename(noun=lemma)
# Keep adjectival modifiers (dep_rel == "amod"), join each to the lemma of
# its head token (the word it modifies), and keep pairs seen more than 50
# times.
foo<-UD %>%
filter(dep_rel=="amod")%>%
mutate(id=paste0(doc_id,paragraph_id,sentence_id,head_token_id))%>%
left_join(foo1)%>%
select(id, lemma, noun)%>%
rename(adj=lemma)%>%
group_by(noun,adj)%>%
summarise(n=n())%>%
filter(n>50)
#A Correspondence Analysis solution?
#the igraph approach belongs to the networks lesson
library(igraph)
# graph.data.frame() is a deprecated alias; use graph_from_data_frame().
g <- graph_from_data_frame(foo, directed=FALSE)
V(g)$type <- bipartite_mapping(g)$type ## Add the "type" attribute (the two sides of the bipartite graph)
V(g)$label.color <- ifelse(V(g)$type, "salmon4", "blue2")
V(g)$fill <- ifelse(V(g)$type, "salmon4", "blue2")
V(g)$shape <-ifelse(V(g)$type, "circle", "square")
plot(g,vertex.label.cex = 0.8)
Beware of the computing time! It is best to sample when testing (go back to the beginning).
# Total wall-clock time of the script (t1 is set at the top of the file).
t2=Sys.time()
# NOTE(review): `t` shadows base::t() from here on; harmless at the end of
# the script, but rename it if code is added below.
t<- t2-t1
print(t)
## Time difference of 1.058051 hours
We save the annotated dataset in a format *.rds for further usage.
# Bundle the raw data with all computed indicators (readability, lexical
# diversity, NRC emotions, LIWC pronoun rates) and cache as .rds for the
# next sessions.
# NOTE(review): cbind will duplicate the `document` id columns carried by
# readability/lexdiv — confirm this is acceptable downstream.
df_work<-cbind(df,readability, lexdiv,emotions,test)
write_rds(df_work,"./Data/df_work.rds")
See you tomorrow and go to session 2
Some exercises before, for training :