Skip to contents

The genotype assignment function can be called with:

 # Assign genotype labels to every sample and report how long it takes.
 system.time(
   mydata_new_with_genotype <- compute_genotype_labels(
     # Input data frame.
     df = mydata,
     # Index of the first locus column.
     loci_column_start = 15,
     # Index of the last locus column.
     loci_column_end = 40,
     # Importance (= weight) of a "NA mismatch".
     NA_weight = 0,
     n_thresholds = 2,
     # Minimum number of common non-NA loci a sample pair needs in order to
     # be compared. A pair that does not meet it is ignored, and the genotype
     # assignment uses all other possible pairs for these samples.
     min_common_loci = 6
   )
 )

The discovery curve function can be called with:

# Build the genotype discovery curve from the locus columns only.
discov_curv <- genotype_discovery_curve(
  mydata[, 15:40], # genetics only
  max_loci = 13, # number of microsatellites
  NA_weight = 0,
  n_thresholds = 3,
  n_bootstrap = 12,
  # Where to start the curve; going much above the number of loci
  # does not make sense.
  min_common_loci = 1
)

Plotting the curve can then be done with:

# Plot the discovery curve: for each number of loci, a vertical bar runs from
# lower_CI to upper_CI with a "-" cap at both ends, and a dashed line follows
# the median. One panel per threshold, each with its own y scale.
discov_curv %>%
  ggplot(aes(x = num_loci, xend = num_loci, y = lower_CI, yend = upper_CI)) +
  # The median line is added first so the interval bars and caps are drawn on
  # top of it. `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0).
  geom_line(
    aes(y = median),
    linetype = "dashed",
    linewidth = 0.5,
    color = "#3b3b3b"
  ) +
  # Interval bar and its two caps, drawn once (x is inherited from ggplot()).
  geom_segment() +
  geom_point(aes(y = lower_CI), shape = "-", size = 6) +
  geom_point(aes(y = upper_CI), shape = "-", size = 6) +
  scale_y_continuous(n.breaks = 4) +
  facet_wrap(~ threshold, scales = "free_y") +
  ylab("Unique genotypes") +
  xlab("Nb of loci") +
  theme_bw()

The probability of identity function (in progress…) can be called with:

 # Keep the result in `prob`: the plotting step reads it back as `prob$plot`.
 prob <- prob_identity(
   mydata = mydata,
   loci_column_start = 15, # the first locus column
   loci_column_end = 40 # the last locus column
 )

And plotted with:

# Take the `plot` element of `prob`, apply the classic theme and set the
# two colours by hand.
prob$plot +
  theme_classic() +
  scale_color_manual(values = c("#852525", "#1e2aa7"))