## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, message = FALSE, warning = FALSE----------------------------------
library(magrittr)
library(dplyr)
library(tidyr)
library(stringr)

## ----Spectronaut, eval=FALSE--------------------------------------------------
# # To read in your own data you can use read_protti()
# spectronaut_data <- read_protti(filename = "mydata/spectronaut.csv")

## ----MaxQuant_peptide, eval=FALSE---------------------------------------------
# # To read in your own data you can use read_protti()
# evidence <- read_protti(filename = "yourpath/evidence.txt")
# 
# evidence_proteotypic <- evidence %>%
#   # adds new column with logicals that are TRUE if the peptide can be assigned
#   # to only one protein and FALSE if it can be assigned to multiple
#   mutate(is_proteotypic = str_detect(
#     string = proteins,
#     pattern = ";",
#     negate = TRUE
#   )) %>%
#   # adds new column with logicals indicating if peptide is coming from a potential contaminant
#   mutate(is_contaminant = ifelse(potential_contaminant == "+", TRUE, FALSE))
# 
# # Make an annotation data frame and merge it with your data frame to obtain conditions
# # We are annotating sample 1-3 as controls and samples 4-6 as treated conditions
# 
# file_name <- c( # make sure that the names are the same name as in your report
#   "sample1",
#   "sample2",
#   "sample3",
#   "sample4",
#   "sample5",
#   "sample6"
# )
# 
# condition <- c(
#   "control",
#   "control",
#   "control",
#   "treated",
#   "treated",
#   "treated"
# )
# 
# annotation <- data.frame(file_name, condition)
# 
# # Combine your long data frame with the annotation
# evidence_annotated <- evidence_proteotypic %>%
#   left_join(y = annotation, by = "file_name")

## ----MaxQuant_protein, eval=FALSE---------------------------------------------
# # To read in your own data you can use read_protti()
# protein_groups <- read_protti(filename = "yourpath/proteinGroups.txt") %>%
#   # adds new column with logicals indicating if protein is a potential contaminant,
#   # you can filter these out later on. You should also consider filtering out proteins
#   # that were "only identified by site" and reverse hits, as well as proteins with only
#   # one identified peptide
#   mutate(is_potential_contaminant = ifelse(potential_contaminant == "+", TRUE, FALSE))
# 
# # Change wide format to long format and create new columns called `r_file_name`and `intensity`
# protein_groups_long <- protein_groups %>%
#   pivot_longer(
#     cols = starts_with("intensity_"),
#     names_to = "file_name",
#     values_to = "intensity"
#   )
# 
# # Make an annotation data frame and merge it with your data frame to obtain conditions
# # We are annotating sample 1-3 as controls and samples 4-6 as treated conditions
# 
# file_name <- c( # make sure that the names are the same name as in your report
#   "intensity_sample1",
#   "intensity_sample2",
#   "intensity_sample3",
#   "intensity_sample4",
#   "intensity_sample5",
#   "intensity_sample6"
# )
# 
# condition <- c(
#   "control",
#   "control",
#   "control",
#   "treated",
#   "treated",
#   "treated"
# )
# 
# annotation <- data.frame(file_name, condition)
# 
# # Combine your long data frame with the annotation
# protein_groups_annotated <- protein_groups_long %>%
#   left_join(y = annotation, by = "file_name")

## ----Skyline, eval=FALSE------------------------------------------------------
# # Load data
# skyline_data <- read_protti(filename = "yourpath/skyline.csv")
# 
# skyline_data_int <- skyline_data %>%
#   # create a column with precursor information
#   mutate(precursor = paste0(peptide_sequence, "_", charge)) %>%
#   group_by(replicate_name, precursor) %>%
#   # making a new column containing the summed up intensities of all transitions of one precursor
#   mutate(sum_intensity = sum(area)) %>%
#   select(-c(product_mz, area)) %>% # removing the columns we don't need
#   distinct() # removing duplicated rows from the data frame
# 
# # Add annotation
# # make sure that the names are the same name as in your report
# replicate_name <- c(
#   "sample_1",
#   "sample_2",
#   "sample_3",
#   "sample_1",
#   "sample_2",
#   "sample_3"
# )
# 
# condition <- c(
#   "control",
#   "control",
#   "control",
#   "treated",
#   "treated",
#   "treated"
# )
# 
# annotation <- data.frame(replicate_name, condition)
# 
# # Combine your long data frame with the annotation
# skyline_annotated <- skyline_data_int %>%
#   left_join(y = annotation, by = "replicate_name")

## ----Proteome_discoverer_pep, eval=FALSE--------------------------------------
# # Load data
# pd_pep_data <- read_protti("yourpath/PDpeptides.csv")
# 
# # Select relevant columns
# pd_pep_selected <- pd_pep_data %>%
#   select(
#     sequence,
#     modifications,
#     number_proteins,
#     contaminant,
#     master_protein_accessions,
#     starts_with("abundances_grouped"), # select all columns that start with "abundances_grouped"
#     quan_info
#   )
# 
# # Filter data frame
# pd_pep_filtered <- pd_pep_selected %>%
#   filter(contaminant == FALSE) %>% # remove annotated contaminants
#   filter(number_proteins == 1) %>% # select proteotypic peptides
#   filter(quan_info != "No Quan Values") # remove peptides that have no quantification values
# 
# # Convert into long format
# pd_pep_long <- pd_pep_filtered %>%
#   pivot_longer(
#     cols = starts_with("abundances"),
#     names_to = "file_name",
#     values_to = "intensity"
#   ) %>%
#   # combine peptide sequence and modifications to make a precursor column
#   mutate(precursor = paste(sequence, modifications))
# 
# # Make annotation data frame
# file_name <- c( # make sure that the names are the same name as in your report
#   "abundances_grouped_f1",
#   "abundances_grouped_f2",
#   "abundances_grouped_f3",
#   "abundances_grouped_f4",
#   "abundances_grouped_f5",
#   "abundances_grouped_f6"
# )
# 
# condition <- c(
#   "control",
#   "control",
#   "control",
#   "treated",
#   "treated",
#   "treated"
# )
# 
# annotation <- data.frame(file_name, condition)
# 
# # Combine your long data frame with the annotation
# pd_pep_long_annotated <- pd_pep_long %>%
#   left_join(y = annotation, by = "file_name")

## ----Proteome_discoverer_prot, eval=FALSE-------------------------------------
# # Load data
# pd_prot_data <- read_protti("yourpath/PDproteins.csv")
# 
# # Select relevant columns
# pd_prot_selected <- pd_prot_data %>%
#   select(
#     accession,
#     description,
#     contaminant,
#     number_peptides,
#     starts_with("abundances_grouped"), # select all columns that start with "abundances_grouped"
#   )
# 
# # Filter data frame
# pd_prot_data_filtered <- pd_prot_selected %>%
#   filter(contaminant == FALSE) %>% # remove annotated contaminants
#   filter(number_peptides > 1) # select proteins with more than one identified peptide
# 
# # Convert into long format
# pd_prot_long <- pd_prot_data_filtered %>%
#   pivot_longer(
#     cols = starts_with("abundances"),
#     names_to = "file_name",
#     values_to = "intensity"
#   )
# 
# # Make annotation data frame
# file_name <- c( # make sure that the names are the same name as in your report
#   "abundances_grouped_f1",
#   "abundances_grouped_f2",
#   "abundances_grouped_f3",
#   "abundances_grouped_f4",
#   "abundances_grouped_f5",
#   "abundances_grouped_f6"
# )
# 
# condition <- c(
#   "control",
#   "control",
#   "control",
#   "treated",
#   "treated",
#   "treated"
# )
# 
# annotation <- data.frame(file_name, condition)
# 
# # Combine your long data frame with the annotation
# pd_prot_long_annotated <- pd_prot_long %>%
#   left_join(y = annotation, by = "file_name")