% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/TargetingModels.R
\name{createMutabilityMatrix}
\alias{createMutabilityMatrix}
\title{Builds a mutability model}
\usage{
createMutabilityMatrix(
  db,
  substitutionModel,
  model = c("s", "rs"),
  sequenceColumn = "sequence_alignment",
  germlineColumn = "germline_alignment_d_mask",
  vCallColumn = "v_call",
  multipleMutation = c("independent", "ignore"),
  minNumSeqMutations = 500,
  numSeqMutationsOnly = FALSE
)
}
\arguments{
\item{db}{data.frame containing sequence data.}

\item{substitutionModel}{matrix of 5-mer substitution rates built by 
\link{createSubstitutionMatrix}. Note, this model will
only impact mutability scores when \code{model="s"}
(using only silent mutations).}

\item{model}{type of model to create. The default model, "s", 
builds a model by counting only silent mutations. \code{model="s"}
should be used for data that includes functional sequences.
Setting \code{model="rs"} creates a model by counting both 
replacement and silent mutations and may be used on fully 
non-functional sequence data sets.}

\item{sequenceColumn}{name of the column containing IMGT-gapped sample sequences.}

\item{germlineColumn}{name of the column containing IMGT-gapped germline sequences.}

\item{vCallColumn}{name of the column containing the V-segment allele call.}

\item{multipleMutation}{string specifying how to handle multiple mutations occurring 
within the same 5-mer. If \code{"independent"} then multiple 
mutations within the same 5-mer are counted independently. 
If \code{"ignore"} then 5-mers with multiple mutations are 
excluded from the total mutation tally.}

\item{minNumSeqMutations}{minimum number of mutations in sequences containing each 5-mer
to compute the mutability rates. If the number is smaller 
than this threshold, the mutability for the 5-mer will be 
inferred. Default is 500. Not required if 
\code{numSeqMutationsOnly=TRUE}.}

\item{numSeqMutationsOnly}{when \code{TRUE}, return only a vector counting the number of 
observed mutations in sequences containing each 5-mer. This 
option can be used for parameter tuning for \code{minNumSeqMutations} 
during preliminary analysis using \link{minNumSeqMutationsTune}. 
Default is \code{FALSE}.}
}
\value{
When \code{numSeqMutationsOnly} is \code{FALSE}, a \code{MutabilityModel} containing a
          named numeric vector of 1024 normalized mutability rates for each 5-mer motif with names 
          defining the 5-mer nucleotide sequence.
          
          When \code{numSeqMutationsOnly} is \code{TRUE}, a named numeric
          vector of length 1024 counting the number of observed mutations in sequences containing 
          each 5-mer.
}
\description{
\code{createMutabilityMatrix} builds a 5-mer nucleotide mutability model by counting 
the number of mutations occurring in the center position for all 5-mer motifs.
}
\details{
\strong{Caution: The targeting model functions do NOT support ambiguous 
          characters in their inputs. You MUST make sure that your input and germline
          sequences do NOT contain ambiguous characters (especially if they are 
          clonal consensuses returned from \code{collapseClones}).}
}
\examples{
\donttest{
# Subset example data to 50 sequences of one isotype and sample as a demo
data(ExampleDb, package="alakazam")
db <- subset(ExampleDb, c_call == "IGHA" & sample_id == "-1h")[1:50,]

# Create model using only silent mutations
sub_model <- createSubstitutionMatrix(db, sequenceColumn="sequence_alignment",
                                      germlineColumn="germline_alignment_d_mask",
                                      vCallColumn="v_call",model="s")
mut_model <- createMutabilityMatrix(db, sub_model, model="s", 
                                    sequenceColumn="sequence_alignment",
                                    germlineColumn="germline_alignment_d_mask",
                                    vCallColumn="v_call",
                                    minNumSeqMutations=200,
                                    numSeqMutationsOnly=FALSE)
                                    
# View top 5 mutability estimates
head(sort(mut_model, decreasing=TRUE), 5)

# View the number of S mutations used for estimating mutabilities
mut_model@numMutS

# Count the number of mutations in sequences containing each 5-mer
mut_count <- createMutabilityMatrix(db, sub_model, model="s", 
                                    sequenceColumn="sequence_alignment",
                                    germlineColumn="germline_alignment_d_mask",
                                    vCallColumn="v_call",
                                    numSeqMutationsOnly=TRUE)
}

}
\references{
\enumerate{
  \item  Yaari G, et al. Models of somatic hypermutation targeting and substitution based
           on synonymous mutations from high-throughput immunoglobulin sequencing data. 
           Front Immunol. 2013 4(November):358.
 }
}
\seealso{
\link{MutabilityModel}, \link{extendMutabilityMatrix}, \link{createSubstitutionMatrix}, 
          \link{createTargetingMatrix}, \link{createTargetingModel},
          \link{minNumSeqMutationsTune}
}
