libStatGen Software  1
SamFilter.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __SAM_FILTER_H__
19 #define __SAM_FILTER_H__
20 
21 #include "SamRecord.h"
22 #include "GenomeSequence.h"
23 
24 /// Class for helping to filter a SAM/BAM record.
25 class SamFilter
26 {
27 public:
28  /// Enum describing what sort of filtering was done.
29  enum FilterStatus {
30  NONE, ///< The filter did not affect the read.
31  CLIPPED, ///< Filtering clipped the read.
32  FILTERED ///< Filtering caused the read to be modified to unmapped.
33  };
34 
35  /// Clip the read based on the specified mismatch threshold.
36  /// \return how the read was affected,
37  /// NONE if the read was not modified,
38  /// CLIPPED if the read was clipped,
39  /// FILTERED if the whole read would have been clipped so instead the
40  /// read was modified to unmapped.
42  GenomeSequence& refSequence,
43  double mismatchThreshold);
44 
45  /// Soft clip the record from the front and/or the back.
46  /// \param record record to be clipped (input/output parameter).
47  /// \param numFrontClips number of bases that should be clipped from the
48  /// front of the sequence read. (total count, including any that are
49  /// already clipped.)
50  /// \param backClipPos number of bases that should be clipped from the
51  /// back of the sequence read. (total count, including any that are
52  /// already clipped.)
53  static FilterStatus softClip(SamRecord& record,
54  int32_t numFrontClips,
55  int32_t numBackClips);
56 
57  /// Soft clip the cigar from the front and/or the back, writing the value
58  /// into the new cigar, updatedCigar & startPos are only updated if
59  /// the return FilterStatus is CLIPPED.
60  /// \param oldCigar cigar prior to clipping
61  /// \param numFrontClips number of bases that should be clipped from the
62  /// front of the sequence read. (total count, including any that are
63  /// already clipped.)
64  /// \param numBackClips number of bases that should be clipped from the
65  /// back of the sequence read. (total count, including any that are
66  /// already clipped.)
67  /// \param startPos 0-based start position associated with the
68  /// cigar prior to updating (input) and set to the 0-based start position
69  /// after updating (output) the cigar if it was CLIPPED.
70  /// \param updatedCigar set to the clipped cigar if CLIPPED (output param).
71  static FilterStatus softClip(Cigar& oldCigar,
72  int32_t numFrontClips,
73  int32_t numBackClips,
74  int32_t& startPos,
75  CigarRoller& updatedCigar);
76 
77  /// Filter the read based on the specified quality threshold.
78  /// \return how the read was affected,
79  /// NONE if the read was not modified,
80  /// FILTERED if the read was modified to unmapped because it was over
81  /// the quality threshold.
83  GenomeSequence& refSequence,
84  uint32_t qualityThreshold,
85  uint8_t defaultQualityInt);
86 
87  /// Get the sum of the qualities of all mismatches in the record.
88  /// \param record record on which to calculate the sum the mismatch qualities
89  /// \param refSequence reference to use to check for mismatches.
90  /// \param defaultQualityInt default value to use for the quality if no
91  /// quality was specified in the read.
92  /// \return sum of the qualities of mismatches
93  static uint32_t sumMismatchQuality(SamRecord& record,
94  GenomeSequence& refSequence,
95  uint8_t defaultQualityInt);
96 
97  /// Filter the read by marking it as unmapped.
98  static void filterRead(SamRecord& record);
99 };
100 
101 #endif
102 
Filtering clipped the read.
Definition: SamFilter.h:31
static uint32_t sumMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint8_t defaultQualityInt)
Get the sum of the qualities of all mismatches in the record.
Definition: SamFilter.cpp:451
The filter did not affect the read.
Definition: SamFilter.h:30
Class for helping to filter a SAM/BAM record.
Definition: SamFilter.h:25
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that)...
Definition: Cigar.h:83
static void filterRead(SamRecord &record)
Filter the read by marking it as unmapped.
Definition: SamFilter.cpp:486
static FilterStatus filterOnMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint32_t qualityThreshold, uint8_t defaultQualityInt)
Filter the read based on the specified quality threshold.
Definition: SamFilter.cpp:430
Filtering caused the read to be modified to unmapped.
Definition: SamFilter.h:32
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
FilterStatus
Enum describing what sort of filtering was done.
Definition: SamFilter.h:29
static FilterStatus clipOnMismatchThreshold(SamRecord &record, GenomeSequence &refSequence, double mismatchThreshold)
Clip the read based on the specified mismatch threshold.
Definition: SamFilter.cpp:27
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record...
Definition: SamRecord.h:51
static FilterStatus softClip(SamRecord &record, int32_t numFrontClips, int32_t numBackClips)
Soft clip the record from the front and/or the back.
Definition: SamFilter.cpp:155
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object...
Definition: CigarRoller.h:66