qc_read.Rd
Read FastQC data into R.
qc_read(file, modules = "all", verbose = TRUE)
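file | Path to the file to be imported. Can be the path to either: the FastQC zipped file, the unzipped folder name, or the fastqc_data.txt file (contained in the FastQC zipped file). |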
---|---|
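modules | Character vector containing the names of the FastQC modules for which you want to import/inspect the data. Default is "all". Allowed values include one or a combination of: "Summary", "Basic Statistics", "Per base sequence quality", "Per tile sequence quality", "Per sequence quality scores", "Per base sequence content", "Per sequence GC content", "Per base N content", "Sequence Length Distribution", "Sequence Duplication Levels", "Overrepresented sequences", "Adapter Content", "Kmer Content". Partial matching of module names is allowed. For example, you can use modules = "GC content" instead of the full name modules = "Per sequence GC content". |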
verbose | Logical value. If TRUE, prints the file name when reading. |
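Returns a list (of class "qc_read") of tibbles containing the data for the specified modules. When all modules are read, the list also contains the numeric element total_deduplicated_percentage.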
```r
# Demo file
qc.file <- system.file("fastqc_results", "S1_fastqc.zip", package = "fastqcr")
qc.file
#> [1] "/Users/kassambara/Documents/R/MyPackages/fastqcr/inst/fastqc_results/S1_fastqc.zip"

# Read all modules
qc_read(qc.file)
#>
#> $summary
#> # A tibble: 12 x 3
#>    status module                       sample
#>    <chr>  <chr>                        <chr>
#>  1 PASS   Basic Statistics             S1.fastq
#>  2 PASS   Per base sequence quality    S1.fastq
#>  3 PASS   Per tile sequence quality    S1.fastq
#>  4 PASS   Per sequence quality scores  S1.fastq
#>  5 FAIL   Per base sequence content    S1.fastq
#>  6 WARN   Per sequence GC content      S1.fastq
#>  7 PASS   Per base N content           S1.fastq
#>  8 WARN   Sequence Length Distribution S1.fastq
#>  9 PASS   Sequence Duplication Levels  S1.fastq
#> 10 PASS   Overrepresented sequences    S1.fastq
#> 11 PASS   Adapter Content              S1.fastq
#> 12 PASS   Kmer Content                 S1.fastq
#>
#> $basic_statistics
#> # A tibble: 8 x 2
#>   `Basic Statistics`                pass
#>   <chr>                             <chr>
#> 1 Measure                           Value
#> 2 Filename                          S1.fastq
#> 3 File type                         Conventional base calls
#> 4 Encoding                          Sanger / Illumina 1.9
#> 5 Total Sequences                   50299587
#> 6 Sequences flagged as poor quality 0
#> 7 Sequence length                   35-76
#> 8 %GC                               48
#>
#> $per_base_sequence_quality
#> # A tibble: 43 x 7
#>    Base   Mean Median `Lower Quartile` `Upper Quartile` `10th Percentil…
#>    <chr> <dbl>  <dbl>            <dbl>            <dbl>            <dbl>
#>  1 1      31.2     32               32               32               32
#>  2 2      31.5     32               32               32               32
#>  3 3      31.7     32               32               32               32
#>  4 4      31.7     32               32               32               32
#>  5 5      31.7     32               32               32               32
#>  6 6      35.3     36               36               36               36
#>  7 7      35.3     36               36               36               36
#>  8 8      35.3     36               36               36               36
#>  9 9      35.3     36               36               36               36
#> 10 10-11  35.3     36               36               36               36
#> # ... with 33 more rows, and 1 more variable: `90th Percentile` <dbl>
#>
#> $per_tile_sequence_quality
#> # A tibble: 18,576 x 3
#>     Tile Base    Mean
#>    <dbl> <chr>  <dbl>
#>  1 11101 1     0.175
#>  2 11101 2     0.0478
#>  3 11101 3     0.0668
#>  4 11101 4     0.0558
#>  5 11101 5     0.0485
#>  6 11101 6     0.0194
#>  7 11101 7     0.104
#>  8 11101 8     0.0629
#>  9 11101 9     0.103
#> 10 11101 10-11 0.0580
#> # ... with 18,566 more rows
#>
#> $per_sequence_quality_scores
#> # A tibble: 34 x 2
#>    Quality Count
#>      <dbl> <dbl>
#>  1       2    75
#>  2       3     0
#>  3       4     0
#>  4       5     0
#>  5       6     0
#>  6       7     0
#>  7       8     0
#>  8       9     0
#>  9      10     0
#> 10      11     0
#> # ... with 24 more rows
#>
#> $per_base_sequence_content
#> # A tibble: 43 x 5
#>    Base      G     A     T     C
#>    <chr> <dbl> <dbl> <dbl> <dbl>
#>  1 1      24.1  27.4  24.5  24.0
#>  2 2      23.5  27.2  25.5  23.8
#>  3 3      23.2  25.8  26.3  24.7
#>  4 4      23.5  25.9  26.2  24.3
#>  5 5      23.7  26.3  26.1  23.9
#>  6 6      24.3  25.4  25.6  24.7
#>  7 7      24.1  25.7  26.1  24.1
#>  8 8      23.5  25.8  26.2  24.5
#>  9 9      23.5  25.6  26.4  24.5
#> 10 10-11  23.6  25.9  26.4  24.1
#> # ... with 33 more rows
#>
#> $per_sequence_gc_content
#> # A tibble: 101 x 2
#>    `GC Content` Count
#>           <dbl> <dbl>
#>  1            0    81
#>  2            1    44
#>  3            2    14
#>  4            3  39.5
#>  5            4    58
#>  6            5  78.5
#>  7            6   143
#>  8            7  264.
#>  9            8  342.
#> 10            9  428.
#> # ... with 91 more rows
#>
#> $per_base_n_content
#> # A tibble: 43 x 2
#>    Base  `N-Count`
#>    <chr>     <dbl>
#>  1 1        0.0634
#>  2 2      0.000310
#>  3 3      0.000270
#>  4 4      0.000153
#>  5 5      0.000149
#>  6 6       0.00938
#>  7 7       0.00256
#>  8 8      0.000260
#>  9 9      0.000282
#> 10 10-11  0.000604
#> # ... with 33 more rows
#>
#> $sequence_length_distribution
#> # A tibble: 42 x 2
#>    Length Count
#>     <dbl> <dbl>
#>  1     35  1282
#>  2     36   144
#>  3     37   160
#>  4     38   172
#>  5     39   177
#>  6     40   164
#>  7     41   174
#>  8     42   183
#>  9     43   167
#> 10     44   198
#> # ... with 32 more rows
#>
#> $sequence_duplication_levels
#> # A tibble: 16 x 3
#>    `Duplication Level` `Percentage of deduplicated` `Percentage of total`
#>    <chr>                                      <dbl>                 <dbl>
#>  1 1                                           83.8                  69.4
#>  2 2                                           12.7                  21.1
#>  3 3                                           2.63                  6.53
#>  4 4                                          0.591                  1.96
#>  5 5                                          0.152                 0.629
#>  6 6                                         0.0400                 0.199
#>  7 7                                         0.0128                0.0744
#>  8 8                                        0.00532                0.0352
#>  9 9                                        0.00243                0.0181
#> 10 >10                                      0.00393                0.0415
#> 11 >50                                    0.0000298               0.00218
#> 12 >100                                   0.0000247               0.00524
#> 13 >500                                  0.00000300               0.00202
#> 14 >1k                                   0.00000266               0.00260
#> 15 >5k                                            0                     0
#> 16 >10k+                                 0.00000241                0.0565
#>
#> $overrepresented_sequences
#> # A tibble: 0 x 0
#>
#> $adapter_content
#> # A tibble: 64 x 5
#>    Position `Illumina Unive… `Illumina Small… `Nextera Transp… `SOLID Small RN…
#>       <dbl>            <dbl>            <dbl>            <dbl>            <dbl>
#>  1        1       0.00000994       0.00000199        0.0000139                0
#>  2        2        0.0000119       0.00000199        0.0000258       0.00000199
#>  3        3        0.0000179       0.00000398        0.0000358       0.00000596
#>  4        4        0.0000258       0.00000398        0.0000437       0.00000596
#>  5        5        0.0000338       0.00000398        0.0000497       0.00000596
#>  6        6        0.0000378       0.00000398        0.0000596       0.00000795
#>  7        7        0.0000437       0.00000398        0.0000676       0.00000795
#>  8        8        0.0000537       0.00000398        0.0000736       0.00000994
#>  9        9        0.0000557       0.00000398        0.0000835       0.00000994
#> 10       10        0.0000557       0.00000398        0.0000954       0.00000994
#> # ... with 54 more rows
#>
#> $kmer_content
#> # A tibble: 0 x 0
#>
#> $total_deduplicated_percentage
#> [1] 82.76
#>
#> attr(,"class")
#> [1] "list"    "qc_read"

# Read a specified module
qc_read(qc.file,"Per base sequence quality")
#>
#> $per_base_sequence_quality
#> # A tibble: 43 x 7
#>    Base   Mean Median `Lower Quartile` `Upper Quartile` `10th Percentil…
#>    <chr> <dbl>  <dbl>            <dbl>            <dbl>            <dbl>
#>  1 1      31.2     32               32               32               32
#>  2 2      31.5     32               32               32               32
#>  3 3      31.7     32               32               32               32
#>  4 4      31.7     32               32               32               32
#>  5 5      31.7     32               32               32               32
#>  6 6      35.3     36               36               36               36
#>  7 7      35.3     36               36               36               36
#>  8 8      35.3     36               36               36               36
#>  9 9      35.3     36               36               36               36
#> 10 10-11  35.3     36               36               36               36
#> # ... with 33 more rows, and 1 more variable: `90th Percentile` <dbl>
#>
#> attr(,"class")
#> [1] "list"    "qc_read"
```