Formats information from a VCF object for use in CCAFE methods. The following fields are extracted. From the rowRanges object: seqnames (chromosome) and ranges (position). From the geno object: ES (effect size of ALT), SE, and AF (allele frequency of ALT).
CCAFE_convertVCF(vcf)
A data frame with the columns Position, RSID, Chromosome, REF, ALT, beta, SE, AF, and OR. In the example output below, OR equals exp(beta).
library(VariantAnnotation)
#> Loading required package: BiocGenerics
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#> as.data.frame, basename, cbind, colnames, dirname, do.call,
#> duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#> lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#> pmin.int, rank, rbind, rownames, sapply, saveRDS, setdiff, table,
#> tapply, union, unique, unsplit, which.max, which.min
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#>
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#>
#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#> colWeightedMeans, colWeightedMedians, colWeightedSds,
#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#> rowWeightedSds, rowWeightedVars
#> Loading required package: GenomeInfoDb
#> Warning: package 'GenomeInfoDb' was built under R version 4.4.2
#> Loading required package: S4Vectors
#> Loading required package: stats4
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:utils':
#>
#> findMatches
#> The following objects are masked from 'package:base':
#>
#> I, expand.grid, unname
#> Loading required package: IRanges
#>
#> Attaching package: 'IRanges'
#> The following object is masked from 'package:grDevices':
#>
#> windows
#> Loading required package: GenomicRanges
#> Loading required package: SummarizedExperiment
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#>
#> rowMedians
#> The following objects are masked from 'package:matrixStats':
#>
#> anyMissing, rowMedians
#> Loading required package: Rsamtools
#> Loading required package: Biostrings
#> Loading required package: XVector
#>
#> Attaching package: 'Biostrings'
#> The following object is masked from 'package:base':
#>
#> strsplit
#>
#> Attaching package: 'VariantAnnotation'
#> The following object is masked from 'package:base':
#>
#> tabulate
library(CCAFE)
# Load the example data set "vcf_sample"; the object `vcf_sample` used below
# is presumably provided by this call (verify against the package data).
data("vcf_sample")
# Convert the VCF object into a data frame formatted for CCAFE methods.
df_sample <- CCAFE_convertVCF(vcf_sample)
#> Valid VCF object, converting to dataframe for CCAFE...
# Preview the first rows of the converted data frame.
print(head(df_sample))
#> Position RSID Chromosome REF ALT beta SE AF OR
#> 1 762320 rs75333668 1 C T -0.0352 0.0574 0.0078 0.9654123
#> 2 861349 rs200686669 1 C T -0.0299 0.1821 0.0034 0.9705426
#> 3 865545 rs201186828 1 G A 0.1188 0.1535 0.0041 1.1261447
#> 4 865584 rs148711625 1 G A -0.2004 0.0840 0.0219 0.8184033
#> 5 865625 rs146327803 1 G A -0.0778 0.2306 0.0046 0.9251494
#> 6 865628 rs41285790 1 G A -0.0037 0.0510 0.0049 0.9963068
# The converted data frame can now be passed to other CCAFE methods.
# Because the AF column holds the total allele frequency, CaseControl_AF is
# used here; it is pointed at the "OR" and "AF" columns by name.
# NOTE(review): N_case / N_control are presumably the numbers of cases and
# controls in the study -- confirm against the CaseControl_AF documentation.
# The result overwrites df_sample; the output printed after this call shows
# two additional columns, AF_case and AF_control.
df_sample <- CaseControl_AF(data = df_sample,
N_case = 48286,
N_control = 250671,
OR_colname = "OR",
AF_total_colname = "AF")
head(df_sample)
#> Position RSID Chromosome REF ALT beta SE AF OR
#> 1 762320 rs75333668 1 C T -0.0352 0.0574 0.0078 0.9654123
#> 2 861349 rs200686669 1 C T -0.0299 0.1821 0.0034 0.9705426
#> 3 865545 rs201186828 1 G A 0.1188 0.1535 0.0041 1.1261447
#> 4 865584 rs148711625 1 G A -0.2004 0.0840 0.0219 0.8184033
#> 5 865625 rs146327803 1 G A -0.0778 0.2306 0.0046 0.9251494
#> 6 865628 rs41285790 1 G A -0.0037 0.0510 0.0049 0.9963068
#> AF_case AF_control
#> 1 0.007574252 0.007843485
#> 2 0.003315901 0.003416200
#> 3 0.004523116 0.004018497
#> 4 0.018530155 0.022549123
#> 5 0.004309041 0.004656047
#> 6 0.004884892 0.004902910