Source: https://github.com/markziemann/shoulder-instability-osteroarthritis
Here we are analysing the gene expression of six patients in these tissues.
B: Bone
C: Capsule
F: Fat
M: Muscle
S: Synovium
They have a range of QuickDASH scores which I will examine.
suppressPackageStartupMessages({
library("zoo")
library("tidyverse")
library("reshape2")
library("DESeq2")
library("gplots")
library("fgsea")
library("MASS")
library("mitch")
library("eulerr")
library("limma")
library("topconfects")
library("kableExtra")
})
tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)
colnames(xx) <- sapply(strsplit(colnames(xx),"-"),"[[",2)
head(xx)
## 41B 41C 41F 41M 41S 42B 42C 42F 42M 42S 43B 43C
## ENSG00000000003.15 TSPAN6 327 231 613 27 346 432 301 579 46 336 279 683
## ENSG00000000005.6 TNMD 9 45 477 1 30 52 2 275 1 308 17 356
## ENSG00000000419.13 DPM1 304 497 485 132 502 349 333 422 126 339 851 815
## ENSG00000000457.14 SCYL3 245 245 280 83 231 202 185 183 93 168 287 281
## ENSG00000000460.17 C1orf112 101 91 79 15 82 86 71 53 9 49 229 105
## ENSG00000000938.13 FGR 2962 828 661 91 736 562 1012 713 95 479 1294 1404
## 43F 43M 43S 44B 44C 44F 44M 44S 45B 45C 45F 45M
## ENSG00000000003.15 TSPAN6 885 69 370 526 386 428 57 301 85 371 368 78
## ENSG00000000005.6 TNMD 772 12 387 3 22 480 2 10 60 157 246 4
## ENSG00000000419.13 DPM1 486 328 617 366 529 218 281 595 395 300 168 493
## ENSG00000000457.14 SCYL3 326 138 278 214 199 177 141 262 261 149 140 157
## ENSG00000000460.17 C1orf112 68 21 89 149 58 32 38 103 179 60 58 38
## ENSG00000000938.13 FGR 421 69 605 4440 775 406 74 803 8028 533 450 72
## 45S 46B 46C 46F 46M 46S
## ENSG00000000003.15 TSPAN6 231 722 342 385 55 363
## ENSG00000000005.6 TNMD 139 40 4 171 13 11
## ENSG00000000419.13 DPM1 414 525 434 273 238 516
## ENSG00000000457.14 SCYL3 184 300 242 147 113 237
## ENSG00000000460.17 C1orf112 70 134 60 47 19 116
## ENSG00000000938.13 FGR 1392 620 968 362 364 1021
Here I’ll look at a few different quality control measures.
par(mar=c(5,8,3,1))
barplot(colSums(xx),horiz=TRUE,las=1,xlab="num reads")
sums <- colSums(xx)
sums <- sums[order(sums)]
barplot(sums,horiz=TRUE,las=1,xlab="num reads")
abline(v=15000000,col="red")
URL="https://raw.githubusercontent.com/markziemann/shoulder-instability-osteroarthritis/main/OA_samplesheet.tsv"
ss <- read.table(URL,header=TRUE)
ss$col <- as.numeric(factor(ss$TissueType))
ss$id <- sapply(strsplit(ss$Dataset,"-"),"[[",1)
ss$Dataset <- sapply(strsplit(ss$Dataset,"-"),"[[",2)
URL="https://raw.githubusercontent.com/markziemann/shoulder-instability-osteroarthritis/main/OA_patients.tsv"
pat <- read.table(URL,header=TRUE,sep="\t")
pat$id <- sapply(strsplit(pat$Record_number," "),"[[",2)
Multidimensional scaling plot to show the variation between samples, very similar to PCA.
colvec <- c("orange2","pink","purple1","yellow","lightblue")
cols <- rep(colvec,6)
plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=19, cex=4, col=cols)
text(cmdscale(dist(t(xx))), labels=ss$ParticipantID )
legend("topright", inset=0.1, title="tissue type", c("Bone","Capsule","Fat","Muscle","Synovium"),
pch=19,col=colvec, cex=1.5)
heatmap.2(cor(xx),trace="n",main="Pearson correlation heatmap")
There were 4 samples with fewer than 15M reads and this might impact data quality: 4002-42S, 4006-46B, 4005-42F and 4006-46C.
MDS plot shows samples clustered by tissue type as expected. For example M samples to the right, F samples in the centre, C samples in the lower right and S samples between C and F. However the B samples were somewhat not clustered, which is something to look into.
The result of the MDS is confirmed in the correlation heatmap.
Also a good point to filter out any genes with low expression (average < 10 counts).
Below I show the dimensions of each dataset (separated by tissue type), in terms of number of genes detected and samples.
ssb <- subset(ss,TissueType=="B")
xxb <- xx[,which(colnames(xx) %in% ssb$Dataset)]
xxb <- xxb[which(rowMeans(xxb)>10),]
ssc <- subset(ss,TissueType=="C")
xxc <- xx[,which(colnames(xx) %in% ssc$Dataset)]
xxc <- xxc[which(rowMeans(xxc)>10),]
ssf <- subset(ss,TissueType=="F")
xxf <- xx[,which(colnames(xx) %in% ssf$Dataset)]
xxf <- xxf[which(rowMeans(xxf)>10),]
ssm <- subset(ss,TissueType=="M")
xxm <- xx[,which(colnames(xx) %in% ssm$Dataset)]
xxm <- xxm[which(rowMeans(xxm)>10),]
sss <- subset(ss,TissueType=="S")
xxs <- xx[,which(colnames(xx) %in% sss$Dataset)]
xxs <- xxs[which(rowMeans(xxs)>10),]
lapply(list(xx,xxb,xxc,xxf,xxm,xxs),dim)
## [[1]]
## [1] 60651 30
##
## [[2]]
## [1] 20241 6
##
## [[3]]
## [1] 18726 6
##
## [[4]]
## [1] 19072 6
##
## [[5]]
## [1] 15920 6
##
## [[6]]
## [1] 19361 6
ssb$quickdash <- scale(pat[match(ssb$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxb , colData = ssb, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in bone") %>% kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | 41B | 42B | 43B | 44B | 45B | 46B | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
ENSG00000211638.2 IGLV8-61 | 440.91837 | -2.931254 | 0.5159649 | -5.681111 | 0.0000000 | 0.0002709 | 8.930659 | 4.205568 | 8.597901 | 3.501237 | 10.671973 | 7.232449 |
ENSG00000274419.6 TBC1D3D | 46.71931 | -3.506396 | 0.7067664 | -4.961181 | 0.0000007 | 0.0048521 | 5.716401 | 1.861318 | 5.328218 | 1.861318 | 7.103516 | 6.290270 |
ENSG00000145423.5 SFRP2 | 460.55116 | -1.786447 | 0.3604527 | -4.956120 | 0.0000007 | 0.0048521 | 9.225523 | 5.445464 | 6.876516 | 6.747061 | 10.415843 | 9.162634 |
ENSG00000196436.8 NPIPB15 | 83.47206 | -2.540972 | 0.5571418 | -4.560727 | 0.0000051 | 0.0257955 | 7.177856 | 3.405667 | 3.060644 | 3.501237 | 7.693135 | 7.305542 |
ENSG00000271130.1 IGHV3OR16-8 | 27.99582 | -3.564167 | 0.8218232 | -4.336903 | 0.0000145 | 0.0584983 | 5.040078 | 1.861318 | 3.840140 | 1.861318 | 7.123073 | 2.839618 |
ENSG00000095752.7 IL11 | 298.67564 | 3.846244 | 0.9014247 | 4.266851 | 0.0000198 | 0.0668802 | 3.780852 | 3.079033 | 2.721216 | 10.789836 | 2.586595 | 4.535245 |
ENSG00000073756.13 PTGS2 | 141.73199 | 1.285224 | 0.3072010 | 4.183659 | 0.0000287 | 0.0829461 | 5.815436 | 8.399398 | 6.955978 | 8.123029 | 5.620540 | 5.551745 |
ENSG00000270149.5 AL591806.2 | 105.30108 | -1.121805 | 0.2727571 | -4.112835 | 0.0000391 | 0.0988847 | 7.021321 | 5.213977 | 6.373073 | 5.544574 | 8.085718 | 6.729005 |
ENSG00000281383.1 FP671120.7 | 60.01779 | 1.433140 | 0.3545168 | 4.042517 | 0.0000529 | 0.1189281 | 4.910451 | 6.903751 | 6.396072 | 7.003666 | 4.151981 | 4.419797 |
ENSG00000214940.8 NPIPA8 | 88.24011 | -2.685878 | 0.6790047 | -3.955611 | 0.0000763 | 0.1545185 | 4.767596 | 3.079033 | 3.514704 | 3.342451 | 8.967258 | 3.220658 |
ENSG00000130720.13 FIBCD1 | 12.86486 | 3.555247 | 0.9047775 | 3.929416 | 0.0000852 | 0.1566884 | 2.583587 | 3.767650 | 1.861318 | 6.248497 | 1.861318 | 1.861318 |
ENSG00000119508.18 NR4A3 | 210.56959 | 2.481732 | 0.6488879 | 3.824593 | 0.0001310 | 0.2106384 | 4.663511 | 10.190786 | 4.810847 | 4.675312 | 3.982721 | 5.350149 |
ENSG00000153993.13 SEMA3D | 263.02297 | 1.762272 | 0.4617345 | 3.816634 | 0.0001353 | 0.2106384 | 7.681316 | 8.579345 | 7.144141 | 9.615365 | 3.787985 | 6.539845 |
ENSG00000229183.9 PGA4 | 13.52739 | -3.644256 | 0.9753663 | -3.736295 | 0.0001868 | 0.2700033 | 3.780852 | 1.861318 | 3.060644 | 1.861318 | 4.070076 | 6.083706 |
ENSG00000039537.14 C6 | 98.93901 | -1.159004 | 0.3170880 | -3.655151 | 0.0002570 | 0.3468372 | 6.489877 | 5.475657 | 6.147608 | 4.947782 | 7.775736 | 7.497997 |
ENSG00000134184.13 GSTM1 | 42.95966 | -7.719138 | 2.1431726 | -3.601734 | 0.0003161 | 0.3899222 | 1.861318 | 1.861318 | 1.861318 | 1.861318 | 1.861318 | 8.049712 |
ENSG00000223931.2 AC142381.1 | 57.50480 | -1.931489 | 0.5376410 | -3.592526 | 0.0003275 | 0.3899222 | 5.268256 | 3.538885 | 3.514704 | 3.984908 | 8.153817 | 4.478738 |
ENSG00000136634.7 IL10 | 70.99991 | 2.008719 | 0.5632486 | 3.566310 | 0.0003620 | 0.4071179 | 4.663511 | 8.387616 | 5.465747 | 4.335672 | 3.270504 | 5.001029 |
ENSG00000137077.8 CCL21 | 15.70882 | -1.701534 | 0.4914766 | -3.462085 | 0.0005360 | 0.5710177 | 3.975064 | 2.864993 | 4.098614 | 3.157099 | 6.005978 | 4.225850 |
ENSG00000153266.13 FEZF2 | 26.86972 | 2.060832 | 0.5992079 | 3.439260 | 0.0005833 | 0.5903348 | 1.861318 | 6.677080 | 3.840140 | 5.810519 | 3.270504 | 3.499068 |
dgeb <- dge
write.table(dgeb,file="deseq2_bone.tsv",quote=FALSE,sep='\t')
ssc$quickdash <- scale(pat[match(ssc$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxc , colData = ssc, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in capsule") %>% kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | 41C | 42C | 43C | 44C | 45C | 46C | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
ENSG00000248144.6 ADH1C | 49.68522 | -2.0385840 | 0.3247349 | -6.277686 | 0.00e+00 | 0.0000034 | 6.248232 | 4.879783 | 5.703559 | 5.255560 | 7.706001 | 6.636002 |
ENSG00000274419.6 TBC1D3D | 73.36200 | -4.1811328 | 0.6672278 | -6.266425 | 0.00e+00 | 0.0000034 | 6.658670 | 4.403795 | 5.678897 | 4.403795 | 7.624139 | 7.922841 |
ENSG00000039537.14 C6 | 299.22535 | -2.4279663 | 0.4208040 | -5.769827 | 0.00e+00 | 0.0000486 | 10.016795 | 5.449903 | 7.719263 | 5.867846 | 9.028609 | 7.267074 |
ENSG00000142178.9 SIK1 | 110.70007 | 2.3193982 | 0.4383154 | 5.291619 | 1.00e-07 | 0.0004610 | 5.965574 | 7.119941 | 5.355726 | 9.086798 | 5.067327 | 5.773043 |
ENSG00000125730.17 C3 | 2208.32354 | -1.8826918 | 0.3562169 | -5.285240 | 1.00e-07 | 0.0004610 | 12.832108 | 8.257105 | 10.190717 | 8.351984 | 11.663371 | 10.183791 |
ENSG00000186431.19 FCAR | 30.38540 | 2.5646086 | 0.4913965 | 5.219021 | 2.00e-07 | 0.0004781 | 5.078262 | 7.325994 | 5.242080 | 6.292669 | 4.403795 | 5.254083 |
ENSG00000166482.12 MFAP4 | 3157.73936 | -0.9271698 | 0.1777349 | -5.216589 | 2.00e-07 | 0.0004781 | 12.438460 | 10.259131 | 11.298717 | 10.421206 | 12.450246 | 11.519873 |
ENSG00000176945.17 MUC20 | 344.82717 | -1.1779858 | 0.2346290 | -5.020632 | 5.00e-07 | 0.0011821 | 9.574743 | 7.107541 | 8.080757 | 7.126881 | 9.084261 | 8.841379 |
ENSG00000211669.3 IGLV3-10 | 19.79640 | -3.9906631 | 0.8137022 | -4.904329 | 9.00e-07 | 0.0019127 | 5.008137 | 4.403795 | 4.403795 | 4.403795 | 7.260605 | 4.729155 |
ENSG00000159263.16 SIM2 | 117.11826 | -1.1230524 | 0.2331657 | -4.816542 | 1.50e-06 | 0.0026821 | 7.279374 | 5.919030 | 7.111892 | 6.451796 | 8.469127 | 7.102895 |
ENSG00000124107.5 SLPI | 135.25645 | -1.2871914 | 0.2741228 | -4.695674 | 2.70e-06 | 0.0044357 | 8.295381 | 5.886992 | 7.401278 | 6.214895 | 7.919685 | 7.529725 |
ENSG00000188257.12 PLA2G2A | 32861.23105 | -1.5584845 | 0.3479723 | -4.478760 | 7.50e-06 | 0.0113600 | 15.972762 | 11.078605 | 15.408404 | 12.933800 | 15.194721 | 15.362567 |
ENSG00000197766.8 CFD | 10476.63157 | -1.2618786 | 0.2833101 | -4.454055 | 8.40e-06 | 0.0113600 | 14.658820 | 10.791169 | 12.998718 | 11.843394 | 13.934758 | 12.957715 |
ENSG00000163735.7 CXCL5 | 57.91874 | 2.7383282 | 0.6156688 | 4.447730 | 8.70e-06 | 0.0113600 | 4.707618 | 8.423334 | 5.281803 | 5.754112 | 4.737747 | 5.590062 |
ENSG00000128383.13 APOBEC3A | 68.97410 | 1.2533513 | 0.2827128 | 4.433303 | 9.30e-06 | 0.0113600 | 5.915336 | 7.408607 | 6.106610 | 7.773314 | 5.873228 | 5.838137 |
ENSG00000136689.19 IL1RN | 78.20007 | 1.3717917 | 0.3116023 | 4.402381 | 1.07e-05 | 0.0122876 | 5.989888 | 8.106177 | 6.824687 | 6.787031 | 5.439799 | 6.402780 |
ENSG00000145824.13 CXCL14 | 1562.57168 | -1.2371931 | 0.2835391 | -4.363396 | 1.28e-05 | 0.0138319 | 11.450332 | 8.940588 | 11.098540 | 8.177586 | 10.959583 | 10.863248 |
ENSG00000162747.12 FCGR3B | 332.89923 | 1.4785624 | 0.3458081 | 4.275673 | 1.91e-05 | 0.0189012 | 7.158334 | 9.732960 | 6.338019 | 9.844083 | 6.942508 | 6.875852 |
ENSG00000205221.12 VIT | 242.40661 | -1.5450981 | 0.3618600 | -4.269878 | 1.96e-05 | 0.0189012 | 9.380209 | 5.545030 | 7.375552 | 6.862148 | 8.900090 | 7.750909 |
ENSG00000122121.12 XPNPEP2 | 92.06925 | -1.4711424 | 0.3566169 | -4.125274 | 3.70e-05 | 0.0339967 | 8.059610 | 5.343401 | 6.918292 | 5.894526 | 7.590016 | 6.568593 |
dgec <- dge
write.table(dgec,file="deseq2_capsule.tsv",quote=FALSE,sep='\t')
ssf$quickdash <- scale(pat[match(ssf$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxf , colData = ssf, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in fat") %>% kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | 41F | 42F | 43F | 44F | 45F | 46F | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
ENSG00000073756.13 PTGS2 | 119.90779 | 3.7022183 | 0.6818779 | 5.429445 | 1.00e-07 | 0.0010781 | 4.734032 | 9.468892 | 4.825193 | 4.775833 | 3.498061 | 4.413838 |
ENSG00000278299.5 TBC1D3C | 21.01376 | -3.8365212 | 0.7254307 | -5.288612 | 1.00e-07 | 0.0011753 | 5.291506 | 3.498061 | 3.498061 | 3.498061 | 6.857765 | 4.913062 |
ENSG00000196436.8 NPIPB15 | 52.46229 | -2.5979241 | 0.4991609 | -5.204582 | 2.00e-07 | 0.0012361 | 6.745880 | 4.262497 | 4.251850 | 4.152412 | 7.328100 | 6.680265 |
ENSG00000273294.1 C1QTNF3-AMACR | 25.76986 | -3.9396329 | 0.7653204 | -5.147691 | 3.00e-07 | 0.0012574 | 5.349143 | 3.498061 | 3.498061 | 3.498061 | 6.857765 | 5.889474 |
ENSG00000019169.11 MARCO | 1154.06922 | -1.0710154 | 0.2205026 | -4.857155 | 1.20e-06 | 0.0045424 | 10.428500 | 8.474129 | 9.269474 | 9.092977 | 11.571676 | 10.126887 |
ENSG00000205502.4 C2CD4B | 204.79684 | 2.4598164 | 0.5102746 | 4.820574 | 1.40e-06 | 0.0045501 | 5.134134 | 10.143152 | 5.379069 | 6.061604 | 4.891409 | 5.473738 |
ENSG00000214940.8 NPIPA8 | 80.18742 | -2.6254709 | 0.5669613 | -4.630776 | 3.60e-06 | 0.0099255 | 5.064806 | 4.477605 | 4.338507 | 4.152412 | 8.850932 | 5.094199 |
ENSG00000258430.1 AL583722.1 | 21.01845 | -3.2268797 | 0.7033708 | -4.587736 | 4.50e-06 | 0.0104211 | 5.134134 | 3.498061 | 4.613647 | 3.498061 | 6.261418 | 6.082966 |
ENSG00000164694.17 FNDC1 | 1731.21142 | -1.3540428 | 0.2964017 | -4.568269 | 4.90e-06 | 0.0104211 | 10.629144 | 8.177412 | 9.233994 | 9.428736 | 12.584622 | 10.246329 |
ENSG00000164825.4 DEFB1 | 158.38918 | -1.3044346 | 0.2959097 | -4.408218 | 1.04e-05 | 0.0181199 | 7.372966 | 5.552629 | 7.900770 | 5.893925 | 8.427900 | 7.802637 |
ENSG00000215914.4 MMP23A | 17.82400 | -2.9069569 | 0.6595285 | -4.407629 | 1.05e-05 | 0.0181199 | 4.991015 | 3.498061 | 4.725032 | 3.498061 | 6.346294 | 5.473738 |
ENSG00000169429.11 CXCL8 | 93.23866 | 2.4216015 | 0.5614329 | 4.313252 | 1.61e-05 | 0.0255677 | 4.826800 | 9.026023 | 5.287173 | 5.348505 | 4.593292 | 4.150954 |
ENSG00000153266.13 FEZF2 | 25.54163 | 2.5841825 | 0.6045138 | 4.274811 | 1.91e-05 | 0.0280651 | 4.225928 | 7.068360 | 4.416223 | 5.667402 | 4.140369 | 3.498061 |
ENSG00000261654.1 AL360270.2 | 356.86548 | 2.1074011 | 0.4975544 | 4.235519 | 2.28e-05 | 0.0310634 | 6.879139 | 10.852957 | 7.096235 | 5.833049 | 5.561161 | 5.889474 |
ENSG00000274419.6 TBC1D3D | 48.89398 | -2.3660608 | 0.5609489 | -4.217961 | 2.47e-05 | 0.0313443 | 5.028511 | 4.477605 | 5.644417 | 3.498061 | 6.828880 | 7.440265 |
ENSG00000106976.21 DNM1 | 1568.41911 | -0.9708034 | 0.2322853 | -4.179358 | 2.92e-05 | 0.0338734 | 11.424041 | 9.081865 | 10.265546 | 9.475019 | 11.629322 | 10.161399 |
ENSG00000277027.1 RMRP | 17.32159 | -2.8477992 | 0.6825976 | -4.172003 | 3.02e-05 | 0.0338734 | 5.099987 | 3.498061 | 4.871869 | 3.498061 | 5.942595 | 5.828673 |
ENSG00000100473.18 COCH | 128.93323 | -1.3349220 | 0.3223252 | -4.141538 | 3.45e-05 | 0.0353936 | 7.835377 | 6.094936 | 6.324899 | 5.348505 | 8.640548 | 6.382682 |
ENSG00000215246.5 AC116351.1 | 47.55904 | 2.3707474 | 0.5731249 | 4.136528 | 3.53e-05 | 0.0353936 | 5.134134 | 8.023617 | 5.116596 | 4.775833 | 3.954101 | 4.413838 |
ENSG00000104921.15 FCER2 | 130.89946 | -1.2035532 | 0.2934860 | -4.100888 | 4.12e-05 | 0.0375994 | 8.151345 | 5.402904 | 6.809562 | 6.187101 | 8.053827 | 7.085475 |
dgef <- dge
write.table(dgef,file="deseq2_fat.tsv",quote=FALSE,sep='\t')
ssm$quickdash <- scale(pat[match(ssm$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxm , colData = ssm, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in muscle") %>% kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | 41M | 42M | 43M | 44M | 45M | 46M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
ENSG00000274419.6 TBC1D3D | 19.41891 | -3.6142012 | 0.7764588 | -4.654724 | 0.0000032 | 0.0516470 | 5.482788 | 4.094096 | 4.710388 | 4.094096 | 5.934661 | 6.578833 |
ENSG00000160888.7 IER2 | 518.83277 | 1.4210313 | 0.3193194 | 4.450188 | 0.0000086 | 0.0612357 | 7.714334 | 11.051849 | 7.892072 | 8.332358 | 7.098796 | 8.135541 |
ENSG00000081041.9 CXCL2 | 76.78205 | 1.7761791 | 0.4104752 | 4.327128 | 0.0000151 | 0.0612357 | 4.808246 | 8.520217 | 5.875837 | 6.418142 | 5.287051 | 5.776348 |
ENSG00000171617.15 ENC1 | 190.11873 | 0.8976600 | 0.2076431 | 4.323090 | 0.0000154 | 0.0612357 | 6.962449 | 8.411764 | 7.872623 | 8.627597 | 6.851619 | 6.965453 |
ENSG00000196436.8 NPIPB15 | 16.76047 | -2.9244111 | 0.7262633 | -4.026654 | 0.0000566 | 0.1610387 | 5.398903 | 4.460127 | 4.451708 | 4.094096 | 6.337441 | 5.969007 |
ENSG00000134184.13 GSTM1 | 119.57051 | -8.5934024 | 2.1429395 | -4.010100 | 0.0000607 | 0.1610387 | 4.094096 | 4.094096 | 4.094096 | 4.094096 | 4.094096 | 9.553037 |
ENSG00000154734.16 ADAMTS1 | 666.03357 | 0.9346697 | 0.2388152 | 3.913778 | 0.0000909 | 0.2066490 | 8.827198 | 10.886483 | 9.145379 | 9.225506 | 8.121563 | 8.668636 |
ENSG00000204460.3 LINC01854 | 381.98127 | 1.0571224 | 0.2877602 | 3.673623 | 0.0002391 | 0.4709364 | 8.706825 | 8.866629 | 8.038808 | 10.070861 | 7.117544 | 6.996709 |
ENSG00000224687.2 RASAL2-AS1 | 38.21144 | 2.0203818 | 0.5541188 | 3.646117 | 0.0002662 | 0.4709364 | 4.808246 | 4.904082 | 4.885817 | 7.868738 | 4.819832 | 5.117292 |
ENSG00000081181.8 ARG2 | 139.55961 | -1.4436456 | 0.3990482 | -3.617723 | 0.0002972 | 0.4731530 | 5.941149 | 5.726977 | 5.957945 | 6.267292 | 9.412878 | 6.399891 |
ENSG00000168621.15 GDNF | 610.01979 | 0.9797612 | 0.2741525 | 3.573782 | 0.0003519 | 0.5092399 | 8.931970 | 10.869537 | 8.249185 | 9.371930 | 8.121563 | 8.000207 |
ENSG00000136244.12 IL6 | 13.43905 | 2.4760095 | 0.7132823 | 3.471290 | 0.0005180 | 0.6871673 | 4.453894 | 6.656734 | 4.803969 | 4.698627 | 4.421385 | 4.559315 |
ENSG00000122877.16 EGR2 | 27.52304 | 1.7208326 | 0.5036746 | 3.416556 | 0.0006342 | 0.7766346 | 4.601627 | 7.254736 | 5.251880 | 5.278482 | 4.744556 | 5.410203 |
ENSG00000162772.17 ATF3 | 180.42876 | 1.0884805 | 0.3249612 | 3.349570 | 0.0008094 | 0.9203697 | 5.734328 | 8.688426 | 6.844778 | 8.616434 | 6.432825 | 7.739545 |
ENSG00000125740.14 FOSB | 727.74879 | 3.5458419 | 1.0869717 | 3.262129 | 0.0011058 | 0.9999305 | 4.453894 | 12.028100 | 7.467668 | 4.870848 | 4.819832 | 6.646910 |
ENSG00000170345.10 FOS | 5286.85345 | 3.0332649 | 0.9371373 | 3.236735 | 0.0012091 | 0.9999305 | 6.094375 | 14.861025 | 8.847656 | 8.446097 | 6.432825 | 10.221335 |
ENSG00000177606.8 JUN | 7279.99453 | 0.9818296 | 0.3081936 | 3.185756 | 0.0014438 | 0.9999305 | 11.723421 | 14.584456 | 11.885568 | 11.831670 | 11.273073 | 12.558502 |
ENSG00000267247.1 AP005264.2 | 43.98595 | 1.1211783 | 0.3547236 | 3.160710 | 0.0015739 | 0.9999305 | 5.797137 | 7.470043 | 5.931180 | 5.926055 | 5.364608 | 5.410203 |
ENSG00000159399.10 HK2 | 766.44623 | -0.7636722 | 0.2417055 | -3.159515 | 0.0015803 | 0.9999305 | 9.865551 | 7.884404 | 9.131868 | 9.139829 | 10.107042 | 10.454723 |
ENSG00000128965.13 CHAC1 | 31.98198 | 1.4449630 | 0.4574674 | 3.158614 | 0.0015852 | 0.9999305 | 5.207028 | 7.296406 | 5.251880 | 5.409718 | 4.744556 | 5.776348 |
dgem <- dge
write.table(dgem,file="deseq2_muscle.tsv",quote=FALSE,sep='\t')
sss$quickdash <- scale(pat[match(sss$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxs , colData = sss, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in synovium") %>% kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | 41S | 42S | 43S | 44S | 45S | 46S | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
ENSG00000119508.18 NR4A3 | 481.25337 | 2.5912836 | 0.4224586 | 6.133817 | 0.0e+00 | 0.0000089 | 6.342950 | 11.312129 | 7.252746 | 7.529529 | 5.661256 | 6.794226 |
ENSG00000211893.4 IGHG2 | 610.05143 | -2.7334236 | 0.4464040 | -6.123206 | 0.0e+00 | 0.0000089 | 6.594382 | 5.936293 | 5.962233 | 5.715193 | 11.567969 | 9.184508 |
ENSG00000198435.4 NRARP | 381.16544 | 1.1179232 | 0.1899825 | 5.884347 | 0.0e+00 | 0.0000258 | 8.244739 | 9.925151 | 8.107532 | 9.270060 | 7.120048 | 7.842529 |
ENSG00000137962.13 ARHGAP29 | 654.33270 | 0.7928283 | 0.1461255 | 5.425668 | 1.0e-07 | 0.0002795 | 9.006135 | 10.173490 | 9.329650 | 10.002710 | 8.044563 | 9.176952 |
ENSG00000069122.19 ADGRF5 | 958.81660 | 0.6851472 | 0.1322419 | 5.181016 | 2.0e-07 | 0.0008545 | 9.642179 | 10.460410 | 9.784922 | 10.690402 | 8.798755 | 9.682777 |
ENSG00000160200.18 CBS | 153.06842 | -1.2082613 | 0.2348533 | -5.144749 | 3.0e-07 | 0.0008644 | 7.266678 | 6.662419 | 7.209423 | 6.090989 | 8.967252 | 7.535559 |
ENSG00000172201.12 ID4 | 1039.66904 | 0.8189104 | 0.1620821 | 5.052442 | 4.0e-07 | 0.0011039 | 9.646803 | 11.144889 | 9.912144 | 10.429460 | 8.931841 | 9.224151 |
ENSG00000091879.14 ANGPT2 | 263.11042 | 1.0103110 | 0.2003035 | 5.043900 | 5.0e-07 | 0.0011039 | 7.618551 | 8.864783 | 7.501996 | 9.301480 | 6.891241 | 7.861650 |
ENSG00000128917.8 DLL4 | 474.37653 | 0.8229100 | 0.1672282 | 4.920881 | 9.0e-07 | 0.0018534 | 8.392112 | 9.962565 | 9.172649 | 9.315911 | 7.897936 | 8.290204 |
ENSG00000255422.3 AP002954.1 | 34.26502 | -1.4750875 | 0.3026374 | -4.874108 | 1.1e-06 | 0.0021162 | 6.048514 | 5.284400 | 5.839404 | 5.209793 | 6.984841 | 6.504942 |
ENSG00000102755.12 FLT1 | 765.33467 | 0.6504951 | 0.1366575 | 4.760038 | 1.9e-06 | 0.0033008 | 9.274835 | 10.300166 | 9.337561 | 10.250167 | 8.628099 | 9.466493 |
ENSG00000153721.19 CNKSR3 | 270.61973 | 0.9462839 | 0.1992663 | 4.748841 | 2.0e-06 | 0.0033008 | 7.282919 | 9.416852 | 8.112175 | 8.592940 | 7.154720 | 7.829637 |
ENSG00000211660.3 IGLV2-23 | 127.58748 | -4.7808741 | 1.0122016 | -4.723243 | 2.3e-06 | 0.0034569 | 4.842938 | 4.439336 | 4.849674 | 4.439336 | 9.585722 | 6.197751 |
ENSG00000142178.9 SIK1 | 83.51102 | 2.5309590 | 0.5568599 | 4.545056 | 5.5e-06 | 0.0075952 | 5.961863 | 6.699988 | 6.249406 | 8.681734 | 4.897052 | 4.439336 |
ENSG00000270550.1 IGHV3-30 | 64.50448 | -3.2358005 | 0.7200406 | -4.493914 | 7.0e-06 | 0.0088325 | 5.133949 | 4.439336 | 4.729976 | 4.947109 | 8.607821 | 6.059306 |
ENSG00000239922.2 AC092958.1 | 52.08835 | -1.0543928 | 0.2353789 | -4.479555 | 7.5e-06 | 0.0088325 | 6.746456 | 5.663994 | 6.231031 | 5.801409 | 7.346255 | 6.398354 |
ENSG00000224799.1 AL139397.1 | 22.12071 | 3.9014647 | 0.8728998 | 4.469545 | 7.8e-06 | 0.0088325 | 4.439336 | 7.360643 | 5.200730 | 4.947109 | 4.439336 | 4.439336 |
ENSG00000196436.8 NPIPB15 | 94.67156 | -2.2311487 | 0.5003033 | -4.459592 | 8.2e-06 | 0.0088325 | 7.322710 | 5.570767 | 4.849674 | 4.947109 | 8.510659 | 7.204041 |
ENSG00000164035.10 EMCN | 921.78699 | 0.7818082 | 0.1766971 | 4.424567 | 9.7e-06 | 0.0092012 | 9.262829 | 10.632730 | 9.374555 | 10.648705 | 8.599629 | 9.858255 |
ENSG00000120279.6 MYCT1 | 300.89766 | 0.8354745 | 0.1890875 | 4.418453 | 9.9e-06 | 0.0092012 | 8.014715 | 9.307625 | 8.354564 | 8.697591 | 6.997701 | 8.193281 |
dges <- dge
write.table(dges,file="deseq2_synovium.tsv",quote=FALSE,sep='\t')
Here let’s look at some plots.
MA plot shows the avrage level and fold change of all detected genes. Volcano plot shows the fold change and the significance, as measured by -log(p-value). Significant genes are shown as red points.
There are heatmaps of the top ranked genes by p-value. Above the gene expression values there is a bar in yellow/orange colours. Yellow shows relatively low QuickDASH score and orange shows high score.
maplot <- function(de,contrast_name) {
sig <-subset(de, padj < 0.05 )
up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
GENESUP <- length(up)
GENESDN <- length(dn)
DET=nrow(de)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=0.5, col="dark gray",
main=contrast_name, cex.main=1)
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=0.5, col="red")
mtext(SUBHEADER,cex = 1)
}
make_volcano <- function(de,name) {
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}
make_heatmap <- function(de,name,myss,mx,n=30){
colfunc <- colorRampPalette(c("blue", "white", "red"))
values <- myss$quickdash
f <- colorRamp(c("yellow", "orange"))
rr <- range(values)
svals <- (values-rr[1])/diff(rr)
colcols <- rgb(f(svals)/255)
mxn <- mx/rowSums(mx)*1000000
x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(10,25), cexRow=1.5,
main=paste("Top ranked",n,"genes in",name) , ColSideColors = colcols )
}
maplot(dgeb,"bone")
make_volcano(dgeb,"bone")
make_heatmap(dgeb,"bone",ssb,xxb,n=20)
maplot(dgec,"capsule")
make_volcano(dgec,"capsule")
make_heatmap(dgec,"capsule",ssc,xxc,n=20)
maplot(dgef,"fat")
make_volcano(dgef,"fat")
make_heatmap(dgef,"fat",ssf,xxf,n=20)
maplot(dgem,"muscle")
make_volcano(dgem,"muscle")
make_heatmap(dgem,"muscle",ssm,xxm,n=20)
maplot(dges,"synovium")
make_volcano(dges,"synovium")
make_heatmap(dges,"synovium",sss,xxs,n=20)
Here I’m using the mitch package and gene pathways from Reactome to understand the affected pathways separately for each tissue type.
Reactome pathways downloaded 24th Nov 2021.
#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")
# gene table
gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)
m <- mitch_import(dgeb, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 20241
## Note: no. genes in output = 20208
## Note: estimated proportion of input genes in output = 0.998
mb <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(mb$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in bone") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
197 | Classical antibody-mediated complement activation | 73 | 0 | -0.8228846 | 0 |
379 | FCGR activation | 80 | 0 | -0.7774034 | 0 |
1120 | Scavenging of heme from plasma | 74 | 0 | -0.8016828 | 0 |
225 | Creation of C4 and C2 activators | 78 | 0 | -0.7756990 | 0 |
555 | Initial triggering of complement | 85 | 0 | -0.7419084 | 0 |
1077 | Role of LAT2/NTAL/LAB on calcium mobilization | 78 | 0 | -0.7241558 | 0 |
125 | CD22 mediated BCR regulation | 61 | 0 | -0.7912800 | 0 |
1078 | Role of phospholipids in phagocytosis | 93 | 0 | -0.6392111 | 0 |
541 | Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell | 178 | 0 | -0.4475225 | 0 |
377 | FCERI mediated MAPK activation | 94 | 0 | -0.5995284 | 0 |
1008 | Regulation of Complement cascade | 102 | 0 | -0.5698153 | 0 |
209 | Complement cascade | 109 | 0 | -0.5505979 | 0 |
376 | FCERI mediated Ca+2 mobilization | 93 | 0 | -0.5870080 | 0 |
112 | Binding and Uptake of Ligands by Scavenger Receptors | 102 | 0 | -0.5573626 | 0 |
76 | Antigen activates B Cell Receptor (BCR) leading to generation of second messengers | 86 | 0 | -0.5856265 | 0 |
381 | FCGR3A-mediated phagocytosis | 127 | 0 | -0.4821218 | 0 |
625 | Leishmania phagocytosis | 127 | 0 | -0.4821218 | 0 |
835 | Parasite infection | 127 | 0 | -0.4821218 | 0 |
1036 | Regulation of actin dynamics for phagocytic cup formation | 129 | 0 | -0.4707796 | 0 |
380 | FCGR3A-mediated IL10 synthesis | 104 | 0 | -0.5238347 | 0 |
write.table(mb$enrichment_result,file="mitch_bone.tsv",quote=FALSE,sep='\t')
m <- mitch_import(dgec, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 18726
## Note: no. genes in output = 18694
## Note: estimated proportion of input genes in output = 0.998
mc <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(mc$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in capsule") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
554 | Initial triggering of complement | 49 | 0 | -0.8492346 | 0 |
199 | Classical antibody-mediated complement activation | 38 | 0 | -0.9169140 | 0 |
227 | Creation of C4 and C2 activators | 42 | 0 | -0.8686289 | 0 |
163 | Cell Cycle | 611 | 0 | 0.2275055 | 0 |
165 | Cell Cycle, Mitotic | 495 | 0 | 0.2394929 | 0 |
211 | Complement cascade | 72 | 0 | -0.6135992 | 0 |
1003 | Regulation of Complement cascade | 66 | 0 | -0.6355730 | 0 |
1113 | Scavenging of heme from plasma | 39 | 0 | -0.8065687 | 0 |
114 | Binding and Uptake of Ligands by Scavenger Receptors | 66 | 0 | -0.6089204 | 0 |
164 | Cell Cycle Checkpoints | 252 | 0 | 0.3122514 | 0 |
380 | FCGR activation | 45 | 0 | -0.7008383 | 0 |
633 | M Phase | 354 | 0 | 0.2381895 | 0 |
127 | CD22 mediated BCR regulation | 36 | 0 | -0.7382981 | 0 |
702 | Mitotic Prometaphase | 184 | 0 | 0.3168575 | 0 |
1059 | Resolution of Sister Chromatid Cohesion | 104 | 0 | 0.4070778 | 0 |
1071 | Role of LAT2/NTAL/LAB on calcium mobilization | 42 | 0 | -0.6355963 | 0 |
1129 | Separation of Sister Chromatids | 167 | 0 | 0.3030886 | 0 |
704 | Mitotic Spindle Checkpoint | 108 | 0 | 0.3756511 | 0 |
698 | Mitotic Anaphase | 222 | 0 | 0.2610886 | 0 |
701 | Mitotic Metaphase and Anaphase | 223 | 0 | 0.2603206 | 0 |
write.table(mc$enrichment_result,file="mitch_capsule.tsv",quote=FALSE,sep='\t')
m <- mitch_import(dgef, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 19072
## Note: no. genes in output = 19042
## Note: estimated proportion of input genes in output = 0.998
mf <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(mf$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in fat") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
199 | Classical antibody-mediated complement activation | 53 | 0 | -0.7386113 | 0 |
562 | Initial triggering of complement | 66 | 0 | -0.6591021 | 0 |
670 | Metabolism of RNA | 679 | 0 | -0.2082095 | 0 |
1448 | rRNA processing in the nucleus and cytosol | 190 | 0 | -0.3744877 | 0 |
227 | Creation of C4 and C2 activators | 59 | 0 | -0.6656491 | 0 |
152 | Cap-dependent Translation Initiation | 118 | 0 | -0.4694532 | 0 |
372 | Eukaryotic Translation Initiation | 118 | 0 | -0.4694532 | 0 |
1014 | Regulation of Complement cascade | 78 | 0 | -0.5736497 | 0 |
114 | Binding and Uptake of Ligands by Scavenger Receptors | 82 | 0 | -0.5581957 | 0 |
660 | Major pathway of rRNA processing in the nucleolus and cytosol | 180 | 0 | -0.3769113 | 0 |
1358 | Translation | 294 | 0 | -0.2946056 | 0 |
420 | Formation of a pool of free 40S subunits | 100 | 0 | -0.5009133 | 0 |
371 | Eukaryotic Translation Elongation | 93 | 0 | -0.5167050 | 0 |
620 | L13a-mediated translational silencing of Ceruloplasmin expression | 110 | 0 | -0.4712416 | 0 |
211 | Complement cascade | 86 | 0 | -0.5278466 | 0 |
844 | Peptide chain elongation | 88 | 0 | -0.5205426 | 0 |
682 | Metabolism of proteins | 1690 | 0 | -0.1240629 | 0 |
554 | Infectious disease | 772 | 0 | -0.1762982 | 0 |
462 | GTP hydrolysis and joining of the 60S ribosomal subunit | 111 | 0 | -0.4537003 | 0 |
373 | Eukaryotic Translation Termination | 92 | 0 | -0.4921361 | 0 |
write.table(mf$enrichment_result,file="mitch_fat.tsv",quote=FALSE,sep='\t')
m <- mitch_import(dgem, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 15920
## Note: no. genes in output = 15898
## Note: estimated proportion of input genes in output = 0.999
mm <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(mm$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in muscle") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
193 | Classical antibody-mediated complement activation | 49 | 0 | -0.8113652 | 0 |
221 | Creation of C4 and C2 activators | 53 | 0 | -0.7679597 | 0 |
1086 | Scavenging of heme from plasma | 49 | 0 | -0.7959106 | 0 |
371 | FCGR activation | 54 | 0 | -0.7529127 | 0 |
124 | CD22 mediated BCR regulation | 46 | 0 | -0.7751840 | 0 |
542 | Initial triggering of complement | 59 | 0 | -0.6536141 | 0 |
977 | Regulation of Complement cascade | 72 | 0 | -0.5870193 | 0 |
1044 | Role of LAT2/NTAL/LAB on calcium mobilization | 53 | 0 | -0.6759218 | 0 |
205 | Complement cascade | 77 | 0 | -0.5557015 | 0 |
111 | Binding and Uptake of Ligands by Scavenger Receptors | 75 | 0 | -0.5564212 | 0 |
1045 | Role of phospholipids in phagocytosis | 65 | 0 | -0.5774065 | 0 |
76 | Antigen activates B Cell Receptor (BCR) leading to generation of second messengers | 69 | 0 | -0.5554097 | 0 |
644 | Metabolism of RNA | 676 | 0 | 0.1727597 | 0 |
368 | FCERI mediated Ca+2 mobilization | 67 | 0 | -0.5331755 | 0 |
372 | FCGR3A-mediated IL10 synthesis | 77 | 0 | -0.4871341 | 0 |
373 | FCGR3A-mediated phagocytosis | 102 | 0 | -0.3995477 | 0 |
609 | Leishmania phagocytosis | 102 | 0 | -0.3995477 | 0 |
810 | Parasite infection | 102 | 0 | -0.3995477 | 0 |
369 | FCERI mediated MAPK activation | 69 | 0 | -0.4829413 | 0 |
163 | Cell surface interactions at the vascular wall | 146 | 0 | -0.3128196 | 0 |
write.table(mm$enrichment_result,file="mitch_muscle.tsv",quote=FALSE,sep='\t')
m <- mitch_import(dges, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 19361
## Note: no. genes in output = 19327
## Note: estimated proportion of input genes in output = 0.998
ms <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(ms$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in synovium") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
664 | Metabolism | 1829 | 0 | -0.1915636 | 0 |
561 | Innate Immune System | 939 | 0 | -0.2522998 | 0 |
678 | Metabolism of proteins | 1695 | 0 | -0.1721900 | 0 |
543 | Immune System | 1744 | 0 | -0.1653825 | 0 |
302 | Disease | 1446 | 0 | -0.1774727 | 0 |
200 | Classical antibody-mediated complement activation | 45 | 0 | -0.9325842 | 0 |
383 | FCGR activation | 52 | 0 | -0.8437035 | 0 |
558 | Initial triggering of complement | 57 | 0 | -0.8032102 | 0 |
550 | Infectious disease | 761 | 0 | -0.2204582 | 0 |
228 | Creation of C4 and C2 activators | 50 | 0 | -0.8390206 | 0 |
128 | CD22 mediated BCR regulation | 43 | 0 | -0.9023531 | 0 |
765 | Neutrophil degranulation | 441 | 0 | -0.2813883 | 0 |
1008 | Regulation of Complement cascade | 74 | 0 | -0.6729074 | 0 |
1355 | Translation | 294 | 0 | -0.3341908 | 0 |
212 | Complement cascade | 81 | 0 | -0.6265326 | 0 |
1077 | Role of LAT2/NTAL/LAB on calcium mobilization | 49 | 0 | -0.7993568 | 0 |
1120 | Scavenging of heme from plasma | 46 | 0 | -0.8195701 | 0 |
385 | FCGR3A-mediated phagocytosis | 99 | 0 | -0.5504704 | 0 |
627 | Leishmania phagocytosis | 99 | 0 | -0.5504704 | 0 |
836 | Parasite infection | 99 | 0 | -0.5504704 | 0 |
write.table(ms$enrichment_result,file="mitch_synovium.tsv",quote=FALSE,sep='\t')
Here I’m using the mitch package to identify the most enriched pathways across all tissues.
There is a heatmap showing the QuickDASH association correlation across tissues.
Then there is the results of the multi-contrast pathway analysis with mitch in the form of a table and heatmap of pathway enrichment.
x <- list("bone"=dgeb, "capsule"=dgec, "fat"=dgef , "muscle"=dgem, "synovium"=dges)
y <- mitch_import(x, DEtype="deseq2",geneTable=gt)
## Note: Mean no. genes in input = 18664
## Note: no. genes in output = 14927
## Note: estimated proportion of input genes in output = 0.8
heatmap.2(cor(y,method="s"),trace="none",scale="none",margins = c(10,10),main="Spearman correlation across tissues")
res <- mitch_calc(y, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(res$enrichment_result,30) %>% kbl(caption = "Top gene pathway correlates with QuickDASH across all tissues") %>% kable_paper("hover", full_width = F)
set | setSize | pMANOVA | s.bone | s.capsule | s.fat | s.muscle | s.synovium | p.bone | p.capsule | p.fat | p.muscle | p.synovium | s.dist | SD | p.adjustMANOVA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
215 | Creation of C4 and C2 activators | 40 | 0 | -0.8335729 | -0.8741519 | -0.6901961 | -0.7715423 | -0.8582690 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.8075562 | 0.0753887 | 0 |
187 | Classical antibody-mediated complement activation | 37 | 0 | -0.8376200 | -0.9215000 | -0.7383697 | -0.7861834 | -0.9195578 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.8866982 | 0.0809324 | 0 |
534 | Initial triggering of complement | 46 | 0 | -0.7683594 | -0.8811294 | -0.7002422 | -0.6236900 | -0.8183561 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.7075714 | 0.1003563 | 0 |
365 | FCGR activation | 42 | 0 | -0.7696147 | -0.7424317 | -0.5174944 | -0.7138154 | -0.8511349 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.6264302 | 0.1236950 | 0 |
1075 | Scavenging of heme from plasma | 37 | 0 | -0.8238687 | -0.8111049 | -0.6080373 | -0.7658360 | -0.8069664 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.7158262 | 0.0894056 | 0 |
199 | Complement cascade | 63 | 0 | -0.5514058 | -0.6799938 | -0.5282135 | -0.5084000 | -0.6710792 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.3244875 | 0.0815699 | 0 |
118 | CD22 mediated BCR regulation | 35 | 0 | -0.8247726 | -0.7361613 | -0.6163041 | -0.7391428 | -0.8806262 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.7099265 | 0.1004941 | 0 |
966 | Regulation of Complement cascade | 59 | 0 | -0.5370583 | -0.6942472 | -0.5424025 | -0.5201160 | -0.7008454 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.3514127 | 0.0904257 | 0 |
354 | Eukaryotic Translation Elongation | 92 | 0 | 0.4986694 | -0.2785671 | -0.4854604 | 0.3302853 | -0.2194048 | 0e+00 | 0.0000039 | 0.0000000 | 0.0000000 | 0.0002788 | 0.8480386 | 0.4226100 | 0 |
396 | Formation of a pool of free 40S subunits | 99 | 0 | 0.4486993 | -0.2671495 | -0.4691070 | 0.3195047 | -0.1837964 | 0e+00 | 0.0000044 | 0.0000000 | 0.0000000 | 0.0015927 | 0.7928592 | 0.3949728 | 0 |
805 | Peptide chain elongation | 87 | 0 | 0.4875329 | -0.2849010 | -0.4898720 | 0.3149689 | -0.2080274 | 0e+00 | 0.0000044 | 0.0000000 | 0.0000004 | 0.0008043 | 0.8374439 | 0.4167766 | 0 |
537 | Innate Immune System | 877 | 0 | -0.1087803 | 0.0175375 | -0.0373746 | -0.0426511 | -0.2098180 | 1e-07 | 0.3828449 | 0.0629070 | 0.0337995 | 0.0000000 | 0.2436807 | 0.0870846 | 0 |
732 | Neutrophil degranulation | 424 | 0 | -0.1427639 | 0.1054821 | -0.0091764 | -0.0585129 | -0.2418029 | 5e-07 | 0.0002083 | 0.7470223 | 0.0396870 | 0.0000000 | 0.3057526 | 0.1317518 | 0 |
591 | L13a-mediated translational silencing of Ceruloplasmin expression | 109 | 0 | 0.4178082 | -0.2187855 | -0.4373840 | 0.2992666 | -0.1539264 | 0e+00 | 0.0000806 | 0.0000000 | 0.0000001 | 0.0055471 | 0.7259403 | 0.3623737 | 0 |
105 | Binding and Uptake of Ligands by Scavenger Receptors | 63 | 0 | -0.4506168 | -0.6016710 | -0.5267291 | -0.4918478 | -0.5404835 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.1732620 | 0.0563822 | 0 |
436 | GTP hydrolysis and joining of the 60S ribosomal subunit | 110 | 0 | 0.4180088 | -0.2274206 | -0.4186911 | 0.2965844 | -0.1601355 | 0e+00 | 0.0000384 | 0.0000000 | 0.0000001 | 0.0037503 | 0.7178853 | 0.3583571 | 0 |
1350 | Viral mRNA Translation | 87 | 0 | 0.4770595 | -0.2867460 | -0.4576293 | 0.3044227 | -0.2030564 | 0e+00 | 0.0000038 | 0.0000000 | 0.0000009 | 0.0010709 | 0.8081699 | 0.4023776 | 0 |
1033 | Role of LAT2/NTAL/LAB on calcium mobilization | 41 | 0 | -0.6536834 | -0.6300698 | -0.4592398 | -0.6124465 | -0.7492586 | 0e+00 | 0.0000000 | 0.0000004 | 0.0000000 | 0.0000000 | 1.4041618 | 0.1046903 | 0 |
140 | Cap-dependent Translation Initiation | 117 | 0 | 0.3775135 | -0.2054225 | -0.4355846 | 0.2977452 | -0.1627879 | 0e+00 | 0.0001260 | 0.0000000 | 0.0000000 | 0.0023810 | 0.6997149 | 0.3486749 | 0 |
355 | Eukaryotic Translation Initiation | 117 | 0 | 0.3775135 | -0.2054225 | -0.4355846 | 0.2977452 | -0.1627879 | 0e+00 | 0.0001260 | 0.0000000 | 0.0000000 | 0.0023810 | 0.6997149 | 0.3486749 | 0 |
356 | Eukaryotic Translation Termination | 91 | 0 | 0.4554573 | -0.2872549 | -0.4585386 | 0.3001090 | -0.2214409 | 0e+00 | 0.0000022 | 0.0000000 | 0.0000008 | 0.0002641 | 0.7995723 | 0.3969746 | 0 |
1079 | Selenocysteine synthesis | 91 | 0 | 0.4582883 | -0.2732068 | -0.4579742 | 0.2924858 | -0.1902856 | 0e+00 | 0.0000067 | 0.0000000 | 0.0000014 | 0.0017204 | 0.7849631 | 0.3906212 | 0 |
1306 | Translation | 293 | 0 | 0.2013213 | -0.0993404 | -0.2478766 | 0.2183181 | -0.2793551 | 0e+00 | 0.0035414 | 0.0000000 | 0.0000000 | 0.0000000 | 0.4873845 | 0.2392590 | 0 |
151 | Cell Cycle | 558 | 0 | -0.1812352 | 0.2030000 | 0.0004268 | 0.0162591 | -0.0007608 | 0e+00 | 0.0000000 | 0.9863320 | 0.5139935 | 0.9756382 | 0.2726175 | 0.1360480 | 0 |
740 | Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) | 93 | 0 | 0.4445730 | -0.2749916 | -0.4450181 | 0.2950806 | -0.2036313 | 0e+00 | 0.0000046 | 0.0000000 | 0.0000009 | 0.0006962 | 0.7744965 | 0.3850567 | 0 |
647 | Metabolism of proteins | 1576 | 0 | 0.0908130 | 0.0116023 | -0.0916274 | 0.0477584 | -0.1330572 | 0e+00 | 0.4505906 | 0.0000000 | 0.0018968 | 0.0000000 | 0.1917349 | 0.0944086 | 0 |
1034 | Role of phospholipids in phagocytosis | 53 | 0 | -0.5412433 | -0.5436180 | -0.4362963 | -0.5052732 | -0.6211954 | 0e+00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.1916392 | 0.0671177 | 0 |
1058 | SRP-dependent cotranslational protein targeting to membrane | 110 | 0 | 0.4145042 | -0.2333609 | -0.3888310 | 0.2232792 | -0.2585703 | 0e+00 | 0.0000240 | 0.0000000 | 0.0000530 | 0.0000029 | 0.7029743 | 0.3472625 | 0 |
153 | Cell Cycle, Mitotic | 454 | 0 | -0.1943062 | 0.2097745 | 0.0075367 | 0.0150300 | 0.0025431 | 0e+00 | 0.0000000 | 0.7841918 | 0.5849686 | 0.9263743 | 0.2864427 | 0.1429336 | 0 |
1025 | Response of EIF2AK4 (GCN2) to amino acid deficiency | 99 | 0 | 0.4399403 | -0.1960310 | -0.3987896 | 0.2760461 | -0.1512304 | 0e+00 | 0.0007586 | 0.0000000 | 0.0000021 | 0.0093857 | 0.7000578 | 0.3499643 | 0 |
pw <- res$enrichment_result[1:30,4:8]
rownames(pw) <- res$enrichment_result[1:30,1]
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(pw),trace="none",col=colfunc(25),scale="none", margins = c(10,25), cexRow=0.6,cexCol=0.7,
main="Top ranked pathways")
Now I will look at the expression of the top ranked pathway genes (Creation of C4 and C2 activators) across the whole experiment. It will give us a good idea about what’s going on.
set <- head(res$enrichment_result$set,1)
genes <- genesets[[set]]
genes <- gt[match(genes,gt$gene),1]
xxn <- xx/rowSums(xx)*1000000
mx <- as.matrix(xxn[which(rownames(xxn) %in% genes),])
rownames(mx) <- sapply(strsplit(rownames(mx)," "),"[[",2)
colfunc <- colorRampPalette(c("blue", "white", "red"))
parti <- substr(colnames(mx), 1, 2)
values <- as.numeric(sss[match(parti,sss$ParticipantID),"quickdash"])
f <- colorRamp(c("yellow", "orange"))
rr <- range(values)
svals <- (values-rr[1])/diff(rr)
colcols <- rgb(f(svals)/255)
heatmap.2(mx,trace="none",col=colfunc(25),scale="row", margins = c(5,10),
cexRow=0.7, cexCol=0.7, main=set , ColSideColors = colcols )
mtext("yellow=low quickdash, orange=high quickdash")
pdf("c4c2activators_heatmap.pdf")
heatmap.2(mx,trace="none",col=colfunc(25),scale="row", margins = c(5,10),
cexRow=0.7, cexCol=0.7, main=set , ColSideColors = colcols )
mtext("yellow=low quickdash, orange=high quickdash")
dev.off()
## png
## 2
It looks like expression of immunoglubulins in some of the bone samples could explain the observed pathway enrichment.
The number of differentially expressed genes was relatively small. This is to be expected given that the sample number (n) is very small. Still, there were some statistically significant (FDR<0.05) differentially expressed genes in each tissue type (except in muscle). Some of the top ranked genes appear to be involved in inflammation and response to stress including interleukins, chemokines, complement components, nuclear response factors and immediate early response genes. Some of these were up and down regulated, which suggests a shifting balance between inflammatory and antiinflammatory signaling pathways. Using a gene expression heatmap of one of the top pathways, we see it is dominated by expression of innumoglobulins in bone. This could be an important finding or it might be due to contamination of bone samples with blood.
For reproducibility.
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] pkgload_1.3.2 GGally_2.1.2
## [3] beeswarm_0.4.0 gtools_3.9.4
## [5] echarts4r_0.4.4 kableExtra_1.3.4
## [7] topconfects_1.12.0 limma_3.52.4
## [9] eulerr_6.1.1 mitch_1.11.1
## [11] MASS_7.3-58.1 fgsea_1.22.0
## [13] gplots_3.1.3 DESeq2_1.36.0
## [15] SummarizedExperiment_1.26.1 Biobase_2.56.0
## [17] MatrixGenerics_1.8.1 matrixStats_0.63.0
## [19] GenomicRanges_1.48.0 GenomeInfoDb_1.32.4
## [21] IRanges_2.30.1 S4Vectors_0.34.0
## [23] BiocGenerics_0.42.0 reshape2_1.4.4
## [25] forcats_0.5.2 stringr_1.5.0
## [27] dplyr_1.0.10 purrr_0.3.5
## [29] readr_2.1.3 tidyr_1.2.1
## [31] tibble_3.1.8 ggplot2_3.4.0
## [33] tidyverse_1.3.2 zoo_1.8-11
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.1 backports_1.4.1 fastmatch_1.1-3
## [4] systemfonts_1.0.4 plyr_1.8.8 splines_4.2.1
## [7] BiocParallel_1.30.4 digest_0.6.30 htmltools_0.5.3
## [10] fansi_1.0.3 magrittr_2.0.3 memoise_2.0.1
## [13] googlesheets4_1.0.1 tzdb_0.3.0 Biostrings_2.64.1
## [16] annotate_1.74.0 modelr_0.1.10 svglite_2.1.0
## [19] timechange_0.1.1 prettyunits_1.1.1 colorspace_2.0-3
## [22] blob_1.2.3 rvest_1.0.3 haven_2.5.1
## [25] xfun_0.35 crayon_1.5.2 RCurl_1.98-1.9
## [28] jsonlite_1.8.4 genefilter_1.78.0 survival_3.4-0
## [31] glue_1.6.2 gtable_0.3.1 gargle_1.2.1
## [34] zlibbioc_1.42.0 XVector_0.36.0 webshot_0.5.4
## [37] DelayedArray_0.22.0 scales_1.2.1 DBI_1.1.3
## [40] Rcpp_1.0.9 viridisLite_0.4.1 xtable_1.8-4
## [43] progress_1.2.2 bit_4.0.5 htmlwidgets_1.5.4
## [46] httr_1.4.4 RColorBrewer_1.1-3 ellipsis_0.3.2
## [49] farver_2.1.1 pkgconfig_2.0.3 reshape_0.8.9
## [52] XML_3.99-0.13 sass_0.4.4 dbplyr_2.2.1
## [55] locfit_1.5-9.6 utf8_1.2.2 labeling_0.4.2
## [58] tidyselect_1.2.0 rlang_1.0.6 later_1.3.0
## [61] AnnotationDbi_1.58.0 munsell_0.5.0 cellranger_1.1.0
## [64] tools_4.2.1 cachem_1.0.6 cli_3.4.1
## [67] generics_0.1.3 RSQLite_2.2.19 broom_1.0.1
## [70] evaluate_0.18 fastmap_1.1.0 yaml_2.3.6
## [73] knitr_1.41 bit64_4.0.5 fs_1.5.2
## [76] caTools_1.18.2 KEGGREST_1.36.3 mime_0.12
## [79] xml2_1.3.3 compiler_4.2.1 rstudioapi_0.14
## [82] png_0.1-8 reprex_2.0.2 geneplotter_1.74.0
## [85] bslib_0.4.1 stringi_1.7.8 highr_0.9
## [88] lattice_0.20-45 Matrix_1.5-3 vctrs_0.5.1
## [91] pillar_1.8.1 lifecycle_1.0.3 jquerylib_0.1.4
## [94] data.table_1.14.6 bitops_1.0-7 httpuv_1.6.6
## [97] R6_2.5.1 promises_1.2.0.1 KernSmooth_2.23-20
## [100] gridExtra_2.3 codetools_0.2-18 assertthat_0.2.1
## [103] withr_2.5.0 GenomeInfoDbData_1.2.8 parallel_4.2.1
## [106] hms_1.1.2 grid_4.2.1 rmarkdown_2.18
## [109] googledrive_2.0.0 shiny_1.7.3 lubridate_1.9.0