Source: https://github.com/markziemann/shoulder-instability-osteroarthritis

Introduction

Here we are analysing the gene expression of six patients in these tissues.

B: Bone

C: Capsule

F: Fat

M: Muscle

S: Synovium

They have a range of QuickDASH scores which I will examine.

suppressPackageStartupMessages({
    library("zoo")
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("MASS")
    library("mitch")
    library("eulerr")
    library("limma")
    library("topconfects")
    library("kableExtra")
})

Import read counts

tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)
colnames(xx) <- sapply(strsplit(colnames(xx),"-"),"[[",2)
head(xx)
##                              41B 41C 41F 41M 41S 42B  42C 42F 42M 42S  43B  43C
## ENSG00000000003.15 TSPAN6    327 231 613  27 346 432  301 579  46 336  279  683
## ENSG00000000005.6 TNMD         9  45 477   1  30  52    2 275   1 308   17  356
## ENSG00000000419.13 DPM1      304 497 485 132 502 349  333 422 126 339  851  815
## ENSG00000000457.14 SCYL3     245 245 280  83 231 202  185 183  93 168  287  281
## ENSG00000000460.17 C1orf112  101  91  79  15  82  86   71  53   9  49  229  105
## ENSG00000000938.13 FGR      2962 828 661  91 736 562 1012 713  95 479 1294 1404
##                             43F 43M 43S  44B 44C 44F 44M 44S  45B 45C 45F 45M
## ENSG00000000003.15 TSPAN6   885  69 370  526 386 428  57 301   85 371 368  78
## ENSG00000000005.6 TNMD      772  12 387    3  22 480   2  10   60 157 246   4
## ENSG00000000419.13 DPM1     486 328 617  366 529 218 281 595  395 300 168 493
## ENSG00000000457.14 SCYL3    326 138 278  214 199 177 141 262  261 149 140 157
## ENSG00000000460.17 C1orf112  68  21  89  149  58  32  38 103  179  60  58  38
## ENSG00000000938.13 FGR      421  69 605 4440 775 406  74 803 8028 533 450  72
##                              45S 46B 46C 46F 46M  46S
## ENSG00000000003.15 TSPAN6    231 722 342 385  55  363
## ENSG00000000005.6 TNMD       139  40   4 171  13   11
## ENSG00000000419.13 DPM1      414 525 434 273 238  516
## ENSG00000000457.14 SCYL3     184 300 242 147 113  237
## ENSG00000000460.17 C1orf112   70 134  60  47  19  116
## ENSG00000000938.13 FGR      1392 620 968 362 364 1021

QC analysis

Here I’ll look at a few different quality control measures.

par(mar=c(5,8,3,1))
barplot(colSums(xx),horiz=TRUE,las=1,xlab="num reads")

sums <- colSums(xx)
sums <- sums[order(sums)]
barplot(sums,horiz=TRUE,las=1,xlab="num reads")
abline(v=15000000,col="red")

Sample sheet

URL="https://raw.githubusercontent.com/markziemann/shoulder-instability-osteroarthritis/main/OA_samplesheet.tsv"
ss <- read.table(URL,header=TRUE)
ss$col <- as.numeric(factor(ss$TissueType))
ss$id <- sapply(strsplit(ss$Dataset,"-"),"[[",1)
ss$Dataset <- sapply(strsplit(ss$Dataset,"-"),"[[",2)

URL="https://raw.githubusercontent.com/markziemann/shoulder-instability-osteroarthritis/main/OA_patients.tsv"
pat <- read.table(URL,header=TRUE,sep="\t")
pat$id <- sapply(strsplit(pat$Record_number," "),"[[",2)

MDS plot

Multidimensional scaling plot to show the variation between samples, very similar to PCA.

colvec <- c("orange2","pink","purple1","yellow","lightblue")
cols <- rep(colvec,6)

plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2", 
  type = "p",bty="n",pch=19, cex=4, col=cols)
text(cmdscale(dist(t(xx))), labels=ss$ParticipantID )

legend("topright", inset=0.1, title="tissue type", c("Bone","Capsule","Fat","Muscle","Synovium"),
  pch=19,col=colvec, cex=1.5)

Correlation heatmap

heatmap.2(cor(xx),trace="n",main="Pearson correlation heatmap")

There were 4 samples with fewer than 15M reads and this might impact data quality: 4002-42S, 4006-46B, 4005-42F and 4006-46C.

MDS plot shows samples clustered by tissue type as expected. For example M samples to the right, F samples in the centre, C samples in the lower right and S samples between C and F. However the B samples were somewhat not clustered, which is something to look into.

The result of the MDS is confirmed in the correlation heatmap.

Separate data by tissue

Also a good point to filter out any genes with low expression (average < 10 counts).

Below I show the dimensions of each dataset (separated by tissue type), in terms of number of genes detected and samples.

ssb <- subset(ss,TissueType=="B")
xxb <- xx[,which(colnames(xx) %in% ssb$Dataset)]
xxb <- xxb[which(rowMeans(xxb)>10),]

ssc <- subset(ss,TissueType=="C")
xxc <- xx[,which(colnames(xx) %in% ssc$Dataset)]
xxc <- xxc[which(rowMeans(xxc)>10),]

ssf <- subset(ss,TissueType=="F")
xxf <- xx[,which(colnames(xx) %in% ssf$Dataset)]
xxf <- xxf[which(rowMeans(xxf)>10),]

ssm <- subset(ss,TissueType=="M")
xxm <- xx[,which(colnames(xx) %in% ssm$Dataset)]
xxm <- xxm[which(rowMeans(xxm)>10),]

sss <- subset(ss,TissueType=="S")
xxs <- xx[,which(colnames(xx) %in% sss$Dataset)]
xxs <- xxs[which(rowMeans(xxs)>10),]

lapply(list(xx,xxb,xxc,xxf,xxm,xxs),dim)
## [[1]]
## [1] 60651    30
## 
## [[2]]
## [1] 20241     6
## 
## [[3]]
## [1] 18726     6
## 
## [[4]]
## [1] 19072     6
## 
## [[5]]
## [1] 15920     6
## 
## [[6]]
## [1] 19361     6

Analysis of gene expression correlates with basline quickdash scores

ssb$quickdash <- scale(pat[match(ssb$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxb , colData = ssb, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in bone") %>% kable_paper("hover", full_width = F)
Top gene expression correlates with QuickDASH in bone
baseMean log2FoldChange lfcSE stat pvalue padj 41B 42B 43B 44B 45B 46B
ENSG00000211638.2 IGLV8-61 440.91837 -2.931254 0.5159649 -5.681111 0.0000000 0.0002709 8.930659 4.205568 8.597901 3.501237 10.671973 7.232449
ENSG00000274419.6 TBC1D3D 46.71931 -3.506396 0.7067664 -4.961181 0.0000007 0.0048521 5.716401 1.861318 5.328218 1.861318 7.103516 6.290270
ENSG00000145423.5 SFRP2 460.55116 -1.786447 0.3604527 -4.956120 0.0000007 0.0048521 9.225523 5.445464 6.876516 6.747061 10.415843 9.162634
ENSG00000196436.8 NPIPB15 83.47206 -2.540972 0.5571418 -4.560727 0.0000051 0.0257955 7.177856 3.405667 3.060644 3.501237 7.693135 7.305542
ENSG00000271130.1 IGHV3OR16-8 27.99582 -3.564167 0.8218232 -4.336903 0.0000145 0.0584983 5.040078 1.861318 3.840140 1.861318 7.123073 2.839618
ENSG00000095752.7 IL11 298.67564 3.846244 0.9014247 4.266851 0.0000198 0.0668802 3.780852 3.079033 2.721216 10.789836 2.586595 4.535245
ENSG00000073756.13 PTGS2 141.73199 1.285224 0.3072010 4.183659 0.0000287 0.0829461 5.815436 8.399398 6.955978 8.123029 5.620540 5.551745
ENSG00000270149.5 AL591806.2 105.30108 -1.121805 0.2727571 -4.112835 0.0000391 0.0988847 7.021321 5.213977 6.373073 5.544574 8.085718 6.729005
ENSG00000281383.1 FP671120.7 60.01779 1.433140 0.3545168 4.042517 0.0000529 0.1189281 4.910451 6.903751 6.396072 7.003666 4.151981 4.419797
ENSG00000214940.8 NPIPA8 88.24011 -2.685878 0.6790047 -3.955611 0.0000763 0.1545185 4.767596 3.079033 3.514704 3.342451 8.967258 3.220658
ENSG00000130720.13 FIBCD1 12.86486 3.555247 0.9047775 3.929416 0.0000852 0.1566884 2.583587 3.767650 1.861318 6.248497 1.861318 1.861318
ENSG00000119508.18 NR4A3 210.56959 2.481732 0.6488879 3.824593 0.0001310 0.2106384 4.663511 10.190786 4.810847 4.675312 3.982721 5.350149
ENSG00000153993.13 SEMA3D 263.02297 1.762272 0.4617345 3.816634 0.0001353 0.2106384 7.681316 8.579345 7.144141 9.615365 3.787985 6.539845
ENSG00000229183.9 PGA4 13.52739 -3.644256 0.9753663 -3.736295 0.0001868 0.2700033 3.780852 1.861318 3.060644 1.861318 4.070076 6.083706
ENSG00000039537.14 C6 98.93901 -1.159004 0.3170880 -3.655151 0.0002570 0.3468372 6.489877 5.475657 6.147608 4.947782 7.775736 7.497997
ENSG00000134184.13 GSTM1 42.95966 -7.719138 2.1431726 -3.601734 0.0003161 0.3899222 1.861318 1.861318 1.861318 1.861318 1.861318 8.049712
ENSG00000223931.2 AC142381.1 57.50480 -1.931489 0.5376410 -3.592526 0.0003275 0.3899222 5.268256 3.538885 3.514704 3.984908 8.153817 4.478738
ENSG00000136634.7 IL10 70.99991 2.008719 0.5632486 3.566310 0.0003620 0.4071179 4.663511 8.387616 5.465747 4.335672 3.270504 5.001029
ENSG00000137077.8 CCL21 15.70882 -1.701534 0.4914766 -3.462085 0.0005360 0.5710177 3.975064 2.864993 4.098614 3.157099 6.005978 4.225850
ENSG00000153266.13 FEZF2 26.86972 2.060832 0.5992079 3.439260 0.0005833 0.5903348 1.861318 6.677080 3.840140 5.810519 3.270504 3.499068
dgeb <- dge
write.table(dgeb,file="deseq2_bone.tsv",quote=FALSE,sep='\t')

ssc$quickdash <- scale(pat[match(ssc$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxc , colData = ssc, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in capsule") %>% kable_paper("hover", full_width = F)
Top gene expression correlates with QuickDASH in capsule
baseMean log2FoldChange lfcSE stat pvalue padj 41C 42C 43C 44C 45C 46C
ENSG00000248144.6 ADH1C 49.68522 -2.0385840 0.3247349 -6.277686 0.00e+00 0.0000034 6.248232 4.879783 5.703559 5.255560 7.706001 6.636002
ENSG00000274419.6 TBC1D3D 73.36200 -4.1811328 0.6672278 -6.266425 0.00e+00 0.0000034 6.658670 4.403795 5.678897 4.403795 7.624139 7.922841
ENSG00000039537.14 C6 299.22535 -2.4279663 0.4208040 -5.769827 0.00e+00 0.0000486 10.016795 5.449903 7.719263 5.867846 9.028609 7.267074
ENSG00000142178.9 SIK1 110.70007 2.3193982 0.4383154 5.291619 1.00e-07 0.0004610 5.965574 7.119941 5.355726 9.086798 5.067327 5.773043
ENSG00000125730.17 C3 2208.32354 -1.8826918 0.3562169 -5.285240 1.00e-07 0.0004610 12.832108 8.257105 10.190717 8.351984 11.663371 10.183791
ENSG00000186431.19 FCAR 30.38540 2.5646086 0.4913965 5.219021 2.00e-07 0.0004781 5.078262 7.325994 5.242080 6.292669 4.403795 5.254083
ENSG00000166482.12 MFAP4 3157.73936 -0.9271698 0.1777349 -5.216589 2.00e-07 0.0004781 12.438460 10.259131 11.298717 10.421206 12.450246 11.519873
ENSG00000176945.17 MUC20 344.82717 -1.1779858 0.2346290 -5.020632 5.00e-07 0.0011821 9.574743 7.107541 8.080757 7.126881 9.084261 8.841379
ENSG00000211669.3 IGLV3-10 19.79640 -3.9906631 0.8137022 -4.904329 9.00e-07 0.0019127 5.008137 4.403795 4.403795 4.403795 7.260605 4.729155
ENSG00000159263.16 SIM2 117.11826 -1.1230524 0.2331657 -4.816542 1.50e-06 0.0026821 7.279374 5.919030 7.111892 6.451796 8.469127 7.102895
ENSG00000124107.5 SLPI 135.25645 -1.2871914 0.2741228 -4.695674 2.70e-06 0.0044357 8.295381 5.886992 7.401278 6.214895 7.919685 7.529725
ENSG00000188257.12 PLA2G2A 32861.23105 -1.5584845 0.3479723 -4.478760 7.50e-06 0.0113600 15.972762 11.078605 15.408404 12.933800 15.194721 15.362567
ENSG00000197766.8 CFD 10476.63157 -1.2618786 0.2833101 -4.454055 8.40e-06 0.0113600 14.658820 10.791169 12.998718 11.843394 13.934758 12.957715
ENSG00000163735.7 CXCL5 57.91874 2.7383282 0.6156688 4.447730 8.70e-06 0.0113600 4.707618 8.423334 5.281803 5.754112 4.737747 5.590062
ENSG00000128383.13 APOBEC3A 68.97410 1.2533513 0.2827128 4.433303 9.30e-06 0.0113600 5.915336 7.408607 6.106610 7.773314 5.873228 5.838137
ENSG00000136689.19 IL1RN 78.20007 1.3717917 0.3116023 4.402381 1.07e-05 0.0122876 5.989888 8.106177 6.824687 6.787031 5.439799 6.402780
ENSG00000145824.13 CXCL14 1562.57168 -1.2371931 0.2835391 -4.363396 1.28e-05 0.0138319 11.450332 8.940588 11.098540 8.177586 10.959583 10.863248
ENSG00000162747.12 FCGR3B 332.89923 1.4785624 0.3458081 4.275673 1.91e-05 0.0189012 7.158334 9.732960 6.338019 9.844083 6.942508 6.875852
ENSG00000205221.12 VIT 242.40661 -1.5450981 0.3618600 -4.269878 1.96e-05 0.0189012 9.380209 5.545030 7.375552 6.862148 8.900090 7.750909
ENSG00000122121.12 XPNPEP2 92.06925 -1.4711424 0.3566169 -4.125274 3.70e-05 0.0339967 8.059610 5.343401 6.918292 5.894526 7.590016 6.568593
dgec <- dge
write.table(dgec,file="deseq2_capsule.tsv",quote=FALSE,sep='\t')

ssf$quickdash <- scale(pat[match(ssf$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxf , colData = ssf, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in fat") %>% kable_paper("hover", full_width = F)
Top gene expression correlates with QuickDASH in fat
baseMean log2FoldChange lfcSE stat pvalue padj 41F 42F 43F 44F 45F 46F
ENSG00000073756.13 PTGS2 119.90779 3.7022183 0.6818779 5.429445 1.00e-07 0.0010781 4.734032 9.468892 4.825193 4.775833 3.498061 4.413838
ENSG00000278299.5 TBC1D3C 21.01376 -3.8365212 0.7254307 -5.288612 1.00e-07 0.0011753 5.291506 3.498061 3.498061 3.498061 6.857765 4.913062
ENSG00000196436.8 NPIPB15 52.46229 -2.5979241 0.4991609 -5.204582 2.00e-07 0.0012361 6.745880 4.262497 4.251850 4.152412 7.328100 6.680265
ENSG00000273294.1 C1QTNF3-AMACR 25.76986 -3.9396329 0.7653204 -5.147691 3.00e-07 0.0012574 5.349143 3.498061 3.498061 3.498061 6.857765 5.889474
ENSG00000019169.11 MARCO 1154.06922 -1.0710154 0.2205026 -4.857155 1.20e-06 0.0045424 10.428500 8.474129 9.269474 9.092977 11.571676 10.126887
ENSG00000205502.4 C2CD4B 204.79684 2.4598164 0.5102746 4.820574 1.40e-06 0.0045501 5.134134 10.143152 5.379069 6.061604 4.891409 5.473738
ENSG00000214940.8 NPIPA8 80.18742 -2.6254709 0.5669613 -4.630776 3.60e-06 0.0099255 5.064806 4.477605 4.338507 4.152412 8.850932 5.094199
ENSG00000258430.1 AL583722.1 21.01845 -3.2268797 0.7033708 -4.587736 4.50e-06 0.0104211 5.134134 3.498061 4.613647 3.498061 6.261418 6.082966
ENSG00000164694.17 FNDC1 1731.21142 -1.3540428 0.2964017 -4.568269 4.90e-06 0.0104211 10.629144 8.177412 9.233994 9.428736 12.584622 10.246329
ENSG00000164825.4 DEFB1 158.38918 -1.3044346 0.2959097 -4.408218 1.04e-05 0.0181199 7.372966 5.552629 7.900770 5.893925 8.427900 7.802637
ENSG00000215914.4 MMP23A 17.82400 -2.9069569 0.6595285 -4.407629 1.05e-05 0.0181199 4.991015 3.498061 4.725032 3.498061 6.346294 5.473738
ENSG00000169429.11 CXCL8 93.23866 2.4216015 0.5614329 4.313252 1.61e-05 0.0255677 4.826800 9.026023 5.287173 5.348505 4.593292 4.150954
ENSG00000153266.13 FEZF2 25.54163 2.5841825 0.6045138 4.274811 1.91e-05 0.0280651 4.225928 7.068360 4.416223 5.667402 4.140369 3.498061
ENSG00000261654.1 AL360270.2 356.86548 2.1074011 0.4975544 4.235519 2.28e-05 0.0310634 6.879139 10.852957 7.096235 5.833049 5.561161 5.889474
ENSG00000274419.6 TBC1D3D 48.89398 -2.3660608 0.5609489 -4.217961 2.47e-05 0.0313443 5.028511 4.477605 5.644417 3.498061 6.828880 7.440265
ENSG00000106976.21 DNM1 1568.41911 -0.9708034 0.2322853 -4.179358 2.92e-05 0.0338734 11.424041 9.081865 10.265546 9.475019 11.629322 10.161399
ENSG00000277027.1 RMRP 17.32159 -2.8477992 0.6825976 -4.172003 3.02e-05 0.0338734 5.099987 3.498061 4.871869 3.498061 5.942595 5.828673
ENSG00000100473.18 COCH 128.93323 -1.3349220 0.3223252 -4.141538 3.45e-05 0.0353936 7.835377 6.094936 6.324899 5.348505 8.640548 6.382682
ENSG00000215246.5 AC116351.1 47.55904 2.3707474 0.5731249 4.136528 3.53e-05 0.0353936 5.134134 8.023617 5.116596 4.775833 3.954101 4.413838
ENSG00000104921.15 FCER2 130.89946 -1.2035532 0.2934860 -4.100888 4.12e-05 0.0375994 8.151345 5.402904 6.809562 6.187101 8.053827 7.085475
dgef <- dge
write.table(dgef,file="deseq2_fat.tsv",quote=FALSE,sep='\t')

ssm$quickdash <- scale(pat[match(ssm$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxm , colData = ssm, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in muscle") %>% kable_paper("hover", full_width = F)
Top gene expression correlates with QuickDASH in muscle
baseMean log2FoldChange lfcSE stat pvalue padj 41M 42M 43M 44M 45M 46M
ENSG00000274419.6 TBC1D3D 19.41891 -3.6142012 0.7764588 -4.654724 0.0000032 0.0516470 5.482788 4.094096 4.710388 4.094096 5.934661 6.578833
ENSG00000160888.7 IER2 518.83277 1.4210313 0.3193194 4.450188 0.0000086 0.0612357 7.714334 11.051849 7.892072 8.332358 7.098796 8.135541
ENSG00000081041.9 CXCL2 76.78205 1.7761791 0.4104752 4.327128 0.0000151 0.0612357 4.808246 8.520217 5.875837 6.418142 5.287051 5.776348
ENSG00000171617.15 ENC1 190.11873 0.8976600 0.2076431 4.323090 0.0000154 0.0612357 6.962449 8.411764 7.872623 8.627597 6.851619 6.965453
ENSG00000196436.8 NPIPB15 16.76047 -2.9244111 0.7262633 -4.026654 0.0000566 0.1610387 5.398903 4.460127 4.451708 4.094096 6.337441 5.969007
ENSG00000134184.13 GSTM1 119.57051 -8.5934024 2.1429395 -4.010100 0.0000607 0.1610387 4.094096 4.094096 4.094096 4.094096 4.094096 9.553037
ENSG00000154734.16 ADAMTS1 666.03357 0.9346697 0.2388152 3.913778 0.0000909 0.2066490 8.827198 10.886483 9.145379 9.225506 8.121563 8.668636
ENSG00000204460.3 LINC01854 381.98127 1.0571224 0.2877602 3.673623 0.0002391 0.4709364 8.706825 8.866629 8.038808 10.070861 7.117544 6.996709
ENSG00000224687.2 RASAL2-AS1 38.21144 2.0203818 0.5541188 3.646117 0.0002662 0.4709364 4.808246 4.904082 4.885817 7.868738 4.819832 5.117292
ENSG00000081181.8 ARG2 139.55961 -1.4436456 0.3990482 -3.617723 0.0002972 0.4731530 5.941149 5.726977 5.957945 6.267292 9.412878 6.399891
ENSG00000168621.15 GDNF 610.01979 0.9797612 0.2741525 3.573782 0.0003519 0.5092399 8.931970 10.869537 8.249185 9.371930 8.121563 8.000207
ENSG00000136244.12 IL6 13.43905 2.4760095 0.7132823 3.471290 0.0005180 0.6871673 4.453894 6.656734 4.803969 4.698627 4.421385 4.559315
ENSG00000122877.16 EGR2 27.52304 1.7208326 0.5036746 3.416556 0.0006342 0.7766346 4.601627 7.254736 5.251880 5.278482 4.744556 5.410203
ENSG00000162772.17 ATF3 180.42876 1.0884805 0.3249612 3.349570 0.0008094 0.9203697 5.734328 8.688426 6.844778 8.616434 6.432825 7.739545
ENSG00000125740.14 FOSB 727.74879 3.5458419 1.0869717 3.262129 0.0011058 0.9999305 4.453894 12.028100 7.467668 4.870848 4.819832 6.646910
ENSG00000170345.10 FOS 5286.85345 3.0332649 0.9371373 3.236735 0.0012091 0.9999305 6.094375 14.861025 8.847656 8.446097 6.432825 10.221335
ENSG00000177606.8 JUN 7279.99453 0.9818296 0.3081936 3.185756 0.0014438 0.9999305 11.723421 14.584456 11.885568 11.831670 11.273073 12.558502
ENSG00000267247.1 AP005264.2 43.98595 1.1211783 0.3547236 3.160710 0.0015739 0.9999305 5.797137 7.470043 5.931180 5.926055 5.364608 5.410203
ENSG00000159399.10 HK2 766.44623 -0.7636722 0.2417055 -3.159515 0.0015803 0.9999305 9.865551 7.884404 9.131868 9.139829 10.107042 10.454723
ENSG00000128965.13 CHAC1 31.98198 1.4449630 0.4574674 3.158614 0.0015852 0.9999305 5.207028 7.296406 5.251880 5.409718 4.744556 5.776348
dgem <- dge
write.table(dgem,file="deseq2_muscle.tsv",quote=FALSE,sep='\t')

sss$quickdash <- scale(pat[match(sss$id,pat$id),"QuickDASH_baseline"])
dds <- DESeqDataSetFromMatrix(countData = xxs , colData = sss, design = ~ quickdash )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression correlates with QuickDASH in synovium") %>% kable_paper("hover", full_width = F)
Top gene expression correlates with QuickDASH in synovium
baseMean log2FoldChange lfcSE stat pvalue padj 41S 42S 43S 44S 45S 46S
ENSG00000119508.18 NR4A3 481.25337 2.5912836 0.4224586 6.133817 0.0e+00 0.0000089 6.342950 11.312129 7.252746 7.529529 5.661256 6.794226
ENSG00000211893.4 IGHG2 610.05143 -2.7334236 0.4464040 -6.123206 0.0e+00 0.0000089 6.594382 5.936293 5.962233 5.715193 11.567969 9.184508
ENSG00000198435.4 NRARP 381.16544 1.1179232 0.1899825 5.884347 0.0e+00 0.0000258 8.244739 9.925151 8.107532 9.270060 7.120048 7.842529
ENSG00000137962.13 ARHGAP29 654.33270 0.7928283 0.1461255 5.425668 1.0e-07 0.0002795 9.006135 10.173490 9.329650 10.002710 8.044563 9.176952
ENSG00000069122.19 ADGRF5 958.81660 0.6851472 0.1322419 5.181016 2.0e-07 0.0008545 9.642179 10.460410 9.784922 10.690402 8.798755 9.682777
ENSG00000160200.18 CBS 153.06842 -1.2082613 0.2348533 -5.144749 3.0e-07 0.0008644 7.266678 6.662419 7.209423 6.090989 8.967252 7.535559
ENSG00000172201.12 ID4 1039.66904 0.8189104 0.1620821 5.052442 4.0e-07 0.0011039 9.646803 11.144889 9.912144 10.429460 8.931841 9.224151
ENSG00000091879.14 ANGPT2 263.11042 1.0103110 0.2003035 5.043900 5.0e-07 0.0011039 7.618551 8.864783 7.501996 9.301480 6.891241 7.861650
ENSG00000128917.8 DLL4 474.37653 0.8229100 0.1672282 4.920881 9.0e-07 0.0018534 8.392112 9.962565 9.172649 9.315911 7.897936 8.290204
ENSG00000255422.3 AP002954.1 34.26502 -1.4750875 0.3026374 -4.874108 1.1e-06 0.0021162 6.048514 5.284400 5.839404 5.209793 6.984841 6.504942
ENSG00000102755.12 FLT1 765.33467 0.6504951 0.1366575 4.760038 1.9e-06 0.0033008 9.274835 10.300166 9.337561 10.250167 8.628099 9.466493
ENSG00000153721.19 CNKSR3 270.61973 0.9462839 0.1992663 4.748841 2.0e-06 0.0033008 7.282919 9.416852 8.112175 8.592940 7.154720 7.829637
ENSG00000211660.3 IGLV2-23 127.58748 -4.7808741 1.0122016 -4.723243 2.3e-06 0.0034569 4.842938 4.439336 4.849674 4.439336 9.585722 6.197751
ENSG00000142178.9 SIK1 83.51102 2.5309590 0.5568599 4.545056 5.5e-06 0.0075952 5.961863 6.699988 6.249406 8.681734 4.897052 4.439336
ENSG00000270550.1 IGHV3-30 64.50448 -3.2358005 0.7200406 -4.493914 7.0e-06 0.0088325 5.133949 4.439336 4.729976 4.947109 8.607821 6.059306
ENSG00000239922.2 AC092958.1 52.08835 -1.0543928 0.2353789 -4.479555 7.5e-06 0.0088325 6.746456 5.663994 6.231031 5.801409 7.346255 6.398354
ENSG00000224799.1 AL139397.1 22.12071 3.9014647 0.8728998 4.469545 7.8e-06 0.0088325 4.439336 7.360643 5.200730 4.947109 4.439336 4.439336
ENSG00000196436.8 NPIPB15 94.67156 -2.2311487 0.5003033 -4.459592 8.2e-06 0.0088325 7.322710 5.570767 4.849674 4.947109 8.510659 7.204041
ENSG00000164035.10 EMCN 921.78699 0.7818082 0.1766971 4.424567 9.7e-06 0.0092012 9.262829 10.632730 9.374555 10.648705 8.599629 9.858255
ENSG00000120279.6 MYCT1 300.89766 0.8354745 0.1890875 4.418453 9.9e-06 0.0092012 8.014715 9.307625 8.354564 8.697591 6.997701 8.193281
dges <- dge
write.table(dges,file="deseq2_synovium.tsv",quote=FALSE,sep='\t')

Here let’s look at some plots.

MA plot shows the avrage level and fold change of all detected genes. Volcano plot shows the fold change and the significance, as measured by -log(p-value). Significant genes are shown as red points.

There are heatmaps of the top ranked genes by p-value. Above the gene expression values there is a bar in yellow/orange colours. Yellow shows relatively low QuickDASH score and orange shows high score.

maplot <- function(de,contrast_name) {
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=1)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 1)
}

make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}

make_heatmap <- function(de,name,myss,mx,n=30){
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  values <- myss$quickdash
  f <- colorRamp(c("yellow", "orange"))
  rr <- range(values)
  svals <- (values-rr[1])/diff(rr)
  colcols <- rgb(f(svals)/255)
  mxn <- mx/rowSums(mx)*1000000
  x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
  heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(10,25), cexRow=1.5, 
    main=paste("Top ranked",n,"genes in",name) , ColSideColors = colcols )
}

maplot(dgeb,"bone")

make_volcano(dgeb,"bone")

make_heatmap(dgeb,"bone",ssb,xxb,n=20)

maplot(dgec,"capsule")

make_volcano(dgec,"capsule")

make_heatmap(dgec,"capsule",ssc,xxc,n=20)

maplot(dgef,"fat")

make_volcano(dgef,"fat")

make_heatmap(dgef,"fat",ssf,xxf,n=20)

maplot(dgem,"muscle")

make_volcano(dgem,"muscle")

make_heatmap(dgem,"muscle",ssm,xxm,n=20)

maplot(dges,"synovium")

make_volcano(dges,"synovium")

make_heatmap(dges,"synovium",sss,xxs,n=20)

Pathway enrichment

Here I’m using the mitch package and gene pathways from Reactome to understand the affected pathways separately for each tissue type.

Reactome pathways downloaded 24th Nov 2021.

#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")

# gene table
gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)

m <- mitch_import(dgeb, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 20241
## Note: no. genes in output = 20208
## Note: estimated proportion of input genes in output = 0.998
mb <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(mb$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in bone") %>% kable_paper("hover", full_width = F)
Top gene pathway correlates with QuickDASH in bone
set setSize pANOVA s.dist p.adjustANOVA
197 Classical antibody-mediated complement activation 73 0 -0.8228846 0
379 FCGR activation 80 0 -0.7774034 0
1120 Scavenging of heme from plasma 74 0 -0.8016828 0
225 Creation of C4 and C2 activators 78 0 -0.7756990 0
555 Initial triggering of complement 85 0 -0.7419084 0
1077 Role of LAT2/NTAL/LAB on calcium mobilization 78 0 -0.7241558 0
125 CD22 mediated BCR regulation 61 0 -0.7912800 0
1078 Role of phospholipids in phagocytosis 93 0 -0.6392111 0
541 Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell 178 0 -0.4475225 0
377 FCERI mediated MAPK activation 94 0 -0.5995284 0
1008 Regulation of Complement cascade 102 0 -0.5698153 0
209 Complement cascade 109 0 -0.5505979 0
376 FCERI mediated Ca+2 mobilization 93 0 -0.5870080 0
112 Binding and Uptake of Ligands by Scavenger Receptors 102 0 -0.5573626 0
76 Antigen activates B Cell Receptor (BCR) leading to generation of second messengers 86 0 -0.5856265 0
381 FCGR3A-mediated phagocytosis 127 0 -0.4821218 0
625 Leishmania phagocytosis 127 0 -0.4821218 0
835 Parasite infection 127 0 -0.4821218 0
1036 Regulation of actin dynamics for phagocytic cup formation 129 0 -0.4707796 0
380 FCGR3A-mediated IL10 synthesis 104 0 -0.5238347 0
write.table(mb$enrichment_result,file="mitch_bone.tsv",quote=FALSE,sep='\t')

m <- mitch_import(dgec, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 18726
## Note: no. genes in output = 18694
## Note: estimated proportion of input genes in output = 0.998
mc <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(mc$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in capsule") %>% kable_paper("hover", full_width = F)
Top gene pathway correlates with QuickDASH in capsule
set setSize pANOVA s.dist p.adjustANOVA
554 Initial triggering of complement 49 0 -0.8492346 0
199 Classical antibody-mediated complement activation 38 0 -0.9169140 0
227 Creation of C4 and C2 activators 42 0 -0.8686289 0
163 Cell Cycle 611 0 0.2275055 0
165 Cell Cycle, Mitotic 495 0 0.2394929 0
211 Complement cascade 72 0 -0.6135992 0
1003 Regulation of Complement cascade 66 0 -0.6355730 0
1113 Scavenging of heme from plasma 39 0 -0.8065687 0
114 Binding and Uptake of Ligands by Scavenger Receptors 66 0 -0.6089204 0
164 Cell Cycle Checkpoints 252 0 0.3122514 0
380 FCGR activation 45 0 -0.7008383 0
633 M Phase 354 0 0.2381895 0
127 CD22 mediated BCR regulation 36 0 -0.7382981 0
702 Mitotic Prometaphase 184 0 0.3168575 0
1059 Resolution of Sister Chromatid Cohesion 104 0 0.4070778 0
1071 Role of LAT2/NTAL/LAB on calcium mobilization 42 0 -0.6355963 0
1129 Separation of Sister Chromatids 167 0 0.3030886 0
704 Mitotic Spindle Checkpoint 108 0 0.3756511 0
698 Mitotic Anaphase 222 0 0.2610886 0
701 Mitotic Metaphase and Anaphase 223 0 0.2603206 0
write.table(mc$enrichment_result,file="mitch_capsule.tsv",quote=FALSE,sep='\t')

m <- mitch_import(dgef, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 19072
## Note: no. genes in output = 19042
## Note: estimated proportion of input genes in output = 0.998
mf <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(mf$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in fat") %>% kable_paper("hover", full_width = F)
Top gene pathway correlates with QuickDASH in fat
set setSize pANOVA s.dist p.adjustANOVA
199 Classical antibody-mediated complement activation 53 0 -0.7386113 0
562 Initial triggering of complement 66 0 -0.6591021 0
670 Metabolism of RNA 679 0 -0.2082095 0
1448 rRNA processing in the nucleus and cytosol 190 0 -0.3744877 0
227 Creation of C4 and C2 activators 59 0 -0.6656491 0
152 Cap-dependent Translation Initiation 118 0 -0.4694532 0
372 Eukaryotic Translation Initiation 118 0 -0.4694532 0
1014 Regulation of Complement cascade 78 0 -0.5736497 0
114 Binding and Uptake of Ligands by Scavenger Receptors 82 0 -0.5581957 0
660 Major pathway of rRNA processing in the nucleolus and cytosol 180 0 -0.3769113 0
1358 Translation 294 0 -0.2946056 0
420 Formation of a pool of free 40S subunits 100 0 -0.5009133 0
371 Eukaryotic Translation Elongation 93 0 -0.5167050 0
620 L13a-mediated translational silencing of Ceruloplasmin expression 110 0 -0.4712416 0
211 Complement cascade 86 0 -0.5278466 0
844 Peptide chain elongation 88 0 -0.5205426 0
682 Metabolism of proteins 1690 0 -0.1240629 0
554 Infectious disease 772 0 -0.1762982 0
462 GTP hydrolysis and joining of the 60S ribosomal subunit 111 0 -0.4537003 0
373 Eukaryotic Translation Termination 92 0 -0.4921361 0
write.table(mf$enrichment_result,file="mitch_fat.tsv",quote=FALSE,sep='\t')

m <- mitch_import(dgem, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15920
## Note: no. genes in output = 15898
## Note: estimated proportion of input genes in output = 0.999
mm <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(mm$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in muscle") %>% kable_paper("hover", full_width = F)
Top gene pathway correlates with QuickDASH in muscle
set setSize pANOVA s.dist p.adjustANOVA
193 Classical antibody-mediated complement activation 49 0 -0.8113652 0
221 Creation of C4 and C2 activators 53 0 -0.7679597 0
1086 Scavenging of heme from plasma 49 0 -0.7959106 0
371 FCGR activation 54 0 -0.7529127 0
124 CD22 mediated BCR regulation 46 0 -0.7751840 0
542 Initial triggering of complement 59 0 -0.6536141 0
977 Regulation of Complement cascade 72 0 -0.5870193 0
1044 Role of LAT2/NTAL/LAB on calcium mobilization 53 0 -0.6759218 0
205 Complement cascade 77 0 -0.5557015 0
111 Binding and Uptake of Ligands by Scavenger Receptors 75 0 -0.5564212 0
1045 Role of phospholipids in phagocytosis 65 0 -0.5774065 0
76 Antigen activates B Cell Receptor (BCR) leading to generation of second messengers 69 0 -0.5554097 0
644 Metabolism of RNA 676 0 0.1727597 0
368 FCERI mediated Ca+2 mobilization 67 0 -0.5331755 0
372 FCGR3A-mediated IL10 synthesis 77 0 -0.4871341 0
373 FCGR3A-mediated phagocytosis 102 0 -0.3995477 0
609 Leishmania phagocytosis 102 0 -0.3995477 0
810 Parasite infection 102 0 -0.3995477 0
369 FCERI mediated MAPK activation 69 0 -0.4829413 0
163 Cell surface interactions at the vascular wall 146 0 -0.3128196 0
write.table(mm$enrichment_result,file="mitch_muscle.tsv",quote=FALSE,sep='\t')

m <- mitch_import(dges, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 19361
## Note: no. genes in output = 19327
## Note: estimated proportion of input genes in output = 0.998
ms <- mitch_calc(m, genesets, priority="significance")
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(ms$enrichment_result,20) %>% kbl(caption = "Top gene pathway correlates with QuickDASH in synovium") %>% kable_paper("hover", full_width = F)
Top gene pathway correlates with QuickDASH in synovium
set setSize pANOVA s.dist p.adjustANOVA
664 Metabolism 1829 0 -0.1915636 0
561 Innate Immune System 939 0 -0.2522998 0
678 Metabolism of proteins 1695 0 -0.1721900 0
543 Immune System 1744 0 -0.1653825 0
302 Disease 1446 0 -0.1774727 0
200 Classical antibody-mediated complement activation 45 0 -0.9325842 0
383 FCGR activation 52 0 -0.8437035 0
558 Initial triggering of complement 57 0 -0.8032102 0
550 Infectious disease 761 0 -0.2204582 0
228 Creation of C4 and C2 activators 50 0 -0.8390206 0
128 CD22 mediated BCR regulation 43 0 -0.9023531 0
765 Neutrophil degranulation 441 0 -0.2813883 0
1008 Regulation of Complement cascade 74 0 -0.6729074 0
1355 Translation 294 0 -0.3341908 0
212 Complement cascade 81 0 -0.6265326 0
1077 Role of LAT2/NTAL/LAB on calcium mobilization 49 0 -0.7993568 0
1120 Scavenging of heme from plasma 46 0 -0.8195701 0
385 FCGR3A-mediated phagocytosis 99 0 -0.5504704 0
627 Leishmania phagocytosis 99 0 -0.5504704 0
836 Parasite infection 99 0 -0.5504704 0
write.table(ms$enrichment_result,file="mitch_synovium.tsv",quote=FALSE,sep='\t')

Mitch 1D reports

Here I’m using the mitch package to identify the most enriched pathways across all tissues.

There is a heatmap showing the QuickDASH association correlation across tissues.

Then there is the results of the multi-contrast pathway analysis with mitch in the form of a table and heatmap of pathway enrichment.

x <- list("bone"=dgeb, "capsule"=dgec, "fat"=dgef , "muscle"=dgem, "synovium"=dges)

y <- mitch_import(x, DEtype="deseq2",geneTable=gt)
## Note: Mean no. genes in input = 18664
## Note: no. genes in output = 14927
## Note: estimated proportion of input genes in output = 0.8
heatmap.2(cor(y,method="s"),trace="none",scale="none",margins = c(10,10),main="Spearman correlation across tissues") 

res <- mitch_calc(y, genesets, priority="significance")
## Note: When prioritising by significance (ie: small 
##             p-values), large effect sizes might be missed.
head(res$enrichment_result,30) %>% kbl(caption = "Top gene pathway correlates with QuickDASH across all tissues") %>% kable_paper("hover", full_width = F)
Top gene pathway correlates with QuickDASH across all tissues
set setSize pMANOVA s.bone s.capsule s.fat s.muscle s.synovium p.bone p.capsule p.fat p.muscle p.synovium s.dist SD p.adjustMANOVA
215 Creation of C4 and C2 activators 40 0 -0.8335729 -0.8741519 -0.6901961 -0.7715423 -0.8582690 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.8075562 0.0753887 0
187 Classical antibody-mediated complement activation 37 0 -0.8376200 -0.9215000 -0.7383697 -0.7861834 -0.9195578 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.8866982 0.0809324 0
534 Initial triggering of complement 46 0 -0.7683594 -0.8811294 -0.7002422 -0.6236900 -0.8183561 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.7075714 0.1003563 0
365 FCGR activation 42 0 -0.7696147 -0.7424317 -0.5174944 -0.7138154 -0.8511349 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.6264302 0.1236950 0
1075 Scavenging of heme from plasma 37 0 -0.8238687 -0.8111049 -0.6080373 -0.7658360 -0.8069664 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.7158262 0.0894056 0
199 Complement cascade 63 0 -0.5514058 -0.6799938 -0.5282135 -0.5084000 -0.6710792 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.3244875 0.0815699 0
118 CD22 mediated BCR regulation 35 0 -0.8247726 -0.7361613 -0.6163041 -0.7391428 -0.8806262 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.7099265 0.1004941 0
966 Regulation of Complement cascade 59 0 -0.5370583 -0.6942472 -0.5424025 -0.5201160 -0.7008454 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.3514127 0.0904257 0
354 Eukaryotic Translation Elongation 92 0 0.4986694 -0.2785671 -0.4854604 0.3302853 -0.2194048 0e+00 0.0000039 0.0000000 0.0000000 0.0002788 0.8480386 0.4226100 0
396 Formation of a pool of free 40S subunits 99 0 0.4486993 -0.2671495 -0.4691070 0.3195047 -0.1837964 0e+00 0.0000044 0.0000000 0.0000000 0.0015927 0.7928592 0.3949728 0
805 Peptide chain elongation 87 0 0.4875329 -0.2849010 -0.4898720 0.3149689 -0.2080274 0e+00 0.0000044 0.0000000 0.0000004 0.0008043 0.8374439 0.4167766 0
537 Innate Immune System 877 0 -0.1087803 0.0175375 -0.0373746 -0.0426511 -0.2098180 1e-07 0.3828449 0.0629070 0.0337995 0.0000000 0.2436807 0.0870846 0
732 Neutrophil degranulation 424 0 -0.1427639 0.1054821 -0.0091764 -0.0585129 -0.2418029 5e-07 0.0002083 0.7470223 0.0396870 0.0000000 0.3057526 0.1317518 0
591 L13a-mediated translational silencing of Ceruloplasmin expression 109 0 0.4178082 -0.2187855 -0.4373840 0.2992666 -0.1539264 0e+00 0.0000806 0.0000000 0.0000001 0.0055471 0.7259403 0.3623737 0
105 Binding and Uptake of Ligands by Scavenger Receptors 63 0 -0.4506168 -0.6016710 -0.5267291 -0.4918478 -0.5404835 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.1732620 0.0563822 0
436 GTP hydrolysis and joining of the 60S ribosomal subunit 110 0 0.4180088 -0.2274206 -0.4186911 0.2965844 -0.1601355 0e+00 0.0000384 0.0000000 0.0000001 0.0037503 0.7178853 0.3583571 0
1350 Viral mRNA Translation 87 0 0.4770595 -0.2867460 -0.4576293 0.3044227 -0.2030564 0e+00 0.0000038 0.0000000 0.0000009 0.0010709 0.8081699 0.4023776 0
1033 Role of LAT2/NTAL/LAB on calcium mobilization 41 0 -0.6536834 -0.6300698 -0.4592398 -0.6124465 -0.7492586 0e+00 0.0000000 0.0000004 0.0000000 0.0000000 1.4041618 0.1046903 0
140 Cap-dependent Translation Initiation 117 0 0.3775135 -0.2054225 -0.4355846 0.2977452 -0.1627879 0e+00 0.0001260 0.0000000 0.0000000 0.0023810 0.6997149 0.3486749 0
355 Eukaryotic Translation Initiation 117 0 0.3775135 -0.2054225 -0.4355846 0.2977452 -0.1627879 0e+00 0.0001260 0.0000000 0.0000000 0.0023810 0.6997149 0.3486749 0
356 Eukaryotic Translation Termination 91 0 0.4554573 -0.2872549 -0.4585386 0.3001090 -0.2214409 0e+00 0.0000022 0.0000000 0.0000008 0.0002641 0.7995723 0.3969746 0
1079 Selenocysteine synthesis 91 0 0.4582883 -0.2732068 -0.4579742 0.2924858 -0.1902856 0e+00 0.0000067 0.0000000 0.0000014 0.0017204 0.7849631 0.3906212 0
1306 Translation 293 0 0.2013213 -0.0993404 -0.2478766 0.2183181 -0.2793551 0e+00 0.0035414 0.0000000 0.0000000 0.0000000 0.4873845 0.2392590 0
151 Cell Cycle 558 0 -0.1812352 0.2030000 0.0004268 0.0162591 -0.0007608 0e+00 0.0000000 0.9863320 0.5139935 0.9756382 0.2726175 0.1360480 0
740 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) 93 0 0.4445730 -0.2749916 -0.4450181 0.2950806 -0.2036313 0e+00 0.0000046 0.0000000 0.0000009 0.0006962 0.7744965 0.3850567 0
647 Metabolism of proteins 1576 0 0.0908130 0.0116023 -0.0916274 0.0477584 -0.1330572 0e+00 0.4505906 0.0000000 0.0018968 0.0000000 0.1917349 0.0944086 0
1034 Role of phospholipids in phagocytosis 53 0 -0.5412433 -0.5436180 -0.4362963 -0.5052732 -0.6211954 0e+00 0.0000000 0.0000000 0.0000000 0.0000000 1.1916392 0.0671177 0
1058 SRP-dependent cotranslational protein targeting to membrane 110 0 0.4145042 -0.2333609 -0.3888310 0.2232792 -0.2585703 0e+00 0.0000240 0.0000000 0.0000530 0.0000029 0.7029743 0.3472625 0
153 Cell Cycle, Mitotic 454 0 -0.1943062 0.2097745 0.0075367 0.0150300 0.0025431 0e+00 0.0000000 0.7841918 0.5849686 0.9263743 0.2864427 0.1429336 0
1025 Response of EIF2AK4 (GCN2) to amino acid deficiency 99 0 0.4399403 -0.1960310 -0.3987896 0.2760461 -0.1512304 0e+00 0.0007586 0.0000000 0.0000021 0.0093857 0.7000578 0.3499643 0
pw <- res$enrichment_result[1:30,4:8]
rownames(pw) <- res$enrichment_result[1:30,1]
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(pw),trace="none",col=colfunc(25),scale="none", margins = c(10,25), cexRow=0.6,cexCol=0.7,
    main="Top ranked pathways")

Mitch 5D report

Now I will look at the expression of the top ranked pathway genes (Creation of C4 and C2 activators) across the whole experiment. It will give us a good idea about what’s going on.

set <- head(res$enrichment_result$set,1)
genes <- genesets[[set]]
genes <- gt[match(genes,gt$gene),1]
xxn <- xx/rowSums(xx)*1000000
mx <- as.matrix(xxn[which(rownames(xxn) %in% genes),])
rownames(mx) <- sapply(strsplit(rownames(mx)," "),"[[",2)
colfunc <- colorRampPalette(c("blue", "white", "red"))

parti <-  substr(colnames(mx), 1, 2)
values <- as.numeric(sss[match(parti,sss$ParticipantID),"quickdash"])
f <- colorRamp(c("yellow", "orange"))
rr <- range(values)
svals <- (values-rr[1])/diff(rr)
colcols <- rgb(f(svals)/255)

heatmap.2(mx,trace="none",col=colfunc(25),scale="row", margins = c(5,10), 
    cexRow=0.7, cexCol=0.7, main=set , ColSideColors = colcols )
mtext("yellow=low quickdash, orange=high quickdash")

pdf("c4c2activators_heatmap.pdf")

heatmap.2(mx,trace="none",col=colfunc(25),scale="row", margins = c(5,10), 
    cexRow=0.7, cexCol=0.7, main=set , ColSideColors = colcols )
mtext("yellow=low quickdash, orange=high quickdash")

dev.off()
## png 
##   2

It looks like expression of immunoglubulins in some of the bone samples could explain the observed pathway enrichment.

Conclusions

The number of differentially expressed genes was relatively small. This is to be expected given that the sample number (n) is very small. Still, there were some statistically significant (FDR<0.05) differentially expressed genes in each tissue type (except in muscle). Some of the top ranked genes appear to be involved in inflammation and response to stress including interleukins, chemokines, complement components, nuclear response factors and immediate early response genes. Some of these were up and down regulated, which suggests a shifting balance between inflammatory and antiinflammatory signaling pathways. Using a gene expression heatmap of one of the top pathways, we see it is dominated by expression of innumoglobulins in bone. This could be an important finding or it might be due to contamination of bone samples with blood.

Session information

For reproducibility.

sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] pkgload_1.3.2               GGally_2.1.2               
##  [3] beeswarm_0.4.0              gtools_3.9.4               
##  [5] echarts4r_0.4.4             kableExtra_1.3.4           
##  [7] topconfects_1.12.0          limma_3.52.4               
##  [9] eulerr_6.1.1                mitch_1.11.1               
## [11] MASS_7.3-58.1               fgsea_1.22.0               
## [13] gplots_3.1.3                DESeq2_1.36.0              
## [15] SummarizedExperiment_1.26.1 Biobase_2.56.0             
## [17] MatrixGenerics_1.8.1        matrixStats_0.63.0         
## [19] GenomicRanges_1.48.0        GenomeInfoDb_1.32.4        
## [21] IRanges_2.30.1              S4Vectors_0.34.0           
## [23] BiocGenerics_0.42.0         reshape2_1.4.4             
## [25] forcats_0.5.2               stringr_1.5.0              
## [27] dplyr_1.0.10                purrr_0.3.5                
## [29] readr_2.1.3                 tidyr_1.2.1                
## [31] tibble_3.1.8                ggplot2_3.4.0              
## [33] tidyverse_1.3.2             zoo_1.8-11                 
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.1           backports_1.4.1        fastmatch_1.1-3       
##   [4] systemfonts_1.0.4      plyr_1.8.8             splines_4.2.1         
##   [7] BiocParallel_1.30.4    digest_0.6.30          htmltools_0.5.3       
##  [10] fansi_1.0.3            magrittr_2.0.3         memoise_2.0.1         
##  [13] googlesheets4_1.0.1    tzdb_0.3.0             Biostrings_2.64.1     
##  [16] annotate_1.74.0        modelr_0.1.10          svglite_2.1.0         
##  [19] timechange_0.1.1       prettyunits_1.1.1      colorspace_2.0-3      
##  [22] blob_1.2.3             rvest_1.0.3            haven_2.5.1           
##  [25] xfun_0.35              crayon_1.5.2           RCurl_1.98-1.9        
##  [28] jsonlite_1.8.4         genefilter_1.78.0      survival_3.4-0        
##  [31] glue_1.6.2             gtable_0.3.1           gargle_1.2.1          
##  [34] zlibbioc_1.42.0        XVector_0.36.0         webshot_0.5.4         
##  [37] DelayedArray_0.22.0    scales_1.2.1           DBI_1.1.3             
##  [40] Rcpp_1.0.9             viridisLite_0.4.1      xtable_1.8-4          
##  [43] progress_1.2.2         bit_4.0.5              htmlwidgets_1.5.4     
##  [46] httr_1.4.4             RColorBrewer_1.1-3     ellipsis_0.3.2        
##  [49] farver_2.1.1           pkgconfig_2.0.3        reshape_0.8.9         
##  [52] XML_3.99-0.13          sass_0.4.4             dbplyr_2.2.1          
##  [55] locfit_1.5-9.6         utf8_1.2.2             labeling_0.4.2        
##  [58] tidyselect_1.2.0       rlang_1.0.6            later_1.3.0           
##  [61] AnnotationDbi_1.58.0   munsell_0.5.0          cellranger_1.1.0      
##  [64] tools_4.2.1            cachem_1.0.6           cli_3.4.1             
##  [67] generics_0.1.3         RSQLite_2.2.19         broom_1.0.1           
##  [70] evaluate_0.18          fastmap_1.1.0          yaml_2.3.6            
##  [73] knitr_1.41             bit64_4.0.5            fs_1.5.2              
##  [76] caTools_1.18.2         KEGGREST_1.36.3        mime_0.12             
##  [79] xml2_1.3.3             compiler_4.2.1         rstudioapi_0.14       
##  [82] png_0.1-8              reprex_2.0.2           geneplotter_1.74.0    
##  [85] bslib_0.4.1            stringi_1.7.8          highr_0.9             
##  [88] lattice_0.20-45        Matrix_1.5-3           vctrs_0.5.1           
##  [91] pillar_1.8.1           lifecycle_1.0.3        jquerylib_0.1.4       
##  [94] data.table_1.14.6      bitops_1.0-7           httpuv_1.6.6          
##  [97] R6_2.5.1               promises_1.2.0.1       KernSmooth_2.23-20    
## [100] gridExtra_2.3          codetools_0.2-18       assertthat_0.2.1      
## [103] withr_2.5.0            GenomeInfoDbData_1.2.8 parallel_4.2.1        
## [106] hms_1.1.2              grid_4.2.1             rmarkdown_2.18        
## [109] googledrive_2.0.0      shiny_1.7.3            lubridate_1.9.0