plot

[1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
[1]:
import greatpy as great
import bindome as bd

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

import time

import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation notices; consider narrowing it to a specific
# category/module once the noisy source is identified.
warnings.filterwarnings("ignore")
[2]:
# BED file passed as the first argument to the count plots and to the
# single-sample enrichment below.
test = "../data/tests/test_data/input/01_random.bed"
# Regulatory-domain BED file (human, hg38) passed alongside `test`.
regdom = "../data/human/hg38/regulatory_domain.bed"

Count plot

[3]:
# One row of three panels; each greatpy plotting helper draws onto its own axis.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 6), dpi=200)
panel_plotters = (
    great.pl.graph_nb_asso_per_peaks,
    great.pl.graph_dist_tss,
    great.pl.graph_absolute_dist_tss,
)
for draw_panel, panel_ax in zip(panel_plotters, ax):
    draw_panel(test, regdom, panel_ax)
plt.show()
../_images/notebooks_07_plot_5_0.png

Dotplot

Dotplot of the enrichment

[5]:
# Remaining inputs of the enrichment call, named for readability.
chr_size_path = "../data/human/hg38/chr_size.bed"
ontology_path = "../data/human/ontologies.csv"

# Single-sample enrichment of `test` against the regulatory domains.
enrichment = great.tl.enrichment(test, regdom, chr_size_path, ontology_path)
[6]:
# Rename the enrichment columns before plotting
# (presumably the names plot_enrich looks for; verify against greatpy).
column_aliases = {"binom_p_value": "p_value", "go_term": "name"}
plot = enrichment.rename(columns=column_aliases)

plt.figure(figsize=(8, 7))
great.pl.plot_enrich(plot)
../_images/notebooks_07_plot_9_0.png

Dotplot of multi-sample enrichment

[7]:
# Samples for the multi-sample enrichment, one string per sample in the form
# "<factor>:<comma-separated cell types / cell lines>". The part before the
# colon is what labels the columns of the tables shown further down.
# NOTE(review): how greatpy resolves these identifiers into peak sets is not
# visible here; confirm against the enrichment_multiple documentation.
#
# A dedicated name is used instead of rebinding `test`, which still holds the
# BED path used by the earlier plotting/enrichment cells; overwriting it would
# break those cells when they are re-run after this one.
tf_samples = [
    "SRF:Ishikawa,A-673-clone-Asp114,K-562,MCF-7,Hep-G2",
    "MAX:K-562,WA01,HeLa-S3",
    "BACH1:A-549,GM12878",
    "CDK9:A-375,MM1-S,MV4-11,P493-6,BT-474,HEK293T",
    "GATA1:erythroblast,HUDEP-2,K-562",
    "IKZF1:K-562,GM12878,HSPC",
    "SP1:liver,A-375,Hep-G2,HEK293,GM12878,A-549,K-562,HEK293T,WA01",
    "TCF7:Hep-G2,GM12878,K-562",
    "ZBTB40:MCF-7,Hep-G2,GM12878",
    "AFF1:MV4-11,K-562",
]

# Run both the binomial and the hypergeometric test for every sample.
results = great.tl.enrichment_multiple(
    tests=tf_samples,
    regdom_file=regdom,  # same regulatory-domain file as the cells above
    chr_size_file="../data/human/hg38/chr_size.bed",
    annotation_file="../data/human/ontologies.csv",
    binom=True,
    hypergeom=True,
)
[11]:
# Draw the multi-sample dot plot on an explicitly sized figure. The call
# returns three objects, which are inspected in the following cells.
fig = plt.figure(figsize=(15, 12))
dotplot_options = {
    "fig": fig,
    "show_term_name": True,
    "term_name_nchars": 20,
}
p_val, odd_ratio, df = great.pl.dotplot_multi_sample(results, **dotplot_options)
../_images/notebooks_07_plot_12_0.png
[16]:
# Preview the first ten rows of the table returned by the dot plot call.
df.head(n=10)
[16]:
id go_term binom_p_value binom_fold_enrichment hypergeom_p_value hypergeometric_fold_enrichment intersection_size recall index
0 GO:0051292 nuclear pore complex assembly 8.97009e-04 3.18950e+03 6.28405e+00 4.94165e+00 2.00000e+00 1.53846e-01 0
1 GO:0030261 chromosome condensation 1.27056e-03 2.67271e+03 5.51863e+00 4.39416e+00 2.00000e+00 1.05263e-01 0
2 GO:0001650 fibrillar center 5.87653e-03 3.85300e+02 5.30190e+00 2.54406e+00 4.00000e+00 2.91971e-02 0
3 GO:0090096 positive regulation of metanephric cap mesench... 2.80032e-02 2.42984e+03 5.29709e+00 7.64209e+00 1.00000e+00 1.00000e+00 0
4 GO:0099637 neurotransmitter receptor transport 6.30720e-03 1.09058e+04 5.29709e+00 7.64209e+00 1.00000e+00 1.00000e+00 0
0 GO:0004896 cytokine receptor activity 3.31342e-03 8.66841e+02 7.98072e+00 3.58059e+00 3.00000e+00 6.97674e-02 1
1 GO:0038165 oncostatin-M-mediated signaling pathway 5.28500e-02 1.56594e+03 7.93887e+00 6.00685e+00 1.00000e+00 2.50000e-01 1
2 GO:0030883 endogenous lipid antigen binding 1.45583e-02 5.79649e+03 7.03282e+00 5.42189e+00 1.00000e+00 1.66667e-01 1
3 GO:0048006 antigen processing and presentation, endogenou... 1.45583e-02 5.79649e+03 7.03282e+00 5.42189e+00 1.00000e+00 1.66667e-01 1
4 GO:0004924 oncostatin-M receptor activity 5.19264e-02 1.59456e+03 7.03282e+00 5.42189e+00 1.00000e+00 1.66667e-01 1
[17]:
# Preview the first five rows of the per-sample p-value table.
p_val.head(n=5)
[17]:
index SRF MAX BACH1 CDK9 GATA1 IKZF1 SP1 TCF7 ZBTB40 AFF1
test
GO:0051292 nuclear pore complex 6.28405e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00
GO:0030261 chromosome condensat 5.51863e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00
GO:0001650 fibrillar center 5.30190e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00
GO:0090096 positive regulation 5.29709e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00
GO:0099637 neurotransmitter rec 5.29709e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00 1.00000e+00
[18]:
# Preview the first five rows of the per-sample odds-ratio table.
odd_ratio.head(n=5)
[18]:
index SRF MAX BACH1 CDK9 GATA1 IKZF1 SP1 TCF7 ZBTB40 AFF1
test
GO:0051292 nuclear pore complex 4.94165e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
GO:0030261 chromosome condensat 4.39416e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
GO:0001650 fibrillar center 2.54406e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
GO:0090096 positive regulation 7.64209e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
GO:0099637 neurotransmitter rec 7.64209e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00