Create regulatory domains
This example shows how to create gene regulatory domains from two input files: a TSS file (tss_file) and a chromosome sizes file (chr_sizes_file).
Both files must be in .bed format; the format is documented here: http://genome.ucsc.edu/FAQ/FAQformat#format1
The columns of tss_file should be: chr \t start \t end
association_rule can be one of: "one_closet", "two_closet", or "basal_plus_extention"
[1]:
# Load IPython's autoreload extension.
%load_ext autoreload
# Mode 2: reload all modules before executing each cell, so that edits to
# the package source are picked up without restarting the kernel.
%autoreload 2
[2]:
import greatpy as great
For hg38
[3]:
# Create the gene regulatory domains for the hg38 assembly from its TSS file
# and chromosome sizes file, using the "basal_plus_extention" association rule.
regdom_hg38 = great.tl.create_regdom(
    tss_file="../data/human/hg38/tss_from_great.bed",
    chr_sizes_file="../data/human/hg38/chr_size.bed",
    association_rule="basal_plus_extention",
    out_path=None,  # NOTE(review): presumably means no output file is written; confirm
)

# Bare expression: the notebook renders the result as the cell output.
regdom_hg38
[3]:
| | chr | chr_start | chr_end | name | tss | strand |
|---|---|---|---|---|---|---|
| ENSG00000186092 | chr1 | 0 | 450697 | OR4F5 | 65418 | + |
| ENSG00000284733 | chr1 | 66418 | 685673 | OR4F29 | 451697 | - |
| ENSG00000284662 | chr1 | 456697 | 920737 | OR4F16 | 686673 | - |
| ENSG00000187634 | chr1 | 691673 | 958290 | SAMD11 | 925737 | + |
| ENSG00000188976 | chr1 | 926737 | 964290 | NOC2L | 959290 | - |
| ... | ... | ... | ... | ... | ... | ... |
| ENSG00000183795 | chrY | 24052969 | 24812479 | BPY2B | 24618003 | + |
| ENSG00000187191 | chrY | 24619003 | 24828916 | DAZ3 | 24813479 | - |
| ENSG00000205916 | chrY | 24818479 | 25051104 | DAZ4 | 24833916 | + |
| ENSG00000185894 | chrY | 24834916 | 25617161 | BPY2C | 25052104 | - |
| ENSG00000172288 | chrY | 25057104 | 26622161 | CDY1 | 25622161 | + |
18777 rows × 6 columns
For hg19
[4]:
# Create the gene regulatory domains for the hg19 assembly from its TSS file
# and chromosome sizes file, using the "basal_plus_extention" association rule.
regdom_hg19 = great.tl.create_regdom(
    tss_file="../data/human/hg19/tss.bed",
    chr_sizes_file="../data/human/hg19/chr_size.bed",
    association_rule="basal_plus_extention",
    out_path=None,  # NOTE(review): presumably means no output file is written; confirm
)

# Bare expression: the notebook renders the result as the cell output.
regdom_hg19
[4]:
| | chr | chr_start | chr_end | name | tss | strand |
|---|---|---|---|---|---|---|
| ENSG00000186092 | chr1 | 0 | 362639 | OR4F5 | 69090 | + |
| ENSG00000235249 | chr1 | 70090 | 621053 | OR4F29 | 367639 | + |
| ENSG00000185097 | chr1 | 368639 | 856117 | OR4F16 | 622053 | - |
| ENSG00000187634 | chr1 | 627053 | 893670 | SAMD11 | 861117 | + |
| ENSG00000188976 | chr1 | 862117 | 899670 | NOC2L | 894670 | - |
| ... | ... | ... | ... | ... | ... | ... |
| ENSG00000183795 | chrY | 26199116 | 26958626 | BPY2B | 26764150 | + |
| ENSG00000187191 | chrY | 26765150 | 26975080 | DAZ3 | 26959626 | - |
| ENSG00000205916 | chrY | 26964626 | 27197251 | DAZ4 | 26980080 | + |
| ENSG00000185894 | chrY | 26981080 | 27763308 | BPY2C | 27198251 | - |
| ENSG00000172288 | chrY | 27203251 | 28768308 | CDY1 | 27768308 | + |
18549 rows × 6 columns