Example BioMart hosts (historical and alternative servers). In practice, most users connect to Ensembl’s BioMart (the default host), but other institutions may host specialized BioMart instances.

# Load the biomaRt package for accessing biological databases via the BioMart API
library(biomaRt)

# host="caprica.caltech.edu"        # Example of a custom BioMart installation
# host="www.sanger.ac.uk"           # Sanger Institute (Ensembl developer site)
# host="biomart.informatics.jax.org" # Jackson Lab (mouse-specific resources)
# host="biomart.intogen.org"        # IntOGen cancer genomics portal
# host="ensembl.gramene.org"        # Gramene plant comparative genomics resource

# Archived BioMart release (May 2009) for the mouse mm9 assembly
# mart=useMart(host='may2009.archive.ensembl.org', biomart='ENSEMBL_MART_ENSEMBL', dataset="mmusculus_gene_ensembl")
# or the latest BioMart release for mouse (mm)
# mart <- useMart("ensembl", dataset="mmusculus_gene_ensembl") # rnorvegicus
# or the latest BioMart release for human (hs)
# mart <- useMart("ensembl", dataset="hsapiens_gene_ensembl", host="www.biomart.org")

# Connect to the Ensembl BioMart and select the human gene dataset;
# every query below reuses this `mart` object.
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")

1 Information: Lists of Filters and Attributes in biomaRt

When using biomaRt for gene ID conversion, we need to know:

  • Datasets: species- or genome-specific datasets available in the BioMart server
  • Filters: the input identifiers we provide (e.g., Ensembl ID, Entrez ID, Illumina probe ID)
  • Attributes: the output identifiers or annotations we want to retrieve (e.g., gene symbols, GO terms)

The following code demonstrates how to explore these:

# Enumerate the datasets (one per species / genome build) offered by the
# connected BioMart, then preview the first rows
db_list <- listDatasets(mart = mart)
head(db_list, n = 6L)
##                        dataset                           description
## 1 abrachyrhynchus_gene_ensembl Pink-footed goose genes (ASM259213v1)
## 2     acalliptera_gene_ensembl      Eastern happy genes (fAstCal1.3)
## 3   acarolinensis_gene_ensembl       Green anole genes (AnoCar2.0v2)
## 4    acchrysaetos_gene_ensembl       Golden eagle genes (bAquChr1.2)
## 5    acitrinellus_gene_ensembl        Midas cichlid genes (Midas_v5)
## 6    amelanoleuca_gene_ensembl       Giant panda genes (ASM200744v2)
##       version
## 1 ASM259213v1
## 2  fAstCal1.3
## 3 AnoCar2.0v2
## 4  bAquChr1.2
## 5    Midas_v5
## 6 ASM200744v2
# Enumerate the filters of the selected dataset, i.e. the kinds of input
# identifiers a query can be restricted by, then preview the first rows
filters <- listFilters(mart = mart)
head(filters, n = 6L)
##              name              description
## 1 chromosome_name Chromosome/scaffold name
## 2           start                    Start
## 3             end                      End
## 4      band_start               Band Start
## 5        band_end                 Band End
## 6    marker_start             Marker Start
# Example: keep only the filters whose name mentions "refseq"
filters[grepl("refseq", filters[["name"]]), ]
##                              name
## 36               with_refseq_mrna
## 37     with_refseq_mrna_predicted
## 38              with_refseq_ncrna
## 39    with_refseq_ncrna_predicted
## 40            with_refseq_peptide
## 41  with_refseq_peptide_predicted
## 96                    refseq_mrna
## 97          refseq_mrna_predicted
## 98                   refseq_ncrna
## 99         refseq_ncrna_predicted
## 100                refseq_peptide
## 101      refseq_peptide_predicted
##                                            description
## 36                              With RefSeq mRNA ID(s)
## 37                    With RefSeq mRNA predicted ID(s)
## 38                             With RefSeq ncRNA ID(s)
## 39                   With RefSeq ncRNA predicted ID(s)
## 40                           With RefSeq peptide ID(s)
## 41                 With RefSeq peptide predicted ID(s)
## 96                  RefSeq mRNA ID(s) [e.g. NM_000014]
## 97     RefSeq mRNA predicted ID(s) [e.g. XM_003403597]
## 98                 RefSeq ncRNA ID(s) [e.g. NR_000005]
## 99    RefSeq ncRNA predicted ID(s) [e.g. XR_001736914]
## 100              RefSeq peptide ID(s) [e.g. NP_000005]
## 101 RefSeq peptide predicted ID(s) [e.g. XP_003403645]
# Enumerate the attributes of the selected dataset, i.e. the fields a query
# can return, then preview the first rows
attr <- listAttributes(mart = mart)
head(attr, n = 6L)
##                            name                  description         page
## 1               ensembl_gene_id               Gene stable ID feature_page
## 2       ensembl_gene_id_version       Gene stable ID version feature_page
## 3         ensembl_transcript_id         Transcript stable ID feature_page
## 4 ensembl_transcript_id_version Transcript stable ID version feature_page
## 5            ensembl_peptide_id            Protein stable ID feature_page
## 6    ensembl_peptide_id_version    Protein stable ID version feature_page
# Example: keep only the attributes whose name (first column) mentions "symbol"
attr[grepl("symbol", attr[[1]]), ]
##                 name                description         page
## 63       hgnc_symbol                HGNC symbol feature_page
## 99 uniprot_gn_symbol UniProtKB Gene Name symbol feature_page
# Example: keep only the Gene Ontology attributes, i.e. those whose name
# (first column) starts with "go_"
attr[grepl("^go_", attr[[1]]), ]
##               name           description         page
## 45           go_id     GO term accession feature_page
## 48 go_linkage_type GO term evidence code feature_page

2 Gene ID Conversions with biomaRt

The getBM() function in biomaRt is the workhorse for converting between different gene identifiers.
By specifying the attributes (the information you want), the filters (the type of ID you are providing), and the values (your list of IDs), you can flexibly translate across databases.

Below are several examples demonstrating common conversions.

2.1 Convert from Ensembl Gene IDs to HGNC Symbols and Descriptions

# Ensembl gene IDs to translate
IDs <- c("ENSG00000205420", "ENSG00000206075")

# Filter on the Ensembl gene ID and return the ID together with its HGNC
# symbol and gene description
genes <- getBM(
  mart = mart,
  filters = "ensembl_gene_id",
  values = IDs,
  attributes = c("ensembl_gene_id", "hgnc_symbol", "description")
)

print(genes)
##   ensembl_gene_id hgnc_symbol
## 1 ENSG00000205420       KRT6A
## 2 ENSG00000206075    SERPINB5
##                                                   description
## 1               keratin 6A [Source:HGNC Symbol;Acc:HGNC:6443]
## 2 serpin family B member 5 [Source:HGNC Symbol;Acc:HGNC:8949]

2.2 Convert from HGNC Gene Symbols to Ensembl IDs and Descriptions

# HGNC gene symbols to translate
IDs <- c("AKT1", "IFNG", "STAT1")

# Filter on the HGNC symbol and return the WikiGene name, the symbol itself,
# the gene description and the Ensembl gene ID
genes <- getBM(
  mart = mart,
  filters = "hgnc_symbol",
  values = IDs,
  attributes = c("wikigene_name", "hgnc_symbol", "description", "ensembl_gene_id")
)

print(genes)
##   wikigene_name hgnc_symbol
## 1          AKT1        AKT1
## 2          IFNG        IFNG
## 3         STAT1       STAT1
##                                                                              description
## 1                        AKT serine/threonine kinase 1 [Source:HGNC Symbol;Acc:HGNC:391]
## 2                                    interferon gamma [Source:HGNC Symbol;Acc:HGNC:5438]
## 3 signal transducer and activator of transcription 1 [Source:HGNC Symbol;Acc:HGNC:11362]
##   ensembl_gene_id
## 1 ENSG00000142208
## 2 ENSG00000111537
## 3 ENSG00000115415

2.3 Convert from Entrez Gene IDs to HGNC Symbols and Descriptions

# Filter on a single Entrez gene ID ("1") and return its HGNC symbol and
# gene description; uniqueRows = TRUE asks for de-duplicated result rows
genes <- getBM(
  mart = mart,
  filters = "entrezgene_id",
  values = "1",
  attributes = c("hgnc_symbol", "description"),
  uniqueRows = TRUE
)
print(genes)
##   hgnc_symbol                                            description
## 1        A1BG alpha-1-B glycoprotein [Source:HGNC Symbol;Acc:HGNC:5]

2.4 Convert from RefSeq mRNA IDs to HGNC Symbols and Descriptions

# RefSeq mRNA accession to translate
IDs <- c("NM_001006946")

# Filter on the RefSeq mRNA ID and return the accession together with its
# HGNC symbol and gene description
genes <- getBM(
  mart = mart,
  filters = "refseq_mrna",
  values = IDs,
  attributes = c("refseq_mrna", "hgnc_symbol", "description")
)

print(genes)
##    refseq_mrna hgnc_symbol                                    description
## 1 NM_001006946        SDC1 syndecan 1 [Source:HGNC Symbol;Acc:HGNC:10658]

2.5 Retrieve GO Annotations for Genes by Symbol

# HGNC gene symbols whose GO annotations are wanted
IDs <- c("AKT1", "IFNG", "STAT1")

# Filter on the HGNC symbol and return one row per (symbol, GO term
# accession) pair
go_annotations <- getBM(
  mart = mart,
  filters = "hgnc_symbol",
  values = IDs,
  attributes = c("hgnc_symbol", "go_id")
)

print(go_annotations)
##     hgnc_symbol      go_id
## 1          AKT1 GO:0005524
## 2          AKT1 GO:0006468
## 3          AKT1 GO:0004674
## 4          AKT1 GO:0004672
## 5          AKT1 GO:0005515
## 6          AKT1 GO:0010628
## 7          AKT1 GO:0005634
## 8          AKT1 GO:0005737
## 9          AKT1 GO:0005886
## 10         AKT1 GO:0016020
## 11         AKT1 GO:0006915
## 12         AKT1 GO:0007399
## 13         AKT1 GO:0016740
## 14         AKT1 GO:0005654
## 15         AKT1 GO:0000166
## 16         AKT1 GO:0005516
## 17         AKT1 GO:0016301
## 18         AKT1 GO:0005739
## 19         AKT1 GO:0030335
## 20         AKT1 GO:0005829
## 21         AKT1 GO:0007165
## 22         AKT1 GO:0043066
## 23         AKT1 GO:0006417
## 24         AKT1 GO:0036064
## 25         AKT1 GO:0005929
## 26         AKT1 GO:0015630
## 27         AKT1 GO:0006094
## 28         AKT1 GO:0030154
## 29         AKT1 GO:0098794
## 30         AKT1 GO:0016477
## 31         AKT1 GO:0031982
## 32         AKT1 GO:0045948
## 33         AKT1 GO:0006511
## 34         AKT1 GO:0042803
## 35         AKT1 GO:0030336
## 36         AKT1 GO:0030334
## 37         AKT1 GO:0042981
## 38         AKT1 GO:0034198
## 39         AKT1 GO:0032991
## 40         AKT1 GO:0106310
## 41         AKT1 GO:0042802
## 42         AKT1 GO:0006413
## 43         AKT1 GO:0005758
## 44         AKT1 GO:0005978
## 45         AKT1 GO:0005977
## 46         AKT1 GO:0031146
## 47         AKT1 GO:0032436
## 48         AKT1 GO:0043161
## 49         AKT1 GO:0051604
## 50         AKT1 GO:0071889
## 51         AKT1 GO:0006888
## 52         AKT1 GO:0001649
## 53         AKT1 GO:0061512
## 54         AKT1 GO:0005547
## 55         AKT1 GO:0007173
## 56         AKT1 GO:0071364
## 57         AKT1 GO:0070979
## 58         AKT1 GO:0036444
## 59         AKT1 GO:0071277
## 60         AKT1 GO:1904262
## 61         AKT1 GO:0085020
## 62         AKT1 GO:0043491
## 63         AKT1 GO:0010975
## 64         AKT1 GO:0032869
## 65         AKT1 GO:0048009
## 66         AKT1 GO:0006006
## 67         AKT1 GO:0006606
## 68         AKT1 GO:0031669
## 69         AKT1 GO:1900182
## 70         AKT1 GO:0008283
## 71         AKT1 GO:0140896
## 72         AKT1 GO:0038202
## 73         AKT1 GO:0045542
## 74         AKT1 GO:0033138
## 75         AKT1 GO:0160049
## 76         AKT1 GO:0009267
## 77         AKT1 GO:0035556
## 78         AKT1 GO:0043325
## 79         AKT1 GO:0060416
## 80         AKT1 GO:1990418
## 81         AKT1 GO:0043001
## 82         AKT1 GO:0001938
## 83         AKT1 GO:0006893
## 84         AKT1 GO:0004712
## 85         AKT1 GO:1901796
## 86         AKT1 GO:0007186
## 87         AKT1 GO:0045944
## 88         AKT1 GO:0030027
## 89         AKT1 GO:0034142
## 90         AKT1 GO:0045746
## 91         AKT1 GO:0010595
## 92         AKT1 GO:0031295
## 93         AKT1 GO:0006607
## 94         AKT1 GO:0046777
## 95         AKT1 GO:0008286
## 96         AKT1 GO:0030307
## 97         AKT1 GO:0031145
## 98         AKT1 GO:0019899
## 99         AKT1 GO:0036416
## 100        AKT1 GO:2001240
## 101        AKT1 GO:0018107
## 102        AKT1 GO:0002042
## 103        AKT1 GO:0030291
## 104        AKT1 GO:0060644
## 105        AKT1 GO:0018105
## 106        AKT1 GO:0002181
## 107        AKT1 GO:0005783
## 108        AKT1 GO:0045947
## 109        AKT1 GO:1904263
## 110        AKT1 GO:0051247
## 111        AKT1 GO:0036316
## 112        AKT1 GO:0097700
## 113        AKT1 GO:1903078
## 114        AKT1 GO:0045724
## 115        AKT1 GO:1902018
## 116        AKT1 GO:0045429
## 117        AKT1 GO:0090201
## 118        AKT1 GO:0006979
## 119        AKT1 GO:0046889
## 120        AKT1 GO:0019221
## 121        AKT1 GO:0030235
## 122        AKT1 GO:0005938
## 123        AKT1 GO:0060627
## 124        AKT1 GO:0045600
## 125        AKT1 GO:0034123
## 126        AKT1 GO:0046209
## 127        AKT1 GO:0035519
## 128        AKT1 GO:0016310
## 129        AKT1 GO:0031397
## 130        AKT1 GO:0072350
## 131        AKT1 GO:1900087
## 132        AKT1 GO:0019900
## 133        AKT1 GO:0070141
## 134        AKT1 GO:0045861
## 135        AKT1 GO:1903038
## 136        AKT1 GO:0070848
## 137        AKT1 GO:0035655
## 138        AKT1 GO:0048661
## 139        AKT1 GO:0045725
## 140        AKT1 GO:0046326
## 141        AKT1 GO:1902176
## 142        AKT1 GO:1990090
## 143        AKT1 GO:0036499
## 144        AKT1 GO:0010748
## 145        AKT1 GO:0043536
## 146        AKT1 GO:0034122
## 147        AKT1 GO:1903898
## 148        AKT1 GO:0060079
## 149        AKT1 GO:0160213
## 150        AKT1 GO:0006809
## 151        AKT1 GO:0043488
## 152        AKT1 GO:0072655
## 153        AKT1 GO:0072656
## 154        AKT1 GO:0032079
## 155        AKT1 GO:0034405
## 156        AKT1 GO:0006924
## 157        AKT1 GO:0070972
## 158        AKT1 GO:0106004
## 159        AKT1 GO:0099104
## 160        AKT1 GO:1904841
## 161        AKT1 GO:0002430
## 162        AKT1 GO:0003376
## 163        AKT1 GO:0005979
## 164        AKT1 GO:0009408
## 165        AKT1 GO:0010507
## 166        AKT1 GO:0010761
## 167        AKT1 GO:0010907
## 168        AKT1 GO:0016242
## 169        AKT1 GO:0031929
## 170        AKT1 GO:0031999
## 171        AKT1 GO:0033554
## 172        AKT1 GO:0043276
## 173        AKT1 GO:0072752
## 174        AKT1 GO:0110002
## 175        AKT1 GO:0140052
## 176        AKT1 GO:0150033
## 177        AKT1 GO:1903318
## 178        AKT1 GO:1903384
## 179        AKT1 GO:1905552
## 180        AKT1 GO:1905786
## 181        AKT1 GO:2000074
## 182        AKT1 GO:2000402
## 183        AKT1 GO:0001893
## 184        AKT1 GO:0006954
## 185        AKT1 GO:0007281
## 186        AKT1 GO:0008637
## 187        AKT1 GO:0010763
## 188        AKT1 GO:0010765
## 189        AKT1 GO:0016567
## 190        AKT1 GO:0022605
## 191        AKT1 GO:0030163
## 192        AKT1 GO:0031641
## 193        AKT1 GO:0031663
## 194        AKT1 GO:0032094
## 195        AKT1 GO:0032287
## 196        AKT1 GO:0032880
## 197        AKT1 GO:0035924
## 198        AKT1 GO:0036294
## 199        AKT1 GO:0042593
## 200        AKT1 GO:0046622
## 201        AKT1 GO:0051146
## 202        AKT1 GO:0060716
## 203        AKT1 GO:0071356
## 204        AKT1 GO:0071363
## 205        AKT1 GO:0071380
## 206        AKT1 GO:0097011
## 207        AKT1 GO:0097194
## 208        AKT1 GO:1901653
## 209        AKT1 GO:0010467
## 210        AKT1 GO:0019901
## 211        AKT1 GO:0009725
## 212        AKT1 GO:0010629
## 213        AKT1 GO:0048266
## 214        AKT1 GO:0099175
## 215        AKT1 GO:1904515
## 216        AKT1 GO:2000010
## 217        AKT1 GO:2001243
## 218        AKT1 GO:0098978
## 219        AKT1 GO:0005819
## 220        AKT1 GO:0005911
## 221        AKT1           
## 222        IFNG GO:0005576
## 223        IFNG GO:0006955
## 224        IFNG GO:0005133
## 225        IFNG GO:0050729
## 226        IFNG GO:0010629
## 227        IFNG GO:0005615
## 228        IFNG GO:0005515
## 229        IFNG GO:0051607
## 230        IFNG GO:0005125
## 231        IFNG GO:0010508
## 232        IFNG GO:0010628
## 233        IFNG GO:0007259
## 234        IFNG GO:0001774
## 235        IFNG GO:0051050
## 236        IFNG GO:0060333
## 237        IFNG GO:0045785
## 238        IFNG GO:1903543
## 239        IFNG GO:0008284
## 240        IFNG GO:0045944
## 241        IFNG GO:1903078
## 242        IFNG GO:0098586
## 243        IFNG GO:0032735
## 244        IFNG GO:0032834
## 245        IFNG GO:0032700
## 246        IFNG GO:0032755
## 247        IFNG GO:0032760
## 248        IFNG GO:0007166
## 249        IFNG GO:0000122
## 250        IFNG GO:0006915
## 251        IFNG GO:0045672
## 252        IFNG GO:0097191
## 253        IFNG GO:0002726
## 254        IFNG GO:0045429
## 255        IFNG GO:0006959
## 256        IFNG GO:0051044
## 257        IFNG GO:0032731
## 258        IFNG GO:0045348
## 259        IFNG GO:1902004
## 260        IFNG GO:0002250
## 261        IFNG GO:1901857
## 262        IFNG GO:0097696
## 263        IFNG GO:0048143
## 264        IFNG GO:0050796
## 265        IFNG GO:0060557
## 266        IFNG GO:1900222
## 267        IFNG GO:1900451
## 268        IFNG GO:0009615
## 269        IFNG GO:0033141
## 270        IFNG GO:0038110
## 271        IFNG GO:0038196
## 272        IFNG GO:0042531
## 273        IFNG GO:0001819
## 274        IFNG GO:0002281
## 275        IFNG GO:0010634
## 276        IFNG GO:0030225
## 277        IFNG GO:0030857
## 278        IFNG GO:0031334
## 279        IFNG GO:0032722
## 280        IFNG GO:0032747
## 281        IFNG GO:0034393
## 282        IFNG GO:0038096
## 283        IFNG GO:0042307
## 284        IFNG GO:0045821
## 285        IFNG GO:0045892
## 286        IFNG GO:0048662
## 287        IFNG GO:0050766
## 288        IFNG GO:0050769
## 289        IFNG GO:0060552
## 290        IFNG GO:0071902
## 291        IFNG GO:0090312
## 292        IFNG GO:0150076
## 293        IFNG GO:1904440
## 294        IFNG GO:1904798
## 295        IFNG GO:2000309
## 296       STAT1 GO:0003677
## 297       STAT1 GO:0006355
## 298       STAT1 GO:0007165
## 299       STAT1 GO:0003700
## 300       STAT1 GO:0005634
## 301       STAT1 GO:0005737
## 302       STAT1 GO:0051093
## 303       STAT1 GO:0005829
## 304       STAT1 GO:0005654
## 305       STAT1 GO:0051607
## 306       STAT1 GO:0000981
## 307       STAT1 GO:0005730
## 308       STAT1 GO:0042981
## 309       STAT1 GO:0006357
## 310       STAT1 GO:0007259
## 311       STAT1 GO:0060337
## 312       STAT1 GO:0060333
## 313       STAT1 GO:0090575
## 314       STAT1 GO:0019899
## 315       STAT1 GO:0005515
## 316       STAT1 GO:0030425
## 317       STAT1 GO:0030424
## 318       STAT1 GO:0045893
## 319       STAT1 GO:0001222
## 320       STAT1 GO:0042803
## 321       STAT1 GO:0034097
## 322       STAT1 GO:0000122
## 323       STAT1 GO:0042393
## 324       STAT1 GO:0045944
## 325       STAT1 GO:0032991
## 326       STAT1 GO:0042802
## 327       STAT1 GO:0000978
## 328       STAT1 GO:0003690
## 329       STAT1 GO:0034341
## 330       STAT1 GO:0043124
## 331       STAT1 GO:0000977
## 332       STAT1 GO:0045648
## 333       STAT1 GO:0008015
## 334       STAT1 GO:0016525
## 335       STAT1 GO:0005164
## 336       STAT1 GO:0033209
## 337       STAT1 GO:0000785
## 338       STAT1 GO:0048661
## 339       STAT1 GO:0001223
## 340       STAT1 GO:0043434
## 341       STAT1 GO:0002230
## 342       STAT1 GO:1990841
## 343       STAT1 GO:0032727
## 344       STAT1 GO:0045296
## 345       STAT1 GO:0006952
## 346       STAT1 GO:0046427
## 347       STAT1 GO:0035458
## 348       STAT1 GO:0071346
## 349       STAT1 GO:0048471
## 350       STAT1 GO:0042127
## 351       STAT1 GO:0001937
## 352       STAT1 GO:0044389
## 353       STAT1 GO:0035456
## 354       STAT1 GO:0051591
## 355       STAT1 GO:0002053
## 356       STAT1 GO:0072162
## 357       STAT1 GO:0046725
## 358       STAT1 GO:0035035
## 359       STAT1 GO:0070106
## 360       STAT1 GO:0097696
## 361       STAT1 GO:0000979
## 362       STAT1 GO:0038111
## 363       STAT1 GO:0038113
## 364       STAT1 GO:0003340
## 365       STAT1 GO:0061326
## 366       STAT1 GO:0072136
## 367       STAT1 GO:0072308
## 368       STAT1 GO:0070721
## 369       STAT1           
## 370       STAT1 GO:0071345
## 371       STAT1 GO:0031730
## 372       STAT1 GO:0043565
## 373       STAT1 GO:0051721
## 374       STAT1 GO:0007584
## 375       STAT1 GO:0008284
## 376       STAT1 GO:0009410
## 377       STAT1 GO:0009612
## 378       STAT1 GO:0032869
## 379       STAT1 GO:0042542
## 380       STAT1 GO:0045429
## 381       STAT1 GO:0019221
## 382       STAT1 GO:0006955