2015 |
Moreno, Pablo ; Beisken, Stephan ; Harsha, Bhavana ; Muthukrishnan, Venkatesh ; Tudose, Ilinca ; Dekker, Adriano ; Dornfeldt, Stefanie ; Taruttis, Franziska ; Grosse, Ivo ; Hastings, Janna ; Neumann, Steffen ; Steinbeck, Christoph BiNChE: A web tool and library for chemical enrichment analysis based on the ChEBI ontology Journal Article BMC Bioinformatics, 16 (1), pp. 56, 2015.
@article{Moreno:2015gx,
  author    = {Moreno, Pablo and Beisken, Stephan and Harsha, Bhavana and Muthukrishnan, Venkatesh and Tudose, Ilinca and Dekker, Adriano and Dornfeldt, Stefanie and Taruttis, Franziska and Grosse, Ivo and Hastings, Janna and Neumann, Steffen and Steinbeck, Christoph},
  title     = {{BiNChE}: A web tool and library for chemical enrichment analysis based on the {ChEBI} ontology},
  journal   = {BMC Bioinformatics},
  volume    = {16},
  number    = {1},
  pages     = {56},
  year      = {2015},
  date      = {2015-01-01},
  publisher = {BioMed Central Ltd},
  doi       = {10.1186/s12859-015-0486-3},
  url       = {http://www.biomedcentral.com/1471-2105/16/56},
  abstract  = {Ontology-based enrichment analysis aids in the interpretation and understanding of large-scale biological data. Ontologies are hierarchies of biologically relevant groupings. Using ontology annotations, which link ontology classes to biological entities, enrichment analysis methods assess whether there is a significant over or under representation of entities for ontology classes. While many tools exist that run enrichment analysis for protein sets annotated with the Gene Ontology, there are only a few that can be used for small molecules enrichment analysis.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
Ontology-based enrichment analysis aids in the interpretation and understanding of large-scale biological data. Ontologies are hierarchies of biologically relevant groupings. 
Using ontology annotations, which link ontology classes to biological entities, enrichment analysis methods assess whether there is a significant over or under representation of entities for ontology classes. While many tools exist that run enrichment analysis for protein sets annotated with the Gene Ontology, there are only a few that can be used for small molecules enrichment analysis. |
Salek, Reza M; Neumann, Steffen ; Schober, Daniel ; Hummel, Jan ; Billiau, Kenny ; Kopka, Joachim ; Correa, Elon ; Reijmers, Theo ; Rosato, Antonio ; Tenori, Leonardo ; Turano, Paola ; Marin, Silvia ; Deborde, Catherine ; Jacob, Daniel ; Rolin, Dominique ; Dartigues, Benjamin ; Conesa, Pablo ; Haug, Kenneth ; Rocca-Serra, Philippe ; O'Hagan, Steve ; Hao, Jie ; van Vliet, Michael ; Sysi-Aho, Marko ; Ludwig, Christian ; Bouwman, Jildau ; Cascante, Marta ; Ebbels, Timothy ; Griffin, Julian L; Moing, Annick ; Nikolski, Macha ; Oresic, Matej ; Sansone, Susanna-Assunta ; Viant, Mark R; Goodacre, Royston ; Günther, Ulrich L; Hankemeier, Thomas ; Luchinat, Claudio ; Walther, Dirk ; Steinbeck, Christoph COordination of Standards in MetabOlomicS (COSMOS): facilitating integrated metabolomics data access Journal Article Metabolomics, 11 (6), pp. 1–11, 2015.
@article{Salek:2015eo,
  author    = {Salek, Reza M and Neumann, Steffen and Schober, Daniel and Hummel, Jan and Billiau, Kenny and Kopka, Joachim and Correa, Elon and Reijmers, Theo and Rosato, Antonio and Tenori, Leonardo and Turano, Paola and Marin, Silvia and Deborde, Catherine and Jacob, Daniel and Rolin, Dominique and Dartigues, Benjamin and Conesa, Pablo and Haug, Kenneth and Rocca-Serra, Philippe and O'Hagan, Steve and Hao, Jie and van Vliet, Michael and Sysi-Aho, Marko and Ludwig, Christian and Bouwman, Jildau and Cascante, Marta and Ebbels, Timothy and Griffin, Julian L and Moing, Annick and Nikolski, Macha and Oresic, Matej and Sansone, Susanna-Assunta and Viant, Mark R and Goodacre, Royston and G{\"u}nther, Ulrich L and Hankemeier, Thomas and Luchinat, Claudio and Walther, Dirk and Steinbeck, Christoph},
  title     = {{COordination} of {Standards} in {MetabOlomicS} ({COSMOS}): facilitating integrated metabolomics data access},
  journal   = {Metabolomics},
  volume    = {11},
  number    = {6},
  pages     = {1--11},
  year      = {2015},
  date      = {2015-01-01},
  publisher = {Springer US},
  doi       = {10.1007/s11306-015-0810-y},
  url       = {http://link.springer.com/article/10.1007/s11306-015-0810-y/fulltext.html},
  abstract  = {Metabolomics has become a crucial phenotyping technique in a range of research fields including medicine, the life sciences, biotechnology and the environmental sciences. This necessitates the transfer of experimental information between research groups, as ...},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
Metabolomics has become a crucial phenotyping technique in a range of research fields including medicine, the life sciences, biotechnology and the environmental sciences. This necessitates the transfer of experimental information between research groups, as ... |
Hastings, Janna ; Jeliazkova, Nina ; Owen, Gareth ; Tsiliki, Georgia ; Munteanu, Cristian R; Steinbeck, Christoph ; Willighagen, Egon eNanoMapper: harnessing ontologies to enable data integration for nanomaterial risk assessment. Journal Article Journal of biomedical semantics, 6 (1), pp. 10, 2015.
@article{Hastings:2015jc,
  author    = {Hastings, Janna and Jeliazkova, Nina and Owen, Gareth and Tsiliki, Georgia and Munteanu, Cristian R and Steinbeck, Christoph and Willighagen, Egon},
  title     = {{eNanoMapper}: harnessing ontologies to enable data integration for nanomaterial risk assessment},
  journal   = {Journal of Biomedical Semantics},
  volume    = {6},
  number    = {1},
  pages     = {10},
  year      = {2015},
  date      = {2015-01-01},
  publisher = {BioMed Central Ltd},
  doi       = {10.1186/s13326-015-0005-5},
  url       = {http://www.jbiomedsem.com/content/6/1/10},
  abstract  = {Engineered nanomaterials (ENMs) are being developed to meet specific application needs in diverse domains across the engineering and biomedical sciences (e.g. drug delivery). However, accompanying the exciting proliferation of novel nanomaterials is a challenging race to understand and predict their possibly detrimental effects on human health and the environment. The eNanoMapper project (www.enanomapper.net) is creating a pan-European computational infrastructure for toxicological data management for ENMs, based on semantic web standards and ontologies. Here, we describe the development of the eNanoMapper ontology based on adopting and extending existing ontologies of relevance for the nanosafety domain. The resulting eNanoMapper ontology is available at http://purl.enanomapper.net/onto/enanomapper.owl. We aim to make the re-use of external ontology content seamless and thus we have developed a library to automate the extraction of subsets of ontology content and the assembly of the subsets into an integrated whole. The library is available (open source) at http://github.com/enanomapper/slimmer/. Finally, we give a comprehensive survey of the domain content and identify gap areas. ENM safety is at the boundary between engineering and the life sciences, and at the boundary between molecular granularity and bulk granularity. This creates challenges for the definition of key entities in the domain, which we also discuss.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
Engineered nanomaterials (ENMs) are being developed to meet specific application needs in diverse domains across the engineering and biomedical sciences (e.g. drug delivery). However, accompanying the exciting proliferation of novel nanomaterials is a challenging race to understand and predict their possibly detrimental effects on human health and the environment. The eNanoMapper project (www.enanomapper.net) is creating a pan-European computational infrastructure for toxicological data management for ENMs, based on semantic web standards and ontologies. Here, we describe the development of the eNanoMapper ontology based on adopting and extending existing ontologies of relevance for the nanosafety domain. The resulting eNanoMapper ontology is available at http://purl.enanomapper.net/onto/enanomapper.owl. We aim to make the re-use of external ontology content seamless and thus we have developed a library to automate the extraction of subsets of ontology content and the assembly of the subsets into an integrated whole. The library is available (open source) at http://github.com/enanomapper/slimmer/. Finally, we give a comprehensive survey of the domain content and identify gap areas. ENM safety is at the boundary between engineering and the life sciences, and at the boundary between molecular granularity and bulk granularity. This creates challenges for the definition of key entities in the domain, which we also discuss. |
Morgat, Anne ; Axelsen, Kristian B; Lombardot, Thierry ; Alcantara, Rafael ; Aimo, Lucila ; Zerara, Mohamed ; Niknejad, Anne ; Belda, Eugeni ; Hyka-Nouspikel, Nevila ; Coudert, Elisabeth ; Redaschi, Nicole ; Bougueleret, Lydie ; Steinbeck, Christoph ; Xenarios, Ioannis ; Bridge, Alan Updates in Rhea—a manually curated resource of biochemical reactions. Journal Article Nucleic Acids Research, 43 (Database issue), pp. D459–64, 2015.
@article{Morgat:2015jo,
  author    = {Morgat, Anne and Axelsen, Kristian B and Lombardot, Thierry and Alcantara, Rafael and Aimo, Lucila and Zerara, Mohamed and Niknejad, Anne and Belda, Eugeni and Hyka-Nouspikel, Nevila and Coudert, Elisabeth and Redaschi, Nicole and Bougueleret, Lydie and Steinbeck, Christoph and Xenarios, Ioannis and Bridge, Alan},
  title     = {Updates in {Rhea}---a manually curated resource of biochemical reactions},
  journal   = {Nucleic Acids Research},
  volume    = {43},
  number    = {Database issue},
  pages     = {D459--D464},
  year      = {2015},
  date      = {2015-01-01},
  publisher = {Oxford University Press},
  doi       = {10.1093/nar/gku961},
  url       = {http://nar.oxfordjournals.org/lookup/doi/10.1093/nar/gku961},
  abstract  = {Rhea (http://www.ebi.ac.uk/rhea) is a comprehensive and non-redundant resource of expert-curated biochemical reactions described using species from the ChEBI (Chemical Entities of Biological Interest) ontology of small molecules. Rhea has been designed for the functional annotation of enzymes and the description of genome-scale metabolic networks, providing stoichiometrically balanced enzyme-catalyzed reactions (covering the IUBMB Enzyme Nomenclature list and additional reactions), transport reactions and spontaneously occurring reactions. Rhea reactions are extensively curated with links to source literature and are mapped to other publicly available enzyme and pathway databases such as Reactome, BioCyc, KEGG and UniPathway, through manual curation and computational methods. Here we describe developments in Rhea since our last report in the 2012 database issue of Nucleic Acids Research. These include significant growth in the number of Rhea reactions and the inclusion of reactions involving complex macromolecules such as proteins, nucleic acids and other polymers that lie outside the scope of ChEBI. Together these developments will significantly increase the utility of Rhea as a tool for the description, analysis and reconciliation of genome-scale metabolic models.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
Rhea (http://www.ebi.ac.uk/rhea) is a comprehensive and non-redundant resource of expert-curated biochemical reactions described using species from the ChEBI (Chemical Entities of Biological Interest) ontology of small molecules. Rhea has been designed for the functional annotation of enzymes and the description of genome-scale metabolic networks, providing stoichiometrically balanced enzyme-catalyzed reactions (covering the IUBMB Enzyme Nomenclature list and additional reactions), transport reactions and spontaneously occurring reactions. Rhea reactions are extensively curated with links to source literature and are mapped to other publicly available enzyme and pathway databases such as Reactome, BioCyc, KEGG and UniPathway, through manual curation and computational methods. Here we describe developments in Rhea since our last report in the 2012 database issue of Nucleic Acids Research. These include significant growth in the number of Rhea reactions and the inclusion of reactions involving complex macromolecules such as proteins, nucleic acids and other polymers that lie outside the scope of ChEBI. Together these developments will significantly increase the utility of Rhea as a tool for the description, analysis and reconciliation of genome-scale metabolic models. |
2014 |
Griss, Johannes ; Jones, Andrew R; Sachsenberg, Timo ; Walzer, Mathias ; Gatto, Laurent ; Hartler, Jürgen ; Thallinger, Gerhard G; Salek, Reza M; Steinbeck, Christoph ; Neuhauser, Nadin ; Cox, Jürgen ; Neumann, Steffen ; Fan, Jun ; Reisinger, Florian ; Xu, Qing-Wei ; Del-Toro, Noemi ; Pérez-Riverol, Yasset ; Ghali, Fawaz ; Bandeira, Nuno ; Xenarios, Ioannis ; Kohlbacher, Oliver ; Vizcaíno, Juan Antonio ; Hermjakob, Henning The mzTab data exchange format: communicating mass-spectrometry-based proteomics and metabolomics experimental results to a wider audience. Journal Article Molecular & Cellular Proteomics, 13 (10), pp. 2765–2775, 2014.
@article{Griss:2014kc,
  author    = {Griss, Johannes and Jones, Andrew R and Sachsenberg, Timo and Walzer, Mathias and Gatto, Laurent and Hartler, J{\"u}rgen and Thallinger, Gerhard G and Salek, Reza M and Steinbeck, Christoph and Neuhauser, Nadin and Cox, J{\"u}rgen and Neumann, Steffen and Fan, Jun and Reisinger, Florian and Xu, Qing-Wei and Del-Toro, Noemi and P{\'e}rez-Riverol, Yasset and Ghali, Fawaz and Bandeira, Nuno and Xenarios, Ioannis and Kohlbacher, Oliver and Vizca{\'i}no, Juan Antonio and Hermjakob, Henning},
  title     = {The {mzTab} data exchange format: communicating mass-spectrometry-based proteomics and metabolomics experimental results to a wider audience},
  journal   = {Molecular \& Cellular Proteomics},
  volume    = {13},
  number    = {10},
  pages     = {2765--2775},
  year      = {2014},
  date      = {2014-10-01},
  publisher = {American Society for Biochemistry and Molecular Biology},
  doi       = {10.1074/mcp.O113.036681},
  url       = {http://www.mcponline.org/cgi/doi/10.1074/mcp.O113.036681},
  abstract  = {The HUPO Proteomics Standards Initiative has developed several standardized data formats to facilitate data sharing in mass spectrometry (MS)-based proteomics. These allow researchers to report their complete results in a unified way. However, at present, there is no format to describe the final qualitative and quantitative results for proteomics and metabolomics experiments in a simple tabular format. Many downstream analysis use cases are only concerned with the final results of an experiment and require an easily accessible format, compatible with tools such as Microsoft Excel or R. We developed the mzTab file format for MS-based proteomics and metabolomics results to meet this need. mzTab is intended as a lightweight supplement to the existing standard XML-based file formats (mzML, mzIdentML, mzQuantML), providing a comprehensive summary, similar in concept to the supplemental material of a scientific publication. mzTab files can contain protein, peptide, and small molecule identifications together with experimental metadata and basic quantitative information. The format is not intended to store the complete experimental evidence but provides mechanisms to report results at different levels of detail. These range from a simple summary of the final results to a representation of the results including the experimental design. This format is ideally suited to make MS-based proteomics and metabolomics results available to a wider biological community outside the field of MS. Several software tools for proteomics and metabolomics have already adapted the format as an output format. The comprehensive mzTab specification document and extensive additional documentation can be found online.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
The HUPO Proteomics Standards Initiative has developed several standardized data formats to facilitate data sharing in mass spectrometry (MS)-based proteomics. These allow researchers to report their complete results in a unified way. However, at present, there is no format to describe the final qualitative and quantitative results for proteomics and metabolomics experiments in a simple tabular format. 
Many downstream analysis use cases are only concerned with the final results of an experiment and require an easily accessible format, compatible with tools such as Microsoft Excel or R. We developed the mzTab file format for MS-based proteomics and metabolomics results to meet this need. mzTab is intended as a lightweight supplement to the existing standard XML-based file formats (mzML, mzIdentML, mzQuantML), providing a comprehensive summary, similar in concept to the supplemental material of a scientific publication. mzTab files can contain protein, peptide, and small molecule identifications together with experimental metadata and basic quantitative information. The format is not intended to store the complete experimental evidence but provides mechanisms to report results at different levels of detail. These range from a simple summary of the final results to a representation of the results including the experimental design. This format is ideally suited to make MS-based proteomics and metabolomics results available to a wider biological community outside the field of MS. Several software tools for proteomics and metabolomics have already adapted the format as an output format. The comprehensive mzTab specification document and extensive additional documentation can be found online. |
Beisken, Stephan ; Earll, Mark ; Baxter, Charles ; Portwood, David ; Ament, Zsuzsanna ; Kende, Aniko ; Hodgman, Charlie ; Seymour, Graham ; Smith, Rebecca ; Fraser, Paul ; Seymour, Mark ; Salek, Reza M; Steinbeck, Christoph Metabolic differences in ripening of Solanum lycopersicum ‘Ailsa Craig’ and three monogenic mutants Journal Article Scientific Data, 1 , pp. 140029, 2014.
@article{Beisken:2014fxa,
  author    = {Beisken, Stephan and Earll, Mark and Baxter, Charles and Portwood, David and Ament, Zsuzsanna and Kende, Aniko and Hodgman, Charlie and Seymour, Graham and Smith, Rebecca and Fraser, Paul and Seymour, Mark and Salek, Reza M and Steinbeck, Christoph},
  title     = {Metabolic differences in ripening of {Solanum} lycopersicum `{Ailsa Craig}' and three monogenic mutants},
  journal   = {Scientific Data},
  volume    = {1},
  pages     = {140029},
  year      = {2014},
  date      = {2014-09-01},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/sdata.2014.29},
  url       = {http://www.nature.com/articles/sdata201429},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
 |
Beisken, Stephan ; Earll, Mark ; Portwood, David ; Seymour, Mark ; Steinbeck, Christoph MassCascade: Visual Programming for LC-MS Data Processing in Metabolomics. Journal Article Molecular Informatics, 33 (4), pp. 307–310, 2014.
@article{Beisken:2014ie,
  author    = {Beisken, Stephan and Earll, Mark and Portwood, David and Seymour, Mark and Steinbeck, Christoph},
  title     = {{MassCascade}: Visual Programming for {LC-MS} Data Processing in Metabolomics},
  journal   = {Molecular Informatics},
  volume    = {33},
  number    = {4},
  pages     = {307--310},
  year      = {2014},
  date      = {2014-04-01},
  publisher = {WILEY-VCH Verlag},
  doi       = {10.1002/minf.201400016},
  url       = {http://onlinelibrary.wiley.com/doi/10.1002/minf.201400016/full},
  abstract  = {Liquid chromatography coupled to mass spectrometry (LC-MS) is commonly applied to investigate the small molecule complement of organisms. Several software tools are typically joined in custom pipelines to semi-automatically process and analyse the resulting data. General workflow environments like the Konstanz Information Miner (KNIME) offer the potential of an all-in-one solution to process LC-MS data by allowing easy integration of different tools and scripts. We describe MassCascade and its workflow plug-in for processing LC-MS data. The Java library integrates frequently used algorithms in a modular fashion, thus enabling it to serve as back-end for graphical front-ends. The functions available in MassCascade have been encapsulated in a plug-in for the workflow environment KNIME, allowing combined use with e.g. statistical workflow nodes from other providers and making the tool intuitive to use without knowledge of programming. The design of the software guarantees a high level of modularity where processing functions can be quickly replaced or concatenated. MassCascade is an open-source library for LC-MS data processing in metabolomics. It embraces the concept of visual programming through its KNIME plug-in, simplifying the process of building complex workflows. The library was validated using open data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
Liquid chromatography coupled to mass spectrometry (LC-MS) is commonly applied to investigate the small molecule complement of organisms. Several software tools are typically joined in custom pipelines to semi-automatically process and analyse the resulting data. General workflow environments like the Konstanz Information Miner (KNIME) offer the potential of an all-in-one solution to process LC-MS data by allowing easy integration of different tools and scripts. We describe MassCascade and its workflow plug-in for processing LC-MS data. The Java library integrates frequently used algorithms in a modular fashion, thus enabling it to serve as back-end for graphical front-ends. The functions available in MassCascade have been encapsulated in a plug-in for the workflow environment KNIME, allowing combined use with e.g. statistical workflow nodes from other providers and making the tool intuitive to use without knowledge of programming. The design of the software guarantees a high level of modularity where processing functions can be quickly replaced or concatenated. MassCascade is an open-source library for LC-MS data processing in metabolomics. It embraces the concept of visual programming through its KNIME plug-in, simplifying the process of building complex workflows. The library was validated using open data. |
Rueedi, Rico ; Ledda, Mirko ; Nicholls, Andrew W; Salek, Reza M; Marques-Vidal, Pedro ; Morya, Edgard ; Sameshima, Koichi ; Montoliu, Ivan ; Da Silva, Laeticia ; Collino, Sebastiano ; Martin, François-Pierre ; Rezzi, Serge ; Steinbeck, Christoph ; Waterworth, Dawn M; Waeber, Gérard ; Vollenweider, Peter ; Beckmann, Jacques S; Le Coutre, Johannes ; Mooser, Vincent ; Bergmann, Sven ; Genick, Ulrich K; Kutalik, Zoltán Genome-wide association study of metabolic traits reveals novel gene-metabolite-disease links. Journal Article PLoS Genetics, 10 (2), pp. e1004132, 2014.
@article{Rueedi:2014ej,
  author    = {Rueedi, Rico and Ledda, Mirko and Nicholls, Andrew W and Salek, Reza M and Marques-Vidal, Pedro and Morya, Edgard and Sameshima, Koichi and Montoliu, Ivan and Da Silva, Laeticia and Collino, Sebastiano and Martin, Fran{\c{c}}ois-Pierre and Rezzi, Serge and Steinbeck, Christoph and Waterworth, Dawn M and Waeber, G{\'e}rard and Vollenweider, Peter and Beckmann, Jacques S and Le Coutre, Johannes and Mooser, Vincent and Bergmann, Sven and Genick, Ulrich K and Kutalik, Zolt{\'a}n},
  title     = {Genome-wide association study of metabolic traits reveals novel gene-metabolite-disease links},
  journal   = {PLoS Genetics},
  volume    = {10},
  number    = {2},
  pages     = {e1004132},
  year      = {2014},
  date      = {2014-02-01},
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pgen.1004132},
  url       = {http://dx.plos.org/10.1371/journal.pgen.1004132},
  abstract  = {Metabolic traits are molecular phenotypes that can drive clinical phenotypes and may predict disease progression. Here, we report results from a metabolome- and genome-wide association study on $^{1}$H-NMR urine metabolic profiles. The study was conducted within an untargeted approach, employing a novel method for compound identification. From our discovery cohort of 835 Caucasian individuals who participated in the CoLaus study, we identified 139 suggestively significant ($P<5\times10^{-8}$) and independent associations between single nucleotide polymorphisms (SNP) and metabolome features. Fifty-six of these associations replicated in the TasteSensomics cohort, comprising 601 individuals from S{\~a}o Paulo of vastly diverse ethnic background. They correspond to eleven gene-metabolite associations, six of which had been previously identified in the urine metabolome and three in the serum metabolome. Our key novel findings are the associations of two SNPs with NMR spectral signatures pointing to fucose (rs492602, $P = 6.9\times10^{-44}$) and lysine (rs8101881, $P = 1.2\times10^{-33}$), respectively. Fine-mapping of the first locus pinpointed the FUT2 gene, which encodes a fucosyltransferase enzyme and has previously been associated with Crohn's disease. This implicates fucose as a potential prognostic disease marker, for which there is already published evidence from a mouse model. The second SNP lies within the SLC7A9 gene, rare mutations of which have been linked to severe kidney damage. The replication of previous associations and our new discoveries demonstrate the potential of untargeted metabolomics GWAS to robustly identify molecular disease markers.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
Metabolic traits are molecular phenotypes that can drive clinical phenotypes and may predict disease progression. Here, we report results from a metabolome- and genome-wide association study on ¹H-NMR urine metabolic profiles. The study was conducted within an untargeted approach, employing a novel method for compound identification. 
From our discovery cohort of 835 Caucasian individuals who participated in the CoLaus study, we identified 139 suggestively significant (P<5×10⁻⁸) and independent associations between single nucleotide polymorphisms (SNP) and metabolome features. Fifty-six of these associations replicated in the TasteSensomics cohort, comprising 601 individuals from São Paulo of vastly diverse ethnic background. They correspond to eleven gene-metabolite associations, six of which had been previously identified in the urine metabolome and three in the serum metabolome. Our key novel findings are the associations of two SNPs with NMR spectral signatures pointing to fucose (rs492602, P = 6.9×10⁻⁴⁴) and lysine (rs8101881, P = 1.2×10⁻³³), respectively. Fine-mapping of the first locus pinpointed the FUT2 gene, which encodes a fucosyltransferase enzyme and has previously been associated with Crohn's disease. This implicates fucose as a potential prognostic disease marker, for which there is already published evidence from a mouse model. The second SNP lies within the SLC7A9 gene, rare mutations of which have been linked to severe kidney damage. The replication of previous associations and our new discoveries demonstrate the potential of untargeted metabolomics GWAS to robustly identify molecular disease markers. |
Truszkowski, Andreas ; Daniel, Mirco ; Kuhn, Hubert ; Neumann, Stefan ; Steinbeck, Christoph ; Zielesny, Achim ; Epple, Matthias A molecular fragment cheminformatics roadmap for mesoscopic simulation. Journal Article Journal of cheminformatics, 6 (1), pp. 45, 2014.
@article{Truszkowski:2014gd,
  author    = {Truszkowski, Andreas and Daniel, Mirco and Kuhn, Hubert and Neumann, Stefan and Steinbeck, Christoph and Zielesny, Achim and Epple, Matthias},
  title     = {A molecular fragment cheminformatics roadmap for mesoscopic simulation},
  journal   = {Journal of Cheminformatics},
  volume    = {6},
  number    = {1},
  pages     = {45},
  year      = {2014},
  date      = {2014-01-01},
  publisher = {Springer International Publishing},
  doi       = {10.1186/s13321-014-0045-3},
  url       = {http://www.jcheminf.com/content/6/1/45},
  abstract  = {BACKGROUND: Mesoscopic simulation studies the structure, dynamics and properties of large molecular ensembles with millions of atoms: Its basic interacting units (beads) are no longer the nuclei and electrons of quantum chemical ab-initio calculations or the atom types of molecular mechanics but molecular fragments, molecules or even larger molecular entities. For its simulation setup and output a mesoscopic simulation kernel software uses abstract matrix (array) representations for bead topology and connectivity. Therefore a pure kernel-based mesoscopic simulation task is a tedious, time-consuming and error-prone venture that limits its practical use and application. A consequent cheminformatics approach tackles these problems and provides solutions for a considerably enhanced accessibility. This study aims at outlining a complete cheminformatics roadmap that frames a mesoscopic Molecular Fragment Dynamics (MFD) simulation kernel to allow its efficient use and practical application. RESULTS: The molecular fragment cheminformatics roadmap consists of four consecutive building blocks: An adequate fragment structure representation (1), defined operations on these fragment structures (2), the description of compartments with defined compositions and structural alignments (3), and the graphical setup and analysis of a whole simulation box (4). The basis of the cheminformatics approach (i.e. building block 1) is a SMILES-like line notation (denoted fSMILES) with connected molecular fragments to represent a molecular structure. The fSMILES notation and the following concepts and methods for building blocks 2-4 are outlined with examples and practical usage scenarios. It is shown that the requirements of the roadmap may be partly covered by already existing open-source cheminformatics software. CONCLUSIONS: Mesoscopic simulation techniques like MFD may be considerably alleviated and broadened for practical use with a consequent cheminformatics layer that successfully tackles its setup subtleties and conceptual usage hurdles. Molecular Fragment Cheminformatics may be regarded as a crucial accelerator to propagate MFD and similar mesoscopic simulation techniques in the molecular sciences. Graphical abstract: A molecular fragment cheminformatics roadmap for mesoscopic simulation.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
BACKGROUND: Mesoscopic simulation studies the structure, dynamics and properties of large molecular ensembles with millions of atoms: Its basic interacting units (beads) are no longer the nuclei and electrons of quantum chemical ab-initio calculations or the atom types of molecular mechanics but molecular fragments, molecules or even larger molecular entities. For its simulation setup and output a mesoscopic simulation kernel software uses abstract matrix (array) representations for bead topology and connectivity. 
Therefore a pure kernel-based mesoscopic simulation task is a tedious, time-consuming and error-prone venture that limits its practical use and application. A consequent cheminformatics approach tackles these problems and provides solutions for a considerably enhanced accessibility. This study aims at outlining a complete cheminformatics roadmap that frames a mesoscopic Molecular Fragment Dynamics (MFD) simulation kernel to allow its efficient use and practical application. RESULTS: The molecular fragment cheminformatics roadmap consists of four consecutive building blocks: An adequate fragment structure representation (1), defined operations on these fragment structures (2), the description of compartments with defined compositions and structural alignments (3), and the graphical setup and analysis of a whole simulation box (4). The basis of the cheminformatics approach (i.e. building block 1) is a SMILES-like line notation (denoted fSMILES) with connected molecular fragments to represent a molecular structure. The fSMILES notation and the following concepts and methods for building blocks 2-4 are outlined with examples and practical usage scenarios. It is shown that the requirements of the roadmap may be partly covered by already existing open-source cheminformatics software. CONCLUSIONS: Mesoscopic simulation techniques like MFD may be considerably alleviated and broadened for practical use with a consequent cheminformatics layer that successfully tackles its setup subtleties and conceptual usage hurdles. Molecular Fragment Cheminformatics may be regarded as a crucial accelerator to propagate MFD and similar mesoscopic simulation techniques in the molecular sciences. Graphical abstract: A molecular fragment cheminformatics roadmap for mesoscopic simulation. |
Jayaseelan, Kalai Vanii ; Steinbeck, Christoph Building blocks for automated elucidation of metabolites: natural product-likeness for candidate ranking. Journal Article BMC Bioinformatics, 15 (1), pp. 234, 2014. @article{Jayaseelan:2014im, title = {Building blocks for automated elucidation of metabolites: natural product-likeness for candidate ranking.}, author = {Jayaseelan, Kalai Vanii and Steinbeck, Christoph}, url = {http://www.biomedcentral.com/1471-2105/15/234}, doi = {10.1186/1471-2105-15-234}, year = {2014}, date = {2014-01-01}, journal = {BMC Bioinformatics}, volume = {15}, number = {1}, pages = {234}, publisher = {BioMed Central Ltd}, abstract = {BACKGROUND:In metabolomics experiments, spectral fingerprints of metabolites with no known structural identity are detected routinely. Computer-assisted structure elucidation (CASE) has been used to determine the structural identities of unknown compounds. It is generally accepted that a single 1D NMR spectrum or mass spectrum is usually not sufficient to establish the identity of a hitherto unknown compound. When a suite of spectra from 1D and 2D NMR experiments supplemented with a molecular formula are available, the successful elucidation of the chemical structure for candidates with up to 30 heavy atoms has been reported previously by one of the authors. In high-throughput metabolomics, usually 1D NMR or mass spectrometry experiments alone are conducted for rapid analysis of samples. This method subsequently requires that the spectral patterns are analyzed automatically to quickly identify known and unknown structures. In this study, we investigated whether additional existing knowledge, such as the fact that the unknown compound is a natural product, can be used to improve the ranking of the correct structure in the result list after the structure elucidation process. 
RESULTS:To identify unknowns using as little spectroscopic information as possible, we implemented an evolutionary algorithm-based CASE mechanism to elucidate candidates in a fully automated fashion, with input of the molecular formula and 13C NMR spectrum of the isolated compound. We also tested how filters like natural product-likeness, a measure that calculates the similarity of the compounds to known natural product space, might enhance the performance and quality of the structure elucidation. The evolutionary algorithm is implemented within the SENECA package for CASE reported previously, and is available for free download under artistic license at http://sourceforge.net/projects/seneca/. The natural product-likeness calculator is incorporated as a plugin within SENECA and is available as a GUI client and command-line executable. Significant improvements in candidate ranking were demonstrated for 41 small test molecules when the CASE system was supplemented by a natural product-likeness filter. CONCLUSIONS:In spectroscopically underdetermined structure elucidation problems, natural product-likeness can contribute to a better ranking of the correct structure in the results list.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:In metabolomics experiments, spectral fingerprints of metabolites with no known structural identity are detected routinely. Computer-assisted structure elucidation (CASE) has been used to determine the structural identities of unknown compounds. It is generally accepted that a single 1D NMR spectrum or mass spectrum is usually not sufficient to establish the identity of a hitherto unknown compound. When a suite of spectra from 1D and 2D NMR experiments supplemented with a molecular formula are available, the successful elucidation of the chemical structure for candidates with up to 30 heavy atoms has been reported previously by one of the authors. 
In high-throughput metabolomics, usually 1D NMR or mass spectrometry experiments alone are conducted for rapid analysis of samples. This method subsequently requires that the spectral patterns are analyzed automatically to quickly identify known and unknown structures. In this study, we investigated whether additional existing knowledge, such as the fact that the unknown compound is a natural product, can be used to improve the ranking of the correct structure in the result list after the structure elucidation process. RESULTS:To identify unknowns using as little spectroscopic information as possible, we implemented an evolutionary algorithm-based CASE mechanism to elucidate candidates in a fully automated fashion, with input of the molecular formula and 13C NMR spectrum of the isolated compound. We also tested how filters like natural product-likeness, a measure that calculates the similarity of the compounds to known natural product space, might enhance the performance and quality of the structure elucidation. The evolutionary algorithm is implemented within the SENECA package for CASE reported previously, and is available for free download under artistic license at http://sourceforge.net/projects/seneca/. The natural product-likeness calculator is incorporated as a plugin within SENECA and is available as a GUI client and command-line executable. Significant improvements in candidate ranking were demonstrated for 41 small test molecules when the CASE system was supplemented by a natural product-likeness filter. CONCLUSIONS:In spectroscopically underdetermined structure elucidation problems, natural product-likeness can contribute to a better ranking of the correct structure in the results list. |
Tipton, Keith F; Armstrong, Richard N; Bakker, Barbara M; Bairoch, Amos ; Cornish-Bowden, Athel ; Halling, Peter J; Hofmeyr, Jan-Hendrik ; Leyh, Thomas S; Kettner, Carsten ; Raushel, Frank M; Rohwer, Johann ; Schomburg, Dietmar ; Steinbeck, Christoph Standards for Reporting Enzyme Data: The STRENDA Consortium: What it aims to do and why it should be helpful Journal Article Perspectives in Science, 1 (1-6), pp. 131–137, 2014. @article{Tipton:2014hp, title = {Standards for Reporting Enzyme Data: The STRENDA Consortium: What it aims to do and why it should be helpful}, author = {Tipton, Keith F and Armstrong, Richard N and Bakker, Barbara M and Bairoch, Amos and Cornish-Bowden, Athel and Halling, Peter J and Hofmeyr, Jan-Hendrik and Leyh, Thomas S and Kettner, Carsten and Raushel, Frank M and Rohwer, Johann and Schomburg, Dietmar and Steinbeck, Christoph}, url = {http://linkinghub.elsevier.com/retrieve/pii/S2213020914000135}, doi = {10.1016/j.pisc.2014.02.012}, year = {2014}, date = {2014-01-01}, journal = {Perspectives in Science}, volume = {1}, number = {1-6}, pages = {131--137}, abstract = {Abstract Data on enzyme activities and kinetics have often been reported with insufficient experimental detail to allow their repetition. This paper discusses the objectives and recommendations of the Standards for Reporting Enzyme Data ( STRENDA ) project to ...}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract Data on enzyme activities and kinetics have often been reported with insufficient experimental detail to allow their repetition. This paper discusses the objectives and recommendations of the Standards for Reporting Enzyme Data ( STRENDA ) project to ... |
Hastings, Janna ; Haug, Kenneth ; Steinbeck, Christoph Ten recommendations for software engineering in research. Journal Article GigaScience, 3 (1), pp. 31, 2014. @article{Hastings:2014fa, title = {Ten recommendations for software engineering in research.}, author = {Hastings, Janna and Haug, Kenneth and Steinbeck, Christoph}, url = {http://www.gigasciencejournal.com/content/3/1/31}, doi = {10.1186/2047-217X-3-31}, year = {2014}, date = {2014-01-01}, journal = {GigaScience}, volume = {3}, number = {1}, pages = {31}, publisher = {BioMed Central}, abstract = {Research in the context of data-driven science requires a backbone of well-written software, but scientific researchers are typically not trained at length in software engineering, the principles for creating better software products. To address this gap, in particular for young researchers new to programming, we give ten recommendations to ensure the usability, sustainability and practicality of research software.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Research in the context of data-driven science requires a backbone of well-written software, but scientific researchers are typically not trained at length in software engineering, the principles for creating better software products. To address this gap, in particular for young researchers new to programming, we give ten recommendations to ensure the usability, sustainability and practicality of research software. |
May, John W; Steinbeck, Christoph Efficient ring perception for the Chemistry Development Kit. Journal Article Journal of cheminformatics, 6 (1), pp. 3, 2014. @article{Efficientringperce:2014hg, title = {Efficient ring perception for the Chemistry Development Kit.}, author = {May, John W and Steinbeck, Christoph}, url = {http://www.jcheminf.com/content/6/1/3/abstract}, doi = {10.1186/1758-2946-6-3}, year = {2014}, date = {2014-01-01}, journal = {Journal of cheminformatics}, volume = {6}, number = {1}, pages = {3}, publisher = {Chemistry Central Ltd}, abstract = {BACKGROUND:The Chemistry Development Kit (CDK) is an open source Java library for manipulating and processing chemical information. A key aspect in handling chemical structures is the determination of the chemical rings. The rings of a structure are used areas including descriptors, stereochemistry, similarity, screening and atom typing. The CDK includes multiple algorithms for determining the rings of a structure on demand. Non-unique descriptions of rings were often used due to the slower performance of the unique alternatives. RESULTS:Efficient algorithms for handling chemical ring perception have been implemented and optimised in the CDK. The algorithms provide much faster computation of new and existing types of rings. Several optimisation and implementation considerations are discussed which improve real case usage. The performance is measured on several publicly available data sets and in several cases the new implementations were found to be more than an order of magnitude faster. CONCLUSIONS:Algorithmic improvements allow handling of much larger datasets in reasonable time. Faster computation allows more appropriate rings to be utilised in procedures such as aromaticity. Several areas that require ring perception have also seen a noticeable improvement. The time taken to compute the unique rings is now comparable allowing a correct usage throughout the toolkit. 
All source code is open source and freely available.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:The Chemistry Development Kit (CDK) is an open source Java library for manipulating and processing chemical information. A key aspect in handling chemical structures is the determination of the chemical rings. The rings of a structure are used areas including descriptors, stereochemistry, similarity, screening and atom typing. The CDK includes multiple algorithms for determining the rings of a structure on demand. Non-unique descriptions of rings were often used due to the slower performance of the unique alternatives. RESULTS:Efficient algorithms for handling chemical ring perception have been implemented and optimised in the CDK. The algorithms provide much faster computation of new and existing types of rings. Several optimisation and implementation considerations are discussed which improve real case usage. The performance is measured on several publicly available data sets and in several cases the new implementations were found to be more than an order of magnitude faster. CONCLUSIONS:Algorithmic improvements allow handling of much larger datasets in reasonable time. Faster computation allows more appropriate rings to be utilised in procedures such as aromaticity. Several areas that require ring perception have also seen a noticeable improvement. The time taken to compute the unique rings is now comparable allowing a correct usage throughout the toolkit. All source code is open source and freely available. |
Venkata, Chandrasekhar ; Forster, Mark J; Howe, Peter W A; Steinbeck, Christoph The potential utility of predicted one bond carbon-proton coupling constants in the structure elucidation of small organic molecules by NMR spectroscopy. Journal Article PLoS ONE, 9 (11), pp. e111576, 2014. @article{Venkata:2014cq, title = {The potential utility of predicted one bond carbon-proton coupling constants in the structure elucidation of small organic molecules by NMR spectroscopy.}, author = {Venkata, Chandrasekhar and Forster, Mark J and Howe, Peter W A and Steinbeck, Christoph}, url = {http://dx.plos.org/10.1371/journal.pone.0111576}, doi = {10.1371/journal.pone.0111576}, year = {2014}, date = {2014-01-01}, journal = {PLoS ONE}, volume = {9}, number = {11}, pages = {e111576}, publisher = {Public Library of Science}, abstract = {NMR spectroscopy is the most popular technique used for structure elucidation of small organic molecules in solution, but incorrect structures are regularly reported. One-bond proton-carbon J-couplings provide additional information about chemical structure because they are determined by different features of molecular structure than are proton and carbon chemical shifts. However, these couplings are not routinely used to validate proposed structures because few software tools exist to predict them. This study assesses the accuracy of Density Functional Theory for predicting them using 396 published experimental observations from a diverse range of small organic molecules. With the B3LYP functional and the TZVP basis set, Density Functional Theory calculations using the open-source software package NWChem can predict one-bond CH J-couplings with good accuracy for most classes of small organic molecule. The root-mean-square deviation after correction is 1.5 Hz for most sp3 CH pairs and 1.9 Hz for sp2 pairs; larger errors are observed for sp3 pairs with multiple electronegative substituents and for sp pairs. 
These results suggest that prediction of one-bond CH J-couplings by Density Functional Theory is sufficiently accurate for structure validation. 
This will be of particular use in strained ring systems and heterocycles which have characteristic couplings and which pose challenges for structure elucidation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } NMR spectroscopy is the most popular technique used for structure elucidation of small organic molecules in solution, but incorrect structures are regularly reported. One-bond proton-carbon J-couplings provide additional information about chemical structure because they are determined by different features of molecular structure than are proton and carbon chemical shifts. However, these couplings are not routinely used to validate proposed structures because few software tools exist to predict them. This study assesses the accuracy of Density Functional Theory for predicting them using 396 published experimental observations from a diverse range of small organic molecules. With the B3LYP functional and the TZVP basis set, Density Functional Theory calculations using the open-source software package NWChem can predict one-bond CH J-couplings with good accuracy for most classes of small organic molecule. The root-mean-square deviation after correction is 1.5 Hz for most sp3 CH pairs and 1.9 Hz for sp2 pairs; larger errors are observed for sp3 pairs with multiple electronegative substituents and for sp pairs. These results suggest that prediction of one-bond CH J-couplings by Density Functional Theory is sufficiently accurate for structure validation. This will be of particular use in strained ring systems and heterocycles which have characteristic couplings and which pose challenges for structure elucidation. |
2013 |
Tudose, Ilinca ; Hastings, Janna ; Muthukrishnan, Venkatesh ; Owen, Gareth ; Turner, Steve ; Dekker, Adriano ; Kale, Namrata ; Ennis, Marcus ; Steinbeck, Christoph OntoQuery: easy-to-use web-based OWL querying. Journal Article Bioinformatics, 29 (22), pp. 2955–2957, 2013. @article{Tudose:2013iea, title = {OntoQuery: easy-to-use web-based OWL querying.}, author = {Tudose, Ilinca and Hastings, Janna and Muthukrishnan, Venkatesh and Owen, Gareth and Turner, Steve and Dekker, Adriano and Kale, Namrata and Ennis, Marcus and Steinbeck, Christoph}, url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btt514}, doi = {10.1093/bioinformatics/btt514}, year = {2013}, date = {2013-11-01}, journal = {Bioinformatics}, volume = {29}, number = {22}, pages = {2955--2957}, publisher = {Oxford University Press}, abstract = {SUMMARY:The Web Ontology Language (OWL) provides a sophisticated language for building complex domain ontologies and is widely used in bio-ontologies such as the Gene Ontology. The Prot{\'e}g{\'e}-OWL ontology editing tool provides a query facility that allows composition and execution of queries with the human-readable Manchester OWL syntax, with syntax checking and entity label lookup. No equivalent query facility such as the Prot{\'e}g{\'e} Description Logics (DL) query yet exists in web form. However, many users interact with bio-ontologies such as chemical entities of biological interest and the Gene Ontology using their online Web sites, within which DL-based querying functionality is not available. To address this gap, we introduce the OntoQuery web-based query utility. AVAILABILITY AND IMPLEMENTATION: The source code for this implementation together with instructions for installation is available at http://github.com/IlincaTudose/OntoQuery. OntoQuery software is fully compatible with all OWL-based ontologies and is available for download (CC-0 license). 
The ChEBI installation, ChEBI OntoQuery, is available at http://www.ebi.ac.uk/chebi/tools/ontoquery. CONTACT:hastings@ebi.ac.uk.}, keywords = {}, pubstate = {published}, tppubtype = {article} } SUMMARY:The Web Ontology Language (OWL) provides a sophisticated language for building complex domain ontologies and is widely used in bio-ontologies such as the Gene Ontology. The Protégé-OWL ontology editing tool provides a query facility that allows composition and execution of queries with the human-readable Manchester OWL syntax, with syntax checking and entity label lookup. No equivalent query facility such as the Protégé Description Logics (DL) query yet exists in web form. However, many users interact with bio-ontologies such as chemical entities of biological interest and the Gene Ontology using their online Web sites, within which DL-based querying functionality is not available. To address this gap, we introduce the OntoQuery web-based query utility. AVAILABILITY AND IMPLEMENTATION: The source code for this implementation together with instructions for installation is available at http://github.com/IlincaTudose/OntoQuery. OntoQuery software is fully compatible with all OWL-based ontologies and is available for download (CC-0 license). The ChEBI installation, ChEBI OntoQuery, is available at http://www.ebi.ac.uk/chebi/tools/ontoquery. CONTACT:hastings@ebi.ac.uk. |
Hastings, Janna ; Steinbeck, Christoph Chemical Ontologies for Standardization, Knowledge Discovery, and Data Mining Book Wiley-VCH Verlag GmbH & Co. KGaA, Weinheim, Germany, 2013, ISBN: 9783527655984. @book{Hastings:2013by, title = {Chemical Ontologies for Standardization, Knowledge Discovery, and Data Mining}, author = {Hastings, Janna and Steinbeck, Christoph}, url = {http://doi.wiley.com/10.1002/9783527655984.ch03}, doi = {10.1002/9783527655984.ch03}, isbn = {9783527655984}, year = {2013}, date = {2013-09-01}, volume = {7}, publisher = {Wiley-VCH Verlag GmbH \& Co. KGaA}, address = {Weinheim, Germany}, keywords = {}, pubstate = {published}, tppubtype = {book} } |
Beisken, Stephan ; Meinl, Thorsten ; Wiswedel, Bernd ; de Figueiredo, Luis F; Berthold, Michael ; Steinbeck, Christoph KNIME-CDK: Workflow-driven cheminformatics Journal Article BMC Bioinformatics, 14 (1), pp. 257, 2013. @article{KNIMECDKWorkflow:2013fk, title = {KNIME-CDK: Workflow-driven cheminformatics}, author = {Beisken, Stephan and Meinl, Thorsten and Wiswedel, Bernd and de Figueiredo, Luis F and Berthold, Michael and Steinbeck, Christoph}, url = {http://www.biomedcentral.com/1471-2105/14/257}, doi = {10.1186/1471-2105-14-257}, year = {2013}, date = {2013-08-01}, journal = {BMC Bioinformatics}, volume = {14}, number = {1}, pages = {257}, publisher = {BioMed Central Ltd}, abstract = {Cheminformaticians have to routinely process and analyse libraries of small molecules. Among other things, that includes the standardization of molecules, calculation of various descriptors, visualisation of molecular structures, and downstream analysis. For this purpose, scientific workflow platforms such as the Konstanz Information Miner can be used if provided with the right plug-in. A workflow-based cheminformatics tool provides the advantage of ease-of-use and interoperability between complementary cheminformatics packages within the same framework, hence facilitating the analysis process.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Cheminformaticians have to routinely process and analyse libraries of small molecules. Among other things, that includes the standardization of molecules, calculation of various descriptors, visualisation of molecular structures, and downstream analysis. For this purpose, scientific workflow platforms such as the Konstanz Information Miner can be used if provided with the right plug-in. A workflow-based cheminformatics tool provides the advantage of ease-of-use and interoperability between complementary cheminformatics packages within the same framework, hence facilitating the analysis process. |
Salek, Reza M; Steinbeck, Christoph ; Viant, Mark R; Goodacre, Royston ; Dunn, Warwick B The role of reporting standards for metabolite annotation and identification in metabolomic studies Journal Article GigaScience, 2 (1), pp. 13, 2013. @article{Salek:2013gv, title = {The role of reporting standards for metabolite annotation and identification in metabolomic studies}, author = {Salek, Reza M and Steinbeck, Christoph and Viant, Mark R and Goodacre, Royston and Dunn, Warwick B}, url = {http://www.gigasciencejournal.com/content/2/1/13}, doi = {10.1186/2047-217X-2-13}, year = {2013}, date = {2013-01-01}, journal = {GigaScience}, volume = {2}, number = {1}, pages = {13}, publisher = {BioMed Central Ltd}, abstract = {The application of reporting standards in metabolomics allow data from different laboratories to be shared, integrated and interpreted. Although minimum reporting standards related to metabolite identification were published in 2007, it is clear that significant efforts are required to ensure their continuous update and appropriate use by the metabolomics community. These include their use in metabolomics data submission (e.g., MetaboLights) and as a requirement for publication in peer-reviewed journals (e.g., Metabolomics). The Data Standards and Metabolite Identification Task Groups of the international Metabolomics Society are actively working to develop and promote these standards and educate the community on their use.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The application of reporting standards in metabolomics allow data from different laboratories to be shared, integrated and interpreted. Although minimum reporting standards related to metabolite identification were published in 2007, it is clear that significant efforts are required to ensure their continuous update and appropriate use by the metabolomics community. 
These include their use in metabolomics data submission (e.g., MetaboLights) and as a requirement for publication in peer-reviewed journals (e.g., Metabolomics). The Data Standards and Metabolite Identification Task Groups of the international Metabolomics Society are actively working to develop and promote these standards and educate the community on their use. |
Haug, Kenneth ; Salek, Reza M; Conesa, Pablo ; Hastings, Janna ; De Matos, Paula ; Rijnbeek, Mark ; Mahendraker, Tejasvi ; Williams, Mark ; Neumann, Steffen ; Rocca-Serra, Philippe ; Maguire, Eamonn ; González-Beltrán, Alejandra ; Sansone, Susanna-Assunta ; Griffin, Julian L; Steinbeck, Christoph MetaboLights--an open-access general-purpose repository for metabolomics studies and associated meta-data. Journal Article Nucleic Acids Research, 41 (Database issue), pp. D781–6, 2013. @article{haug2012metabolights, title = {MetaboLights--an open-access general-purpose repository for metabolomics studies and associated meta-data.}, author = {Haug, Kenneth and Salek, Reza M and Conesa, Pablo and Hastings, Janna and De Matos, Paula and Rijnbeek, Mark and Mahendraker, Tejasvi and Williams, Mark and Neumann, Steffen and Rocca-Serra, Philippe and Maguire, Eamonn and Gonz{\'a}lez-Beltr{\'a}n, Alejandra and Sansone, Susanna-Assunta and Griffin, Julian L and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=23109552&retmode=ref&cmd=prlinks}, doi = {10.1093/nar/gks1004}, year = {2013}, date = {2013-01-01}, journal = {Nucleic Acids Research}, volume = {41}, number = {Database issue}, pages = {D781--6}, abstract = {MetaboLights (http://www.ebi.ac.uk/metabolights) is the first general-purpose, open-access repository for metabolomics studies, their raw experimental data and associated metadata, maintained by one of the major open-access data providers in molecular biology. Metabolomic profiling is an important tool for research into biological functioning and into the systemic perturbations caused by diseases, diet and the environment. The effectiveness of such methods depends on the availability of public open data across a broad range of experimental methods and conditions. The MetaboLights repository, powered by the open source ISA framework, is cross-species and cross-technique. 
It will cover metabolite structures and their reference spectra as well as their biological roles, locations, concentrations and raw data from metabolic experiments. Studies automatically receive a stable unique accession number that can be used as a publication reference (e.g. MTBLS1). At present, the repository includes 15 submitted studies, encompassing 93 protocols for 714 assays, and span over 8 different species including human, Caenorhabditis elegans, Mus musculus and Arabidopsis thaliana. Eight hundred twenty-seven of the metabolites identified in these studies have been mapped to ChEBI. These studies cover a variety of techniques, including NMR spectroscopy and mass spectrometry.}, keywords = {}, pubstate = {published}, tppubtype = {article} } MetaboLights (http://www.ebi.ac.uk/metabolights) is the first general-purpose, open-access repository for metabolomics studies, their raw experimental data and associated metadata, maintained by one of the major open-access data providers in molecular biology. Metabolomic profiling is an important tool for research into biological functioning and into the systemic perturbations caused by diseases, diet and the environment. The effectiveness of such methods depends on the availability of public open data across a broad range of experimental methods and conditions. The MetaboLights repository, powered by the open source ISA framework, is cross-species and cross-technique. It will cover metabolite structures and their reference spectra as well as their biological roles, locations, concentrations and raw data from metabolic experiments. Studies automatically receive a stable unique accession number that can be used as a publication reference (e.g. MTBLS1). At present, the repository includes 15 submitted studies, encompassing 93 protocols for 714 assays, and span over 8 different species including human, Caenorhabditis elegans, Mus musculus and Arabidopsis thaliana. 
Eight hundred twenty-seven of the metabolites identified in these studies have been mapped to ChEBI. These studies cover a variety of techniques, including NMR spectroscopy and mass spectrometry. |
Alcantara, Rafael ; Onwubiko, Joseph ; Cao, Hong ; de Matos, Paula ; Cham, Jennifer A; Jacobsen, Jules ; Holliday, Gemma L; Fischer, Julia D; Rahman, Syed Asad ; Jassal, Bijay ; Goujon, Mikael ; Rowland, Francis ; Velankar, Sameer ; Lopez, Rodrigo ; Overington, John P; Kleywegt, Gerard J; Hermjakob, Henning ; O'Donovan, Claire ; Martín, María Jesús ; Thornton, Janet M; Steinbeck, Christoph The EBI enzyme portal. Journal Article Nucleic Acids Research, 41 (Database issue), pp. D773–80, 2013. @article{alcantara2012ebi, title = {The EBI enzyme portal.}, author = {Alcantara, Rafael and Onwubiko, Joseph and Cao, Hong and de Matos, Paula and Cham, Jennifer A and Jacobsen, Jules and Holliday, Gemma L and Fischer, Julia D and Rahman, Syed Asad and Jassal, Bijay and Goujon, Mikael and Rowland, Francis and Velankar, Sameer and Lopez, Rodrigo and Overington, John P and Kleywegt, Gerard J and Hermjakob, Henning and O'Donovan, Claire and Mart{\'i}n, Mar{\'i}a Jes{\'u}s and Thornton, Janet M and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=23175605&retmode=ref&cmd=prlinks}, doi = {10.1093/nar/gks1112}, year = {2013}, date = {2013-01-01}, journal = {Nucleic Acids Research}, volume = {41}, number = {Database issue}, pages = {D773--80}, abstract = {The availability of comprehensive information about enzymes plays an important role in answering questions relevant to interdisciplinary fields such as biochemistry, enzymology, biofuels, bioengineering and drug discovery. At the EMBL European Bioinformatics Institute, we have developed an enzyme portal (http://www.ebi.ac.uk/enzymeportal) to provide this wealth of information on enzymes from multiple in-house resources addressing particular data classes: protein sequence and structure, reactions, pathways and small molecules. The fact that these data reside in separate databases makes information discovery cumbersome. 
The main goal of the portal is to simplify this process for end users.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The availability of comprehensive information about enzymes plays an important role in answering questions relevant to interdisciplinary fields such as biochemistry, enzymology, biofuels, bioengineering and drug discovery. At the EMBL European Bioinformatics Institute, we have developed an enzyme portal (http://www.ebi.ac.uk/enzymeportal) to provide this wealth of information on enzymes from multiple in-house resources addressing particular data classes: protein sequence and structure, reactions, pathways and small molecules. The fact that these data reside in separate databases makes information discovery cumbersome. The main goal of the portal is to simplify this process for end users. |
May, John W; James, A Gordon ; Steinbeck, Christoph Metingear: a development environment for annotating genome-scale metabolic models. Journal Article Bioinformatics, 29 (17), pp. 2213–2215, 2013. @article{May:2013kc, title = {Metingear: a development environment for annotating genome-scale metabolic models.}, author = {May, John W and James, A Gordon and Steinbeck, Christoph}, url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btt342}, doi = {10.1093/bioinformatics/btt342}, year = {2013}, date = {2013-01-01}, journal = {Bioinformatics}, volume = {29}, number = {17}, pages = {2213--2215}, abstract = {SUMMARY:Genome-scale metabolic models often lack annotations that would allow them to be used for further analysis. Previous efforts have focused on associating metabolites in the model with a cross reference, but this can be problematic if the reference is not freely available, multiple resources are used or the metabolite is added from a literature review. Associating each metabolite with chemical structure provides unambiguous identification of the components and a more detailed view of the metabolism. We have developed an open-source desktop application that simplifies the process of adding database cross references and chemical structures to genome-scale metabolic models. Annotated models can be exported to the Systems Biology Markup Language open interchange format. AVAILABILITY:Source code, binaries, documentation and tutorials are freely available at http://johnmay.github.com/metingear. The application is implemented in Java with bundles available for MS Windows and Macintosh OS X. CONTACT:johnmay@ebi.ac.uk SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.}, keywords = {}, pubstate = {published}, tppubtype = {article} } SUMMARY:Genome-scale metabolic models often lack annotations that would allow them to be used for further analysis. 
Previous efforts have focused on associating metabolites in the model with a cross reference, but this can be problematic if the reference is not freely available, multiple resources are used or the metabolite is added from a literature review. Associating each metabolite with chemical structure provides unambiguous identification of the components and a more detailed view of the metabolism. We have developed an open-source desktop application that simplifies the process of adding database cross references and chemical structures to genome-scale metabolic models. Annotated models can be exported to the Systems Biology Markup Language open interchange format. AVAILABILITY:Source code, binaries, documentation and tutorials are freely available at http://johnmay.github.com/metingear. The application is implemented in Java with bundles available for MS Windows and Macintosh OS X. CONTACT:johnmay@ebi.ac.uk SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online. |
Salek, Reza M; Haug, Kenneth ; Steinbeck, Christoph Dissemination of metabolomics results: role of MetaboLights and COSMOS. Journal Article GigaScience, 2 (1), pp. 8, 2013. @article{Salek:2013gja, title = {Dissemination of metabolomics results: role of MetaboLights and COSMOS.}, author = {Salek, Reza M and Haug, Kenneth and Steinbeck, Christoph}, url = {http://www.gigasciencejournal.com/content/2/1/8}, doi = {10.1186/2047-217X-2-8}, year = {2013}, date = {2013-01-01}, journal = {GigaScience}, volume = {2}, number = {1}, pages = {8}, publisher = {BioMed Central}, abstract = {With ever-increasing amounts of metabolomics data produced each year, there is an even greater need to disseminate data and knowledge produced in a standard and reproducible way. To assist with this a general purpose, open source metabolomics repository, MetaboLights, was launched in 2012. To promote a community standard, initially culminated as metabolomics standards initiative (MSI), COordination of Standards in MetabOlomicS (COSMOS) was introduced. COSMOS aims to link life science e-infrastructures within the worldwide metabolomics community as well as develop and maintain open source exchange formats for raw and processed data, ensuring better flow of metabolomics information.}, keywords = {}, pubstate = {published}, tppubtype = {article} } With ever-increasing amounts of metabolomics data produced each year, there is an even greater need to disseminate data and knowledge produced in a standard and reproducible way. To assist with this a general purpose, open source metabolomics repository, MetaboLights, was launched in 2012. To promote a community standard, initially culminated as metabolomics standards initiative (MSI), COordination of Standards in MetabOlomicS (COSMOS) was introduced. 
COSMOS aims to link life science e-infrastructures within the worldwide metabolomics community as well as develop and maintain open source exchange formats for raw and processed data, ensuring better flow of metabolomics information. |
Salek, R M; Haug, K; Conesa, P; Hastings, J K; Williams, M; Steinbeck, C; Sansone, S A; Mahendraker, T; Maguire, E; Gonzalez-Beltran, A; Rocca-Serra, P The MetaboLights repository: curation challenges in metabolomics Journal Article Database: The Journal of Biological Databases and Curation, 2013 (0), pp. bat029, 2013. @article{RezaMSalek:2013bm, title = {The MetaboLights repository: curation challenges in metabolomics}, author = {Salek, R M and Haug, K and Conesa, P and Hastings, J K and Williams, M and Steinbeck, C and Sansone, S A and Mahendraker, T and Maguire, E and Gonzalez-Beltran, A and Rocca-Serra, P}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3638156/?report=abstract}, doi = {10.1093/database/bat029}, year = {2013}, date = {2013-01-01}, journal = {Database: The Journal of Biological Databases and Curation}, volume = {2013}, number = {0}, pages = {bat029}, publisher = {Oxford University Press}, abstract = {Abstract MetaboLights is the first general-purpose open-access curated repository for metabolomic studies, their raw experimental data and associated metadata, maintained by one of the major open-access data providers in molecular biology. Increases in the ...}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract MetaboLights is the first general-purpose open-access curated repository for metabolomic studies, their raw experimental data and associated metadata, maintained by one of the major open-access data providers in molecular biology. Increases in the ... |
Hill, David P; Adams, Nico ; Bada, Mike ; Batchelor, Colin ; Berardini, Tanya Z; Dietze, Heiko ; Drabkin, Harold J; Ennis, Marcus ; Foulger, Rebecca E; Harris, Midori A; Hastings, Janna ; Kale, Namrata S; De Matos, Paula ; Mungall, Christopher J; Owen, Gareth ; Roncaglia, Paola ; Steinbeck, Christoph ; Turner, Steve ; Lomax, Jane Dovetailing biology and chemistry: integrating the Gene Ontology with the ChEBI chemical ontology. Journal Article BMC Genomics, 14 (1), pp. 513, 2013. @article{Hill:2013gi, title = {Dovetailing biology and chemistry: integrating the Gene Ontology with the ChEBI chemical ontology.}, author = {Hill, David P and Adams, Nico and Bada, Mike and Batchelor, Colin and Berardini, Tanya Z and Dietze, Heiko and Drabkin, Harold J and Ennis, Marcus and Foulger, Rebecca E and Harris, Midori A and Hastings, Janna and Kale, Namrata S and De Matos, Paula and Mungall, Christopher J and Owen, Gareth and Roncaglia, Paola and Steinbeck, Christoph and Turner, Steve and Lomax, Jane}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=23895341&retmode=ref&cmd=prlinks}, doi = {10.1186/1471-2164-14-513}, year = {2013}, date = {2013-01-01}, journal = {BMC Genomics}, volume = {14}, number = {1}, pages = {513}, abstract = {BACKGROUND:The Gene Ontology (GO) facilitates the description of the action of gene products in a biological context. Many GO terms refer to chemical entities that participate in biological processes. To facilitate accurate and consistent systems-wide biological representation, it is necessary to integrate the chemical view of these entities with the biological view of GO functions and processes. We describe a collaborative effort between the GO and the Chemical Entities of Biological Interest (ChEBI) ontology developers to ensure that the representation of chemicals in the GO is both internally consistent and in alignment with the chemical expertise captured in ChEBI. 
RESULTS:We have examined and integrated the ChEBI structural hierarchy into the GO resource through computationally-assisted manual curation of both GO and ChEBI. Our work has resulted in the creation of computable definitions of GO terms that contain fully defined semantic relationships to corresponding chemical terms in ChEBI. CONCLUSIONS:The set of logical definitions using both the GO and ChEBI has already been used to automate aspects of GO development and has the potential to allow the integration of data across the domains of biology and chemistry. These logical definitions are available as an extended version of the ontology from http://purl.obolibrary.org/obo/go/extensions/go-plus.owl.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:The Gene Ontology (GO) facilitates the description of the action of gene products in a biological context. Many GO terms refer to chemical entities that participate in biological processes. To facilitate accurate and consistent systems-wide biological representation, it is necessary to integrate the chemical view of these entities with the biological view of GO functions and processes. We describe a collaborative effort between the GO and the Chemical Entities of Biological Interest (ChEBI) ontology developers to ensure that the representation of chemicals in the GO is both internally consistent and in alignment with the chemical expertise captured in ChEBI. RESULTS:We have examined and integrated the ChEBI structural hierarchy into the GO resource through computationally-assisted manual curation of both GO and ChEBI. Our work has resulted in the creation of computable definitions of GO terms that contain fully defined semantic relationships to corresponding chemical terms in ChEBI. 
CONCLUSIONS:The set of logical definitions using both the GO and ChEBI has already been used to automate aspects of GO development and has the potential to allow the integration of data across the domains of biology and chemistry. These logical definitions are available as an extended version of the ontology from http://purl.obolibrary.org/obo/go/extensions/go-plus.owl. |
Hastings, Janna ; De Matos, Paula ; Dekker, Adriano ; Ennis, Marcus ; Harsha, Bhavana ; Kale, Namrata ; Muthukrishnan, Venkatesh ; Owen, Gareth ; Turner, Steve ; Williams, Mark ; Steinbeck, Christoph The ChEBI reference database and ontology for biologically relevant chemistry: enhancements for 2013. Journal Article Nucleic Acids Research, 41 (Database issue), pp. D456–63, 2013. @article{Hastings:2012jx, title = {The ChEBI reference database and ontology for biologically relevant chemistry: enhancements for 2013.}, author = {Hastings, Janna and De Matos, Paula and Dekker, Adriano and Ennis, Marcus and Harsha, Bhavana and Kale, Namrata and Muthukrishnan, Venkatesh and Owen, Gareth and Turner, Steve and Williams, Mark and Steinbeck, Christoph}, url = {http://nar.oxfordjournals.org/lookup/doi/10.1093/nar/gks1146}, doi = {10.1093/nar/gks1146}, year = {2013}, date = {2013-01-01}, journal = {Nucleic Acids Research}, volume = {41}, number = {Database issue}, pages = {D456--63}, publisher = {Oxford University Press}, abstract = {ChEBI (http://www.ebi.ac.uk/chebi) is a database and ontology of chemical entities of biological interest. Over the past few years, ChEBI has continued to grow steadily in content, and has added several new features. In addition to incorporating all user-requested compounds, our annotation efforts have emphasized immunology, natural products and metabolites in many species. All database entries are now 'is_a' classified within the ontology, meaning that all of the chemicals are available to semantic reasoning tools that harness the classification hierarchy. We have completely aligned the ontology with the Open Biomedical Ontologies (OBO) Foundry-recommended upper level Basic Formal Ontology. 
Furthermore, we have aligned our chemical classification with the classification of chemical-involving processes in the Gene Ontology (GO), and as a result of this effort, the majority of chemical-involving processes in GO are now defined in terms of the ChEBI entities that participate in them. This effort necessitated incorporating many additional biologically relevant compounds. We have incorporated additional data types including reference citations, and the species and component for metabolites. Finally, our website and web services have had several enhancements, most notably the provision of a dynamic new interactive graph-based ontology visualization.}, keywords = {}, pubstate = {published}, tppubtype = {article} } ChEBI (http://www.ebi.ac.uk/chebi) is a database and ontology of chemical entities of biological interest. Over the past few years, ChEBI has continued to grow steadily in content, and has added several new features. In addition to incorporating all user-requested compounds, our annotation efforts have emphasized immunology, natural products and metabolites in many species. All database entries are now 'is_a' classified within the ontology, meaning that all of the chemicals are available to semantic reasoning tools that harness the classification hierarchy. We have completely aligned the ontology with the Open Biomedical Ontologies (OBO) Foundry-recommended upper level Basic Formal Ontology. Furthermore, we have aligned our chemical classification with the classification of chemical-involving processes in the Gene Ontology (GO), and as a result of this effort, the majority of chemical-involving processes in GO are now defined in terms of the ChEBI entities that participate in them. This effort necessitated incorporating many additional biologically relevant compounds. We have incorporated additional data types including reference citations, and the species and component for metabolites. 
Finally, our website and web services have had several enhancements, most notably the provision of a dynamic new interactive graph-based ontology visualization. |
Foster, Joseph M; Moreno, Pablo ; Fabregat, Antonio ; Hermjakob, Henning ; Steinbeck, Christoph ; Apweiler, Rolf ; Wakelam, Michael J O; Vizcaíno, Juan Antonio LipidHome: a database of theoretical lipids optimized for high throughput mass spectrometry lipidomics. Journal Article PLoS ONE, 8 (5), pp. e61951, 2013. @article{Foster:2013bc, title = {LipidHome: a database of theoretical lipids optimized for high throughput mass spectrometry lipidomics.}, author = {Foster, Joseph M and Moreno, Pablo and Fabregat, Antonio and Hermjakob, Henning and Steinbeck, Christoph and Apweiler, Rolf and Wakelam, Michael J O and Vizca{\'i}no, Juan Antonio}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=23667450&retmode=ref&cmd=prlinks}, doi = {10.1371/journal.pone.0061951}, year = {2013}, date = {2013-01-01}, journal = {PLoS ONE}, volume = {8}, number = {5}, pages = {e61951}, abstract = {Protein sequence databases are the pillar upon which modern proteomics is supported, representing a stable reference space of predicted and validated proteins. One example of such resources is UniProt, enriched with both expertly curated and automatic annotations. Taken largely for granted, similar mature resources such as UniProt are not available yet in some other "omics" fields, lipidomics being one of them. While having a seasoned community of wet lab scientists, lipidomics lies significantly behind proteomics in the adoption of data standards and other core bioinformatics concepts. This work aims to reduce the gap by developing an equivalent resource to UniProt called 'LipidHome', providing theoretically generated lipid molecules and useful metadata. Using the 'FASTLipid' Java library, a database was populated with theoretical lipids, generated from a set of community agreed upon chemical bounds. In parallel, a web application was developed to present the information and provide computational access via a web service. 
Designed specifically to accommodate high throughput mass spectrometry based approaches, lipids are organised into a hierarchy that reflects the variety in the structural resolution of lipid identifications. Additionally, cross-references to other lipid related resources and papers that cite specific lipids were used to annotate lipid records. The web application encompasses a browser for viewing lipid records and a 'tools' section where an MS1 search engine is currently implemented. LipidHome can be accessed at http://www.ebi.ac.uk/apweiler-srv/lipidhome.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Protein sequence databases are the pillar upon which modern proteomics is supported, representing a stable reference space of predicted and validated proteins. One example of such resources is UniProt, enriched with both expertly curated and automatic annotations. Taken largely for granted, similar mature resources such as UniProt are not available yet in some other "omics" fields, lipidomics being one of them. While having a seasoned community of wet lab scientists, lipidomics lies significantly behind proteomics in the adoption of data standards and other core bioinformatics concepts. This work aims to reduce the gap by developing an equivalent resource to UniProt called 'LipidHome', providing theoretically generated lipid molecules and useful metadata. Using the 'FASTLipid' Java library, a database was populated with theoretical lipids, generated from a set of community agreed upon chemical bounds. In parallel, a web application was developed to present the information and provide computational access via a web service. Designed specifically to accommodate high throughput mass spectrometry based approaches, lipids are organised into a hierarchy that reflects the variety in the structural resolution of lipid identifications. 
Additionally, cross-references to other lipid related resources and papers that cite specific lipids were used to annotate lipid records. The web application encompasses a browser for viewing lipid records and a 'tools' section where an MS1 search engine is currently implemented. LipidHome can be accessed at http://www.ebi.ac.uk/apweiler-srv/lipidhome. |
De Matos, Paula ; Cham, Jennifer A; Cao, Hong ; Alcantara, Rafael ; Rowland, Francis ; Lopez, Rodrigo ; Steinbeck, Christoph The Enzyme Portal: A case study in applying user-centred design methods in bioinformatics Journal Article BMC Bioinformatics, 14 (1), pp. 103, 2013. @article{DeMatos:2013jp, title = {The Enzyme Portal: A case study in applying user-centred design methods in bioinformatics}, author = {De Matos, Paula and Cham, Jennifer A and Cao, Hong and Alcantara, Rafael and Rowland, Francis and Lopez, Rodrigo and Steinbeck, Christoph}, url = {http://www.biomedcentral.com/1471-2105/14/103}, doi = {10.1186/1471-2105-14-103}, year = {2013}, date = {2013-01-01}, journal = {BMC Bioinformatics}, volume = {14}, number = {1}, pages = {103}, publisher = {BioMed Central Ltd}, abstract = {User-centred design (UCD) is a type of user interface design in which the needs and desires of users are taken into account at each stage of the design process for a service or product; often for software applications and websites. Its goal is to facilitate the design of software that is both useful and easy to use. To achieve this, you must characterise users' requirements, design suitable interactions to meet their needs, and test your designs using prototypes and real life scenarios. For bioinformatics, there is little practical information available regarding how to carry out UCD in practice. To address this we describe a complete, multi-stage UCD process used for creating a new bioinformatics resource for integrating enzyme information, called the Enzyme Portal (http://www.ebi.ac.uk/enzymeportal). 
This freely-available service mines and displays data about proteins with enzymatic activity from public repositories via a single search, and includes biochemical reactions, biological pathways, small molecule chemistry, disease information, 3D protein structures and relevant scientific literature. We employed several UCD techniques, including: persona development, interviews, 'canvas sort' card sorting, user workflows, usability testing and others. Our hope is that this case study will motivate the reader to apply similar UCD approaches to their own software design for bioinformatics. Indeed, we found the benefits included more effective decision-making for design ideas and technologies; enhanced team-working and communication; cost effectiveness; and ultimately a service that more closely meets the needs of our target audience.}, keywords = {}, pubstate = {published}, tppubtype = {article} } User-centred design (UCD) is a type of user interface design in which the needs and desires of users are taken into account at each stage of the design process for a service or product; often for software applications and websites. Its goal is to facilitate the design of software that is both useful and easy to use. To achieve this, you must characterise users' requirements, design suitable interactions to meet their needs, and test your designs using prototypes and real life scenarios. For bioinformatics, there is little practical information available regarding how to carry out UCD in practice. To address this we describe a complete, multi-stage UCD process used for creating a new bioinformatics resource for integrating enzyme information, called the Enzyme Portal (http://www.ebi.ac.uk/enzymeportal). 
This freely-available service mines and displays data about proteins with enzymatic activity from public repositories via a single search, and includes biochemical reactions, biological pathways, small molecule chemistry, disease information, 3D protein structures and relevant scientific literature. We employed several UCD techniques, including: persona development, interviews, 'canvas sort' card sorting, user workflows, usability testing and others. Our hope is that this case study will motivate the reader to apply similar UCD approaches to their own software design for bioinformatics. Indeed, we found the benefits included more effective decision-making for design ideas and technologies; enhanced team-working and communication; cost effectiveness; and ultimately a service that more closely meets the needs of our target audience. |
2012 |
Markley, John L; Akutsu, Hideo ; Asakura, Tetsuo ; Baldus, Marc ; Boelens, Rolf ; Bonvin, Alexandre ; Kaptein, Robert ; Bax, Ad ; Bezsonova, Irina ; Gryk, Michael R; Hoch, Jeffrey C; Korzhnev, Dmitry M; Maciejewski, Mark W; Case, Dave ; Chazin, Walter J; Cross, Timothy A; Dames, Sonja ; Kessler, Horst ; Lange, Oliver ; Madl, Tobias ; Reif, Bernd ; Sattler, Michael ; Eliezer, David ; Fersht, Alan ; Forman-Kay, Julie ; Kay, Lewis E; Fraser, James ; Gross, John ; Kortemme, Tanja ; Sali, Andrej ; Fujiwara, Toshimichi ; Gardner, Kevin ; Luo, Xuelian ; Rizo-Rey, Jose ; Rosen, Michael ; Gil, Roberto R; Ho, Chien ; Rule, Gordon ; Gronenborn, Angela M; Ishima, Rieko ; Klein-Seetharaman, Judith ; Tang, Pei ; van der Wel, Patrick ; Xu, Yan ; Grzesiek, Stephan ; Hiller, Sebastian ; Seelig, Joachim ; Laue, Ernest D; Mott, Helen ; Nietlispach, Daniel ; Barsukov, Igor ; Lian, Lu-Yun ; Middleton, David ; Blumenschein, Tharin ; Moore, Geoffrey ; Campbell, Iain ; Schnell, Jason ; Vakonakis, Ioannis John ; Watts, Anthony ; Conte, Maria R; Mason, James ; Pfuhl, Mark ; Sanderson, Mark R; Craven, Jeremy ; Williamson, Michael ; Dominguez, Cyril ; Roberts, Gordon ; Günther, Ulrich ; Overduin, Michael ; Werner, Joern ; Williamson, Philip ; Blindauer, Claudia ; Crump, Matthew ; Driscoll, Paul ; Frenkiel, Tom ; Golovanov, Alexander ; Matthews, Steve ; Parkinson, John ; Uhrin, Dusan ; Williams, Mark ; Neuhaus, David ; Oschkinat, Hartmut ; Ramos, Andres ; Shaw, David E; Steinbeck, Christoph ; Vendruscolo, Michele ; Vuister, Geerten W; Walters, Kylie J; Weinstein, Harel ; Wüthrich, Kurt ; Yokoyama, Shigeyuki In support of the BMRB. Journal Article Nature structural & molecular biology, 19 (9), pp. 854–860, 2012. 
@article{Markley:2012jf, title = {In support of the BMRB.}, author = {Markley, John L and Akutsu, Hideo and Asakura, Tetsuo and Baldus, Marc and Boelens, Rolf and Bonvin, Alexandre and Kaptein, Robert and Bax, Ad and Bezsonova, Irina and Gryk, Michael R and Hoch, Jeffrey C and Korzhnev, Dmitry M and Maciejewski, Mark W and Case, Dave and Chazin, Walter J and Cross, Timothy A and Dames, Sonja and Kessler, Horst and Lange, Oliver and Madl, Tobias and Reif, Bernd and Sattler, Michael and Eliezer, David and Fersht, Alan and Forman-Kay, Julie and Kay, Lewis E and Fraser, James and Gross, John and Kortemme, Tanja and Sali, Andrej and Fujiwara, Toshimichi and Gardner, Kevin and Luo, Xuelian and Rizo-Rey, Jose and Rosen, Michael and Gil, Roberto R and Ho, Chien and Rule, Gordon and Gronenborn, Angela M and Ishima, Rieko and Klein-Seetharaman, Judith and Tang, Pei and van der Wel, Patrick and Xu, Yan and Grzesiek, Stephan and Hiller, Sebastian and Seelig, Joachim and Laue, Ernest D and Mott, Helen and Nietlispach, Daniel and Barsukov, Igor and Lian, Lu-Yun and Middleton, David and Blumenschein, Tharin and Moore, Geoffrey and Campbell, Iain and Schnell, Jason and Vakonakis, Ioannis John and Watts, Anthony and Conte, Maria R and Mason, James and Pfuhl, Mark and Sanderson, Mark R and Craven, Jeremy and Williamson, Michael and Dominguez, Cyril and Roberts, Gordon and G{ü}nther, Ulrich and Overduin, Michael and Werner, Joern and Williamson, Philip and Blindauer, Claudia and Crump, Matthew and Driscoll, Paul and Frenkiel, Tom and Golovanov, Alexander and Matthews, Steve and Parkinson, John and Uhrin, Dusan and Williams, Mark and Neuhaus, David and Oschkinat, Hartmut and Ramos, Andres and Shaw, David E and Steinbeck, Christoph and Vendruscolo, Michele and Vuister, Geerten W and Walters, Kylie J and Weinstein, Harel and W{ü}thrich, Kurt and Yokoyama, Shigeyuki}, url = {http://www.nature.com/doifinder/10.1038/nsmb.2371}, doi = {10.1038/nsmb.2371}, year = {2012}, date = {2012-09-01}, 
journal = {Nature structural \& molecular biology}, volume = {19}, number = {9}, pages = {854--860}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Steinbeck, Christoph ; Conesa, Pablo ; Haug, Kenneth ; Mahendraker, Tejasvi ; Williams, Mark ; Maguire, Eamonn ; Rocca-Serra, Philippe ; Sansone, Susanna-Assunta ; Salek, Reza M; Griffin, Julian L MetaboLights: towards a new COSMOS of metabolomics data management Journal Article Metabolomics, 8 (5), pp. 757–760, 2012. @article{steinbeck2012metabolights, title = {MetaboLights: towards a new COSMOS of metabolomics data management}, author = {Steinbeck, Christoph and Conesa, Pablo and Haug, Kenneth and Mahendraker, Tejasvi and Williams, Mark and Maguire, Eamonn and Rocca-Serra, Philippe and Sansone, Susanna-Assunta and Salek, Reza M and Griffin, Julian L}, url = {http://www.springerlink.com/index/10.1007/s11306-012-0462-0}, doi = {10.1007/s11306-012-0462-0}, year = {2012}, date = {2012-09-01}, journal = {Metabolomics}, volume = {8}, number = {5}, pages = {757--760}, abstract = {Exciting funding initiatives are emerging in Europe and the US for metabolomics data production, storage, dissemination and analysis. This is based on a rich ecosystem of resources around the world, which has been built during the past ten years, including but not limited to resources such as MassBank in Japan and the Human Metabolome Database in Canada. Now, the European Bioinformatics Institute has launched MetaboLights, a database for metabolomics experiments and the associated metadata (http://www.ebi.ac.uk/metabolights). It is the first comprehensive, cross-species, cross-platform metabolomics database maintained by one of the major open access data providers in molecular biology. In October, the European COSMOS consortium will start its work on Metabolomics data standardization, publication and dissemination workflows. The NIH in the US is establishing 6-8 metabolomics services cores as well as a national metabolomics repository. 
This communication reports about MetaboLights as a new resource for Metabolomics research, summarises the related developments and outlines how they may consolidate the knowledge management in this third large omics field next to proteomics and genomics.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Exciting funding initiatives are emerging in Europe and the US for metabolomics data production, storage, dissemination and analysis. This is based on a rich ecosystem of resources around the world, which has been built during the past ten years, including but not limited to resources such as MassBank in Japan and the Human Metabolome Database in Canada. Now, the European Bioinformatics Institute has launched MetaboLights, a database for metabolomics experiments and the associated metadata (http://www.ebi.ac.uk/metabolights). It is the first comprehensive, cross-species, cross-platform metabolomics database maintained by one of the major open access data providers in molecular biology. In October, the European COSMOS consortium will start its work on Metabolomics data standardization, publication and dissemination workflows. The NIH in the US is establishing 6-8 metabolomics services cores as well as a national metabolomics repository. This communication reports about MetaboLights as a new resource for Metabolomics research, summarises the related developments and outlines how they may consolidate the knowledge management in this third large omics field next to proteomics and genomics. |
Pavelin, Katrina ; Cham, Jennifer A; De Matos, Paula ; Brooksbank, Cath ; Cameron, Graham ; Steinbeck, Christoph Bioinformatics Meets User-Centred Design: A Perspective Journal Article PLoS Computational Biology, 8 (7), pp. e1002554, 2012. @article{pavelin2012bioinformatics, title = {Bioinformatics Meets User-Centred Design: A Perspective}, author = {Pavelin, Katrina and Cham, Jennifer A and De Matos, Paula and Brooksbank, Cath and Cameron, Graham and Steinbeck, Christoph}, url = {http://dx.plos.org/10.1371/journal.pcbi.1002554.pdf}, doi = {10.1371/journal.pcbi.1002554}, year = {2012}, date = {2012-07-01}, journal = {PLoS Computational Biology}, volume = {8}, number = {7}, pages = {e1002554}, publisher = {Public Library of Science}, abstract = {Designers have a saying that "the joy of an early release lasts but a short time. The bitterness of an unusable system lasts for years." It is indeed disappointing to discover that your data resources are not being used to their full potential. Not only have you invested your time, effort, and research grant on the project, but you may face costly redesigns if you want to improve the system later. This scenario would be less likely if the product was designed to provide users with exactly what they need, so that it is fit for purpose before its launch. We work at EMBL-European Bioinformatics Institute (EMBL-EBI), and we consult extensively with life science researchers to find out what they need from biological data resources. We have found that although users believe that the bioinformatics community is providing accurate and valuable data, they often find the interfaces to these resources tricky to use and navigate. We believe that if you can find out what your users want even before you create the first mock-up of a system, the final product will provide a better user experience. 
This would encourage more people to use the resource and they would have greater access to the data, which could ultimately lead to more scientific discoveries. In this paper, we explore the need for a user-centred design (UCD) strategy when designing bioinformatics resources and illustrate this with examples from our work at EMBL-EBI. Our aim is to introduce the reader to how selected UCD techniques may be successfully applied to software design for bioinformatics.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Designers have a saying that "the joy of an early release lasts but a short time. The bitterness of an unusable system lasts for years." It is indeed disappointing to discover that your data resources are not being used to their full potential. Not only have you invested your time, effort, and research grant on the project, but you may face costly redesigns if you want to improve the system later. This scenario would be less likely if the product was designed to provide users with exactly what they need, so that it is fit for purpose before its launch. We work at EMBL-European Bioinformatics Institute (EMBL-EBI), and we consult extensively with life science researchers to find out what they need from biological data resources. We have found that although users believe that the bioinformatics community is providing accurate and valuable data, they often find the interfaces to these resources tricky to use and navigate. We believe that if you can find out what your users want even before you create the first mock-up of a system, the final product will provide a better user experience. This would encourage more people to use the resource and they would have greater access to the data, which could ultimately lead to more scientific discoveries. In this paper, we explore the need for a user-centred design (UCD) strategy when designing bioinformatics resources and illustrate this with examples from our work at EMBL-EBI. 
Our aim is to introduce the reader to how selected UCD techniques may be successfully applied to software design for bioinformatics. |
Sansone, Susanna-Assunta ; Rocca-Serra, Philippe ; Field, Dawn ; Maguire, Eamonn ; Taylor, Chris ; Hofmann, Oliver ; Fang, Hong ; Neumann, Steffen ; Tong, Weida ; Amaral-Zettler, Linda ; Begley, Kimberly ; Booth, Tim ; Bougueleret, Lydie ; Burns, Gully ; Chapman, Brad ; Clark, Tim ; Coleman, Lee-Ann ; Copeland, Jay ; Das, Sudeshna ; de Daruvar, Antoine ; De Matos, Paula ; Dix, Ian ; Edmunds, Scott ; Evelo, Chris T; Forster, Mark J; Gaudet, Pascale ; Gilbert, Jack ; Goble, Carole ; Griffin, Julian L; Jacob, Daniel ; Kleinjans, Jos ; Harland, Lee ; Haug, Kenneth ; Hermjakob, Henning ; Ho Sui, Shannan J; Laederach, Alain ; Liang, Shaoguang ; Marshall, Stephen ; McGrath, Annette ; Merrill, Emily ; Reilly, Dorothy ; Roux, Magali ; Shamu, Caroline E; Shang, Catherine A; Steinbeck, Christoph ; Trefethen, Anne ; Williams-Jones, Bryn ; Wolstencroft, Katherine ; Xenarios, Ioannis ; Hide, Winston Toward interoperable bioscience data Journal Article Nat Genet, 44 (2), pp. 121–126, 2012. @article{sansone2012toward, title = {Toward interoperable bioscience data}, author = {Sansone, Susanna-Assunta and Rocca-Serra, Philippe and Field, Dawn and Maguire, Eamonn and Taylor, Chris and Hofmann, Oliver and Fang, Hong and Neumann, Steffen and Tong, Weida and Amaral-Zettler, Linda and Begley, Kimberly and Booth, Tim and Bougueleret, Lydie and Burns, Gully and Chapman, Brad and Clark, Tim and Coleman, Lee-Ann and Copeland, Jay and Das, Sudeshna and de Daruvar, Antoine and De Matos, Paula and Dix, Ian and Edmunds, Scott and Evelo, Chris T and Forster, Mark J and Gaudet, Pascale and Gilbert, Jack and Goble, Carole and Griffin, Julian L and Jacob, Daniel and Kleinjans, Jos and Harland, Lee and Haug, Kenneth and Hermjakob, Henning and Ho Sui, Shannan J and Laederach, Alain and Liang, Shaoguang and Marshall, Stephen and McGrath, Annette and Merrill, Emily and Reilly, Dorothy and Roux, Magali and Shamu, Caroline E and Shang, Catherine A and Steinbeck, Christoph and Trefethen, Anne and 
Williams-Jones, Bryn and Wolstencroft, Katherine and Xenarios, Ioannis and Hide, Winston}, url = {http://www.nature.com/doifinder/10.1038/ng.1054}, doi = {10.1038/ng.1054}, year = {2012}, date = {2012-01-01}, journal = {Nat Genet}, volume = {44}, number = {2}, pages = {121--126}, publisher = {Nature Publishing Group}, abstract = {To make full use of research data, the bioscience community needs to adopt technologies and reward mechanisms that support interoperability and promote the growth of an open 'data commoning' culture. Here we describe the prerequisites for data commoning and present an established and growing ecosystem of solutions using the shared 'Investigation-Study-Assay' framework to support that vision.}, keywords = {}, pubstate = {published}, tppubtype = {article} } To make full use of research data, the bioscience community needs to adopt technologies and reward mechanisms that support interoperability and promote the growth of an open 'data commoning' culture. Here we describe the prerequisites for data commoning and present an established and growing ecosystem of solutions using the shared 'Investigation-Study-Assay' framework to support that vision. |
Chepelev, Leonid L; Hastings, Janna ; Ennis, Marcus ; Steinbeck, Christoph ; Dumontier, Michel Self-organizing ontology of biochemically relevant small molecules. Journal Article BMC Bioinformatics, 13 (1), pp. 3, 2012. @article{chepelev2012self, title = {Self-organizing ontology of biochemically relevant small molecules.}, author = {Chepelev, Leonid L and Hastings, Janna and Ennis, Marcus and Steinbeck, Christoph and Dumontier, Michel}, url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-13-3}, doi = {10.1186/1471-2105-13-3}, year = {2012}, date = {2012-01-01}, journal = {BMC Bioinformatics}, volume = {13}, number = {1}, pages = {3}, abstract = {BACKGROUND:The advent of high-throughput experimentation in biochemistry has led to the generation of vast amounts of chemical data, necessitating the development of novel analysis, characterization, and cataloguing techniques and tools. Recently, a movement to publically release such data has advanced biochemical structure-activity relationship research, while providing new challenges, the biggest being the curation, annotation, and classification of this information to facilitate useful biochemical pattern analysis. Unfortunately, the human resources currently employed by the organizations supporting these efforts (e.g. ChEBI) are expanding linearly, while new useful scientific information is being released in a seemingly exponential fashion. Compounding this, currently existing chemical classification and annotation systems are not amenable to automated classification, formal and transparent chemical class definition axiomatization, facile class redefinition, or novel class integration, thus further limiting chemical ontology growth by necessitating human involvement in curation. Clearly, there is a need for the automation of this process, especially for novel chemical entities of biological interest. 
RESULTS:To address this, we present a formal framework based on Semantic Web technologies for the automatic design of chemical ontology which can be used for automated classification of novel entities. We demonstrate the automatic self-assembly of a structure-based chemical ontology based on 60 MeSH and 40 ChEBI chemical classes. This ontology is then used to classify 200 compounds with an accuracy of 92.7%. We extend these structure-based classes with molecular feature information and demonstrate the utility of our framework for classification of functionally relevant chemicals. Finally, we discuss an iterative approach that we envision for future biochemical ontology development. CONCLUSIONS:We conclude that the proposed methodology can ease the burden of chemical data annotators and dramatically increase their productivity. We anticipate that the use of formal logic in our proposed framework will make chemical classification criteria more transparent to humans and machines alike and will thus facilitate predictive and integrative bioactivity model development.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:The advent of high-throughput experimentation in biochemistry has led to the generation of vast amounts of chemical data, necessitating the development of novel analysis, characterization, and cataloguing techniques and tools. Recently, a movement to publically release such data has advanced biochemical structure-activity relationship research, while providing new challenges, the biggest being the curation, annotation, and classification of this information to facilitate useful biochemical pattern analysis. Unfortunately, the human resources currently employed by the organizations supporting these efforts (e.g. ChEBI) are expanding linearly, while new useful scientific information is being released in a seemingly exponential fashion. 
Compounding this, currently existing chemical classification and annotation systems are not amenable to automated classification, formal and transparent chemical class definition axiomatization, facile class redefinition, or novel class integration, thus further limiting chemical ontology growth by necessitating human involvement in curation. Clearly, there is a need for the automation of this process, especially for novel chemical entities of biological interest. RESULTS:To address this, we present a formal framework based on Semantic Web technologies for the automatic design of chemical ontology which can be used for automated classification of novel entities. We demonstrate the automatic self-assembly of a structure-based chemical ontology based on 60 MeSH and 40 ChEBI chemical classes. This ontology is then used to classify 200 compounds with an accuracy of 92.7%. We extend these structure-based classes with molecular feature information and demonstrate the utility of our framework for classification of functionally relevant chemicals. Finally, we discuss an iterative approach that we envision for future biochemical ontology development. CONCLUSIONS:We conclude that the proposed methodology can ease the burden of chemical data annotators and dramatically increase their productivity. We anticipate that the use of formal logic in our proposed framework will make chemical classification criteria more transparent to humans and machines alike and will thus facilitate predictive and integrative bioactivity model development. |
Hastings, Janna ; Magka, Despoina ; Batchelor, Colin ; Duan, Lian ; Stevens, Robert ; Ennis, Marcus ; Steinbeck, Christoph Structure-based classification and ontology in chemistry. Journal Article Journal of cheminformatics, 4 (1), pp. 8, 2012. @article{hastings2012structure, title = {Structure-based classification and ontology in chemistry.}, author = {Hastings, Janna and Magka, Despoina and Batchelor, Colin and Duan, Lian and Stevens, Robert and Ennis, Marcus and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=22480202&retmode=ref&cmd=prlinks}, doi = {10.1186/1758-2946-4-8}, year = {2012}, date = {2012-01-01}, journal = {Journal of cheminformatics}, volume = {4}, number = {1}, pages = {8}, abstract = {BACKGROUND:Recent years have seen an explosion in the availability of data in the chemistry domain. With this information explosion, however, retrieving relevant results from the available information, and organising those results, become even harder problems. Computational processing is essential to filter and organise the available resources so as to better facilitate the work of scientists. Ontologies encode expert domain knowledge in a hierarchically organised machine-processable format. One such ontology for the chemical domain is ChEBI. ChEBI provides a classification of chemicals based on their structural features and a role or activity-based classification. An example of a structure-based class is 'pentacyclic compound' (compounds containing five-ring structures), while an example of a role-based class is 'analgesic', since many different chemicals can act as analgesics without sharing structural features. Structure-based classification in chemistry exploits elegant regularities and symmetries in the underlying chemical domain. As yet, there has been neither a systematic analysis of the types of structural classification in use in chemistry nor a comparison to the capabilities of available technologies. 
RESULTS:We analyze the different categories of structural classes in chemistry, presenting a list of patterns for features found in class definitions. We compare these patterns of class definition to tools which allow for automation of hierarchy construction within cheminformatics and within logic-based ontology technology, going into detail in the latter case with respect to the expressive capabilities of the Web Ontology Language and recent extensions for modelling structured objects. Finally we discuss the relationships and interactions between cheminformatics approaches and logic-based approaches. CONCLUSION:Systems that perform intelligent reasoning tasks on chemistry data require a diverse set of underlying computational utilities including algorithmic, statistical and logic-based tools. For the task of automatic structure-based classification of chemical entities, essential to managing the vast swathes of chemical data being brought online, systems which are capable of hybrid reasoning combining several different approaches are crucial. We provide a thorough review of the available tools and methodologies, and identify areas of open research.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:Recent years have seen an explosion in the availability of data in the chemistry domain. With this information explosion, however, retrieving relevant results from the available information, and organising those results, become even harder problems. Computational processing is essential to filter and organise the available resources so as to better facilitate the work of scientists. Ontologies encode expert domain knowledge in a hierarchically organised machine-processable format. One such ontology for the chemical domain is ChEBI. ChEBI provides a classification of chemicals based on their structural features and a role or activity-based classification. 
An example of a structure-based class is 'pentacyclic compound' (compounds containing five-ring structures), while an example of a role-based class is 'analgesic', since many different chemicals can act as analgesics without sharing structural features. Structure-based classification in chemistry exploits elegant regularities and symmetries in the underlying chemical domain. As yet, there has been neither a systematic analysis of the types of structural classification in use in chemistry nor a comparison to the capabilities of available technologies. RESULTS:We analyze the different categories of structural classes in chemistry, presenting a list of patterns for features found in class definitions. We compare these patterns of class definition to tools which allow for automation of hierarchy construction within cheminformatics and within logic-based ontology technology, going into detail in the latter case with respect to the expressive capabilities of the Web Ontology Language and recent extensions for modelling structured objects. Finally we discuss the relationships and interactions between cheminformatics approaches and logic-based approaches. CONCLUSION:Systems that perform intelligent reasoning tasks on chemistry data require a diverse set of underlying computational utilities including algorithmic, statistical and logic-based tools. For the task of automatic structure-based classification of chemical entities, essential to managing the vast swathes of chemical data being brought online, systems which are capable of hybrid reasoning combining several different approaches are crucial. We provide a thorough review of the available tools and methodologies, and identify areas of open research. |
Alcantara, Rafael ; Axelsen, Kristian B; Morgat, Anne ; Belda, Eugeni ; Coudert, Elisabeth ; Bridge, Alan ; Cao, Hong ; De Matos, Paula ; Ennis, Marcus ; Turner, Steve ; Owen, Gareth ; Bougueleret, Lydie ; Xenarios, Ioannis ; Steinbeck, Christoph Rhea--a manually curated resource of biochemical reactions. Journal Article Nucleic Acids Research, 40 (Database issue), pp. D754–60, 2012. @article{alcantara2012rhea, title = {Rhea--a manually curated resource of biochemical reactions.}, author = {Alcantara, Rafael and Axelsen, Kristian B and Morgat, Anne and Belda, Eugeni and Coudert, Elisabeth and Bridge, Alan and Cao, Hong and De Matos, Paula and Ennis, Marcus and Turner, Steve and Owen, Gareth and Bougueleret, Lydie and Xenarios, Ioannis and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=22135291&retmode=ref&cmd=prlinks}, doi = {10.1093/nar/gkr1126}, year = {2012}, date = {2012-01-01}, journal = {Nucleic Acids Research}, volume = {40}, number = {Database issue}, pages = {D754--60}, abstract = {Rhea (http://www.ebi.ac.uk/rhea) is a comprehensive resource of expert-curated biochemical reactions. Rhea provides a non-redundant set of chemical transformations for use in a broad spectrum of applications, including metabolic network reconstruction and pathway inference. Rhea includes enzyme-catalyzed reactions (covering the IUBMB Enzyme Nomenclature list), transport reactions and spontaneously occurring reactions. Rhea reactions are described using chemical species from the Chemical Entities of Biological Interest ontology (ChEBI) and are stoichiometrically balanced for mass and charge. They are extensively manually curated with links to source literature and other public resources on metabolism including enzyme and pathway databases. 
This cross-referencing facilitates the mapping and reconciliation of common reactions and compounds between distinct resources, which is a common first step in the reconstruction of genome scale metabolic networks and models.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Rhea (http://www.ebi.ac.uk/rhea) is a comprehensive resource of expert-curated biochemical reactions. Rhea provides a non-redundant set of chemical transformations for use in a broad spectrum of applications, including metabolic network reconstruction and pathway inference. Rhea includes enzyme-catalyzed reactions (covering the IUBMB Enzyme Nomenclature list), transport reactions and spontaneously occurring reactions. Rhea reactions are described using chemical species from the Chemical Entities of Biological Interest ontology (ChEBI) and are stoichiometrically balanced for mass and charge. They are extensively manually curated with links to source literature and other public resources on metabolism including enzyme and pathway databases. This cross-referencing facilitates the mapping and reconciliation of common reactions and compounds between distinct resources, which is a common first step in the reconstruction of genome scale metabolic networks and models. |
Jayaseelan, Kalai Vanii ; Moreno, Pablo ; Truszkowski, Andreas ; Ertl, Peter ; Steinbeck, Christoph Natural product-likeness score revisited: an open-source, open-data implementation. Journal Article BMC Bioinformatics, 13 (1), pp. 106, 2012. @article{Jayaseelan:2012ig, title = {Natural product-likeness score revisited: an open-source, open-data implementation.}, author = {Jayaseelan, Kalai Vanii and Moreno, Pablo and Truszkowski, Andreas and Ertl, Peter and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=22607271&retmode=ref&cmd=prlinks}, doi = {10.1186/1471-2105-13-106}, year = {2012}, date = {2012-01-01}, journal = {BMC Bioinformatics}, volume = {13}, number = {1}, pages = {106}, abstract = {BACKGROUND:Natural product-likeness of a molecule, i.e. similarity of this molecule to the structure space covered by natural products, is a useful criterion in screening compound libraries and in designing new lead compounds. A closed source implementation of a natural product-likeness score, that finds its application in virtual screening, library design and compound selection, has been previously reported by one of us. In this note, we report an open-source and open-data re-implementation of this scoring system, illustrate its efficiency in ranking small molecules for natural product likeness and discuss its potential applications. RESULTS:The Natural-Product-Likeness scoring system is implemented as Taverna 2.2 workflows, and is available under Creative Commons Attribution-Share Alike 3.0 Unported License at http://www.myexperiment.org/packs/183.html. It is also available for download as executable standalone java package from http://sourceforge.net/projects/np-likeness/ under Academic Free License. 
CONCLUSIONS:Our open-source, open-data Natural-Product-Likeness scoring system can be used as a filter for metabolites in Computer Assisted Structure Elucidation or to select natural-product-like molecules from molecular libraries for the use as leads in drug discovery.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:Natural product-likeness of a molecule, i.e. similarity of this molecule to the structure space covered by natural products, is a useful criterion in screening compound libraries and in designing new lead compounds. A closed source implementation of a natural product-likeness score, that finds its application in virtual screening, library design and compound selection, has been previously reported by one of us. In this note, we report an open-source and open-data re-implementation of this scoring system, illustrate its efficiency in ranking small molecules for natural product likeness and discuss its potential applications. RESULTS:The Natural-Product-Likeness scoring system is implemented as Taverna 2.2 workflows, and is available under Creative Commons Attribution-Share Alike 3.0 Unported License at http://www.myexperiment.org/packs/183.html. It is also available for download as executable standalone java package from http://sourceforge.net/projects/np-likeness/ under Academic Free License. CONCLUSIONS:Our open-source, open-data Natural-Product-Likeness scoring system can be used as a filter for metabolites in Computer Assisted Structure Elucidation or to select natural-product-like molecules from molecular libraries for the use as leads in drug discovery. |
Hastings, Janna ; Josephs, Zara ; Steinbeck, Christoph Accessing and using chemical property databases. Journal Article Methods in molecular biology (Clifton, N.J.), 929 (Chapter 9), pp. 193–219, 2012, ISBN: 978-1-62703-049-6. @article{Hastings:2012ft, title = {Accessing and using chemical property databases.}, author = {Hastings, Janna and Josephs, Zara and Steinbeck, Christoph}, url = {http://link.springer.com/10.1007/978-1-62703-050-2_9}, doi = {10.1007/978-1-62703-050-2_9}, isbn = {978-1-62703-049-6}, year = {2012}, date = {2012-01-01}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {929}, number = {Chapter 9}, pages = {193--219}, publisher = {Humana Press}, address = {Totowa, NJ}, abstract = {Chemical compounds participate in all the processes of life. Understanding the complex interactions of small molecules such as metabolites and drugs and the biological macromolecules that consume and produce them is key to gaining a wider understanding in a systemic context. Chemical property databases collect information on the biological effects and physicochemical properties of chemical entities. Accessing and using such databases is key to understanding the chemistry of toxic molecules. In this chapter, we present methods to search, understand, download, and manipulate the wealth of information available in public chemical property databases, with particular focus on the database of Chemical Entities of Biological Interest (ChEBI).}, keywords = {}, pubstate = {published}, tppubtype = {article} } Chemical compounds participate in all the processes of life. Understanding the complex interactions of small molecules such as metabolites and drugs and the biological macromolecules that consume and produce them is key to gaining a wider understanding in a systemic context. Chemical property databases collect information on the biological effects and physicochemical properties of chemical entities. 
Accessing and using such databases is key to understanding the chemistry of toxic molecules. In this chapter, we present methods to search, understand, download, and manipulate the wealth of information available in public chemical property databases, with particular focus on the database of Chemical Entities of Biological Interest (ChEBI). |
De Matos, Paula ; Adams, Nico ; Hastings, Janna ; Moreno, Pablo ; Steinbeck, Christoph A database for chemical proteomics: ChEBI. Journal Article Methods in molecular biology (Clifton, N.J.), 803 (Chapter 19), pp. 273–296, 2012, ISBN: 978-1-61779-363-9. @article{de2012database, title = {A database for chemical proteomics: ChEBI.}, author = {De Matos, Paula and Adams, Nico and Hastings, Janna and Moreno, Pablo and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=22065232&retmode=ref&cmd=prlinks}, doi = {10.1007/978-1-61779-364-6_19}, isbn = {978-1-61779-363-9}, year = {2012}, date = {2012-01-01}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {803}, number = {Chapter 19}, pages = {273--296}, publisher = {Humana Press}, address = {Totowa, NJ}, abstract = {Chemical proteomics is concerned with the identification of protein targets interacting with small molecules. Hence, the availability of a high quality and free resource storing small molecules is essential for the future development of the field. The Chemical Entities of Biological Interest (ChEBI) database is one such database. The scope of ChEBI includes any constitutionally or isotopically distinct atom, molecule, ion, ion pair, radical, radical ion, complex, conformer, etc., identifiable as a separately distinguishable entity. These entities in question are either products of nature or synthetic products used to intervene in the processes of living organisms. In addition, ChEBI contains a chemical ontology which relates the small molecules with each other thereby making it easier for users to discover data. The ontology also describes the biological roles that the small molecules are active in. 
The ChEBI database also provides a central reference point in which to access a variety of bioinformatics data points such as pathways and their biochemical reactions; expression data; protein sequence and structures.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Chemical proteomics is concerned with the identification of protein targets interacting with small molecules. Hence, the availability of a high quality and free resource storing small molecules is essential for the future development of the field. The Chemical Entities of Biological Interest (ChEBI) database is one such database. The scope of ChEBI includes any constitutionally or isotopically distinct atom, molecule, ion, ion pair, radical, radical ion, complex, conformer, etc., identifiable as a separately distinguishable entity. These entities in question are either products of nature or synthetic products used to intervene in the processes of living organisms. In addition, ChEBI contains a chemical ontology which relates the small molecules with each other thereby making it easier for users to discover data. The ontology also describes the biological roles that the small molecules are active in. The ChEBI database also provides a central reference point in which to access a variety of bioinformatics data points such as pathways and their biochemical reactions; expression data; protein sequence and structures. |
2011 |
Orchard, Sandra ; Al-Lazikani, Bissan ; Bryant, Steve ; Clark, Dominic ; Calder, Elizabeth ; Dix, Ian ; Engkvist, Ola ; Forster, Mark ; Gaulton, Anna ; Gilson, Michael ; Glen, Robert ; Grigorov, Martin ; Hammond-Kosack, Kim ; Harland, Lee ; Hopkins, Andrew ; Larminie, Christopher ; Lynch, Nick ; Mann, Romeena K; Murray-Rust, Peter ; Lo Piparo, Elena ; Southan, Christopher ; Steinbeck, Christoph ; Wishart, David ; Hermjakob, Henning ; Overington, John ; Thornton, Janet Minimum information about a bioactive entity (MIABE) Journal Article Nature Reviews Drug Discovery, 10 (9), pp. 661–669, 2011. @article{orchard2011minimum, title = {Minimum information about a bioactive entity (MIABE)}, author = {Orchard, Sandra and Al-Lazikani, Bissan and Bryant, Steve and Clark, Dominic and Calder, Elizabeth and Dix, Ian and Engkvist, Ola and Forster, Mark and Gaulton, Anna and Gilson, Michael and Glen, Robert and Grigorov, Martin and Hammond-Kosack, Kim and Harland, Lee and Hopkins, Andrew and Larminie, Christopher and Lynch, Nick and Mann, Romeena K and Murray-Rust, Peter and Lo Piparo, Elena and Southan, Christopher and Steinbeck, Christoph and Wishart, David and Hermjakob, Henning and Overington, John and Thornton, Janet}, url = {http://www.nature.com/doifinder/10.1038/nrd3503}, doi = {10.1038/nrd3503}, year = {2011}, date = {2011-01-01}, journal = {Nature Reviews Drug Discovery}, volume = {10}, number = {9}, pages = {661--669}, publisher = {Nature Publishing Group}, abstract = {Bioactive molecules such as drugs, pesticides and food additives are produced in large numbers by many commercial and academic groups around the world. Enormous quantities of data are generated on the biological properties and quality of these molecules. Access to such data - both on licensed and commercially available compounds, and also on those that fail during development - is crucial for understanding how improved molecules could be developed. 
For example, computational analysis of aggregated data on molecules that are investigated in drug discovery programmes has led to a greater understanding of the properties of successful drugs. However, the information required to perform these analyses is rarely published, and when it is made available it is often missing crucial data or is in a format that is inappropriate for efficient data-mining. Here, we propose a solution: the definition of reporting guidelines for bioactive entities - the Minimum Information About a Bioactive Entity (MIABE) - which has been developed by representatives of pharmaceutical companies, data resource providers and academic groups.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Bioactive molecules such as drugs, pesticides and food additives are produced in large numbers by many commercial and academic groups around the world. Enormous quantities of data are generated on the biological properties and quality of these molecules. Access to such data - both on licensed and commercially available compounds, and also on those that fail during development - is crucial for understanding how improved molecules could be developed. For example, computational analysis of aggregated data on molecules that are investigated in drug discovery programmes has led to a greater understanding of the properties of successful drugs. However, the information required to perform these analyses is rarely published, and when it is made available it is often missing crucial data or is in a format that is inappropriate for efficient data-mining. Here, we propose a solution: the definition of reporting guidelines for bioactive entities - the Minimum Information About a Bioactive Entity (MIABE) - which has been developed by representatives of pharmaceutical companies, data resource providers and academic groups. |
Hastings, J; Batchelor, C R; Steinbeck, C; Schulz, S Modularization Requirements in Bio-Ontologies: A Case Study of ChEBI Journal Article WoMO, 2011. @article{Hastings:2011hx, title = {Modularization Requirements in Bio-Ontologies: A Case Study of ChEBI}, author = {Hastings, J and Batchelor, C R and Steinbeck, C and Schulz, S}, url = {http://ebooks.iospress.nl/publication/6516}, doi = {10.3233/978-1-60750-799-4-63}, year = {2011}, date = {2011-01-01}, journal = {WoMO}, abstract = {Bio-ontologies such as the Gene Ontology and ChEBI are characterized by large sizes and relatively low expressivity. However, ongoing efforts aim to increase the formalisation of these ontologies by adding full definitions (equivalent classes). This increase in complexity results in a decrease of performance for standard reasoning tasks. In this paper, we explore the contribution which modularization can play in the evolution of ...}, keywords = {}, pubstate = {published}, tppubtype = {article} } Bio-ontologies such as the Gene Ontology and ChEBI are characterized by large sizes and relatively low expressivity. However, ongoing efforts aim to increase the formalisation of these ontologies by adding full definitions (equivalent classes). This increase in complexity results in a decrease of performance for standard reasoning tasks. In this paper, we explore the contribution which modularization can play in the evolution of ... |
Griffin, Julian L; Atherton, Helen J; Steinbeck, Chris ; Salek, Reza M A Metadata description of the data in "A metabolomic comparison of urinary changes in type 2 diabetes in mouse, rat, and human.". Journal Article BMC Research Notes, 4 (1), pp. 272, 2011. @article{Griffin:2011es, title = {A Metadata description of the data in "A metabolomic comparison of urinary changes in type 2 diabetes in mouse, rat, and human.".}, author = {Griffin, Julian L and Atherton, Helen J and Steinbeck, Chris and Salek, Reza M}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=21801423&retmode=ref&cmd=prlinks}, doi = {10.1186/1756-0500-4-272}, year = {2011}, date = {2011-01-01}, journal = {BMC Research Notes}, volume = {4}, number = {1}, pages = {272}, abstract = {BACKGROUND:Metabolomics is a rapidly developing functional genomic tool that has a wide range of applications in diverse fields in biology and medicine. However, unlike transcriptomics and proteomics there is currently no central repository for the depositing of data despite efforts by the Metabolomics Standard Initiative (MSI) to develop a standardised description of a metabolomic experiment. FINDINGS:In this manuscript we describe how the MSI description has been applied to a published dataset involving the identification of cross-species metabolic biomarkers associated with type II diabetes. The study describes sample collection of urine from mice, rats and human volunteers, and the subsequent acquisition of data by high resolution 1H NMR spectroscopy. The metadata is described to demonstrate how the MSI descriptions could be applied in a manuscript and the spectra have also been made available for the mouse and rat studies to allow others to process the data. 
CONCLUSIONS:The intention of this manuscript is to stimulate discussion as to whether the MSI description is sufficient to describe the metadata associated with metabolomic experiments and encourage others to make their data available to other researchers.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:Metabolomics is a rapidly developing functional genomic tool that has a wide range of applications in diverse fields in biology and medicine. However, unlike transcriptomics and proteomics there is currently no central repository for the depositing of data despite efforts by the Metabolomics Standard Initiative (MSI) to develop a standardised description of a metabolomic experiment. FINDINGS:In this manuscript we describe how the MSI description has been applied to a published dataset involving the identification of cross-species metabolic biomarkers associated with type II diabetes. The study describes sample collection of urine from mice, rats and human volunteers, and the subsequent acquisition of data by high resolution 1H NMR spectroscopy. The metadata is described to demonstrate how the MSI descriptions could be applied in a manuscript and the spectra have also been made available for the mouse and rat studies to allow others to process the data. CONCLUSIONS:The intention of this manuscript is to stimulate discussion as to whether the MSI description is sufficient to describe the metadata associated with metabolomic experiments and encourage others to make their data available to other researchers. |
Hastings, Janna ; Batchelor, Colin ; Neuhaus, Fabian ; Steinbeck, Christoph What's in an 'is about' link? Chemical diagrams and the IAO Journal Article Proceedings of the International Conference on Biomedical Ontology (ICBO2011), Buffalo, USA, 2011. @article{hastings2011s, title = {What's in an `is about' link? Chemical diagrams and the IAO}, author = {Hastings, Janna and Batchelor, Colin and Neuhaus, Fabian and Steinbeck, Christoph}, url = {http://www.google.de/search?client=safari&rls=10_7_4&q=Whats+in+an+is+aboutlink+Chemical+diagrams+and+the+IAO&ie=UTF-8&oe=UTF-8&redir_esc=&ei=Mvm5ULHuCKyL4gSDi4CoCg}, year = {2011}, date = {2011-01-01}, journal = {Proceedings of the International Conference on Biomedical Ontology (ICBO2011), Buffalo, USA}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
O'Boyle, Noel M; Guha, Rajarshi ; Willighagen, Egon L; Adams, Samuel E; Alvarsson, Jonathan ; Bradley, Jean-Claude ; Filippov, Igor V; Hanson, Robert M; Hanwell, Marcus D; Hutchison, Geoffrey R; James, Craig A; Jeliazkova, Nina ; Lang, Andrew Sid ; Langner, Karol M; Lonie, David C; Lowe, Daniel M; Pansanel, Jerome ; Pavlov, Dmitry ; Spjuth, Ola ; Steinbeck, Christoph ; Tenderholt, Adam L; Theisen, Kevin J; Murray-Rust, Peter Open Data, Open Source and Open Standards in chemistry: The Blue Obelisk five years on. Journal Article Journal of cheminformatics, 3 (1), pp. 37, 2011. @article{o2011open, title = {Open Data, Open Source and Open Standards in chemistry: The Blue Obelisk five years on.}, author = {O'Boyle, Noel M and Guha, Rajarshi and Willighagen, Egon L and Adams, Samuel E and Alvarsson, Jonathan and Bradley, Jean-Claude and Filippov, Igor V and Hanson, Robert M and Hanwell, Marcus D and Hutchison, Geoffrey R and James, Craig A and Jeliazkova, Nina and Lang, Andrew Sid and Langner, Karol M and Lonie, David C and Lowe, Daniel M and Pansanel, Jerome and Pavlov, Dmitry and Spjuth, Ola and Steinbeck, Christoph and Tenderholt, Adam L and Theisen, Kevin J and Murray-Rust, Peter}, url = {http://www.jcheminf.com/content/3/1/37}, doi = {10.1186/1758-2946-3-37}, year = {2011}, date = {2011-01-01}, journal = {Journal of cheminformatics}, volume = {3}, number = {1}, pages = {37}, abstract = {BACKGROUND:The Blue Obelisk movement was established in 2005 as a response to the lack of Open Data, Open Standards and Open Source (ODOSOS) in chemistry. It aims to make it easier to carry out chemistry research by promoting interoperability between chemistry software, encouraging cooperation between Open Source developers, and developing community resources and Open Standards. 
RESULTS:This contribution looks back on the work carried out by the Blue Obelisk in the past 5 years and surveys progress and remaining challenges in the areas of Open Data, Open Standards, and Open Source in chemistry. CONCLUSIONS:We show that the Blue Obelisk has been very successful in bringing together researchers and developers with common interests in ODOSOS, leading to development of many useful resources freely available to the chemistry community.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:The Blue Obelisk movement was established in 2005 as a response to the lack of Open Data, Open Standards and Open Source (ODOSOS) in chemistry. It aims to make it easier to carry out chemistry research by promoting interoperability between chemistry software, encouraging cooperation between Open Source developers, and developing community resources and Open Standards. RESULTS:This contribution looks back on the work carried out by the Blue Obelisk in the past 5 years and surveys progress and remaining challenges in the areas of Open Data, Open Standards, and Open Source in chemistry. CONCLUSIONS:We show that the Blue Obelisk has been very successful in bringing together researchers and developers with common interests in ODOSOS, leading to development of many useful resources freely available to the chemistry community. |
Truszkowski, Andreas ; Jayaseelan, Kalai Vanii ; Neumann, Stefan ; Willighagen, Egon L; Zielesny, Achim ; Steinbeck, Christoph New developments on the cheminformatics open workflow environment CDK-Taverna. Journal Article Journal of cheminformatics, 3 (1), pp. 54, 2011. @article{truszkowski2011new, title = {New developments on the cheminformatics open workflow environment CDK-Taverna.}, author = {Truszkowski, Andreas and Jayaseelan, Kalai Vanii and Neumann, Stefan and Willighagen, Egon L and Zielesny, Achim and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=22166170&retmode=ref&cmd=prlinks}, doi = {10.1186/1758-2946-3-54}, year = {2011}, date = {2011-01-01}, journal = {Journal of cheminformatics}, volume = {3}, number = {1}, pages = {54}, abstract = {BACKGROUND:The computational processing and analysis of small molecules is at heart of cheminformatics and structural bioinformatics and their application in e.g. metabolomics or drug discovery. Pipelining or workflow tools allow for the Lego{\texttrademark}-like, graphical assembly of I/O modules and algorithms into a complex workflow which can be easily deployed, modified and tested without the hassle of implementing it into a monolithic application. The CDK-Taverna project aims at building a free open-source cheminformatics pipelining solution through combination of different open-source projects such as Taverna, the Chemistry Development Kit (CDK) or the Waikato Environment for Knowledge Analysis (WEKA). A first integrated version 1.0 of CDK-Taverna was recently released to the public. RESULTS:The CDK-Taverna project was migrated to the most up-to-date versions of its foundational software libraries with a complete re-engineering of its worker's architecture (version 2.0). 64-bit computing and multi-core usage by paralleled threads are now supported to allow for fast in-memory processing and analysis of large sets of molecules. 
Earlier deficiencies like workarounds for iterative data reading are removed. The combinatorial chemistry related reaction enumeration features are considerably enhanced. Additional functionality for calculating a natural product likeness score for small molecules is implemented to identify possible drug candidates. Finally the data analysis capabilities are extended with new workers that provide access to the open-source WEKA library for clustering and machine learning as well as training and test set partitioning. The new features are outlined with usage scenarios. CONCLUSIONS:CDK-Taverna 2.0 as an open-source cheminformatics workflow solution matured to become a freely available and increasingly powerful tool for the biosciences. The combination of the new CDK-Taverna worker family with the already available workflows developed by a lively Taverna community and published on myexperiment.org enables molecular scientists to quickly calculate, process and analyse molecular data as typically found in e.g. today's systems biology scenarios.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:The computational processing and analysis of small molecules is at heart of cheminformatics and structural bioinformatics and their application in e.g. metabolomics or drug discovery. Pipelining or workflow tools allow for the Lego™-like, graphical assembly of I/O modules and algorithms into a complex workflow which can be easily deployed, modified and tested without the hassle of implementing it into a monolithic application. The CDK-Taverna project aims at building a free open-source cheminformatics pipelining solution through combination of different open-source projects such as Taverna, the Chemistry Development Kit (CDK) or the Waikato Environment for Knowledge Analysis (WEKA). A first integrated version 1.0 of CDK-Taverna was recently released to the public. 
RESULTS:The CDK-Taverna project was migrated to the most up-to-date versions of its foundational software libraries with a complete re-engineering of its worker's architecture (version 2.0). 64-bit computing and multi-core usage by paralleled threads are now supported to allow for fast in-memory processing and analysis of large sets of molecules. Earlier deficiencies like workarounds for iterative data reading are removed. The combinatorial chemistry related reaction enumeration features are considerably enhanced. Additional functionality for calculating a natural product likeness score for small molecules is implemented to identify possible drug candidates. Finally the data analysis capabilities are extended with new workers that provide access to the open-source WEKA library for clustering and machine learning as well as training and test set partitioning. The new features are outlined with usage scenarios. CONCLUSIONS:CDK-Taverna 2.0 as an open-source cheminformatics workflow solution matured to become a freely available and increasingly powerful tool for the biosciences. The combination of the new CDK-Taverna worker family with the already available workflows developed by a lively Taverna community and published on myexperiment.org enables molecular scientists to quickly calculate, process and analyse molecular data as typically found in e.g. today's systems biology scenarios. |
Hastings, Janna ; Chepelev, Leonid ; Willighagen, Egon ; Adams, Nico ; Steinbeck, Christoph ; Dumontier, Michel The chemical information ontology: provenance and disambiguation for chemical data on the biological semantic web. Journal Article PLoS ONE, 6 (10), pp. e25513, 2011. @article{hastings2011chemical, title = {The chemical information ontology: provenance and disambiguation for chemical data on the biological semantic web.}, author = {Hastings, Janna and Chepelev, Leonid and Willighagen, Egon and Adams, Nico and Steinbeck, Christoph and Dumontier, Michel}, url = {http://dx.plos.org/10.1371/journal.pone.0025513.pdf}, doi = {10.1371/journal.pone.0025513}, year = {2011}, date = {2011-01-01}, journal = {PLoS ONE}, volume = {6}, number = {10}, pages = {e25513}, abstract = {Cheminformatics is the application of informatics techniques to solve chemical problems in silico. There are many areas in biology where cheminformatics plays an important role in computational research, including metabolism, proteomics, and systems biology. One critical aspect in the application of cheminformatics in these fields is the accurate exchange of data, which is increasingly accomplished through the use of ontologies. Ontologies are formal representations of objects and their properties using a logic-based ontology language. Many such ontologies are currently being developed to represent objects across all the domains of science. Ontologies enable the definition, classification, and support for querying objects in a particular domain, enabling intelligent computer applications to be built which support the work of scientists both within the domain of interest and across interrelated neighbouring domains. Modern chemical research relies on computational techniques to filter and organise data to maximise research productivity. 
The objects which are manipulated in these algorithms and procedures, as well as the algorithms and procedures themselves, enjoy a kind of virtual life within computers. We will call these information entities. Here, we describe our work in developing an ontology of chemical information entities, with a primary focus on data-driven research and the integration of calculated properties (descriptors) of chemical entities within a semantic web context. Our ontology distinguishes algorithmic, or procedural information from declarative, or factual information, and renders of particular importance the annotation of provenance to calculated data. The Chemical Information Ontology is being developed as an open collaborative project. More details, together with a downloadable OWL file, are available at http://code.google.com/p/semanticchemistry/ (license: CC-BY-SA).}, keywords = {}, pubstate = {published}, tppubtype = {article} } Cheminformatics is the application of informatics techniques to solve chemical problems in silico. There are many areas in biology where cheminformatics plays an important role in computational research, including metabolism, proteomics, and systems biology. One critical aspect in the application of cheminformatics in these fields is the accurate exchange of data, which is increasingly accomplished through the use of ontologies. Ontologies are formal representations of objects and their properties using a logic-based ontology language. Many such ontologies are currently being developed to represent objects across all the domains of science. Ontologies enable the definition, classification, and support for querying objects in a particular domain, enabling intelligent computer applications to be built which support the work of scientists both within the domain of interest and across interrelated neighbouring domains. Modern chemical research relies on computational techniques to filter and organise data to maximise research productivity. 
The objects which are manipulated in these algorithms and procedures, as well as the algorithms and procedures themselves, enjoy a kind of virtual life within computers. We will call these information entities. Here, we describe our work in developing an ontology of chemical information entities, with a primary focus on data-driven research and the integration of calculated properties (descriptors) of chemical entities within a semantic web context. Our ontology distinguishes algorithmic, or procedural information from declarative, or factual information, and renders of particular importance the annotation of provenance to calculated data. The Chemical Information Ontology is being developed as an open collaborative project. More details, together with a downloadable OWL file, are available at http://code.google.com/p/semanticchemistry/ (license: CC-BY-SA). |
2010 |
Apweiler, Rolf ; Armstrong, Richard ; Bairoch, Amos ; Cornish-Bowden, Athel ; Halling, Peter J; Hofmeyr, Jan-Hendrik S; Kettner, Carsten ; Leyh, Thomas S; Rohwer, Johann ; Schomburg, Dietmar ; Steinbeck, Christoph ; Tipton, Keith A large-scale protein-function database. Journal Article Nature Chemical Biology, 6 (11), pp. 785–785, 2010. @article{apweiler2010large, title = {A large-scale protein-function database.}, author = {Apweiler, Rolf and Armstrong, Richard and Bairoch, Amos and Cornish-Bowden, Athel and Halling, Peter J and Hofmeyr, Jan-Hendrik S and Kettner, Carsten and Leyh, Thomas S and Rohwer, Johann and Schomburg, Dietmar and Steinbeck, Christoph and Tipton, Keith}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=20956966&retmode=ref&cmd=prlinks}, doi = {10.1038/nchembio.460}, year = {2010}, date = {2010-11-01}, journal = {Nature Chemical Biology}, volume = {6}, number = {11}, pages = {785--785}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Hastings, Janna ; Batchelor, Colin ; Steinbeck, Christoph ; Schulz, Stefan What are chemical structures and their relations? Journal Article pp. 257–270, 2010, ISBN: 978-1-60750-534-1. @article{Hastings:2010uk, title = {What are chemical structures and their relations?}, author = {Hastings, Janna and Batchelor, Colin and Steinbeck, Christoph and Schulz, Stefan}, url = {http://dl.acm.org/citation.cfm?id=1804715.1804742}, isbn = {978-1-60750-534-1}, year = {2010}, date = {2010-07-01}, pages = {257--270}, publisher = {IOS Press}, abstract = {Abstract In chemistry, advances in computational technologies have allowed research into molecules that have not been synthesized yet, and may never be, to become widespread. These are described in terms of their structures, which are expressed as chemical graphs. ...}, keywords = {}, pubstate = {published}, tppubtype = {article} } Abstract In chemistry, advances in computational technologies have allowed research into molecules that have not been synthesized yet, and may never be, to become widespread. These are described in terms of their structures, which are expressed as chemical graphs. ... |
Griffin, Julian L; Steinbeck, Christoph So what have data standards ever done for us? The view from metabolomics. Journal Article Genome medicine, 2 (6), pp. 38, 2010. @article{griffin2010so, title = {So what have data standards ever done for us? The view from metabolomics.}, author = {Griffin, Julian L and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=20587079&retmode=ref&cmd=prlinks}, doi = {10.1186/gm159}, year = {2010}, date = {2010-01-01}, journal = {Genome medicine}, volume = {2}, number = {6}, pages = {38}, abstract = {The standardization of reporting of data promises to revolutionize biology by allowing community access to data generated in laboratories across the globe. This approach has already influenced genomics and transcriptomics. Projects that have previously been viewed as being too big to implement can now be distributed across multiple sites. There are now public databases for gene sequences, transcriptomic profiling and proteomic experiments. However, progress in the metabolomic community has seemed to falter recently, and whereas there are ontologies to describe the metadata for metabolomics there are still no central repositories for the datasets themselves. Here, we examine some of the challenges and potential benefits of further efforts towards data standardization in metabolomics and metabonomics.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The standardization of reporting of data promises to revolutionize biology by allowing community access to data generated in laboratories across the globe. This approach has already influenced genomics and transcriptomics. Projects that have previously been viewed as being too big to implement can now be distributed across multiple sites. There are now public databases for gene sequences, transcriptomic profiling and proteomic experiments. 
However, progress in the metabolomic community has seemed to falter recently, and whereas there are ontologies to describe the metadata for metabolomics there are still no central repositories for the datasets themselves. Here, we examine some of the challenges and potential benefits of further efforts towards data standardization in metabolomics and metabonomics. |
Hastings, Janna ; Dumontier, Michel ; Hull, Duncan ; Horridge, Matthew ; Steinbeck, Christoph ; Sattler, Ulrike ; Stevens, Robert ; Hörne, Tertia ; Britz, Katarina Representing chemicals using OWL, description graphs and rules Journal Article CEUR Workshop Proceedings, 614 , 2010. @article{hastings2010representing, title = {Representing chemicals using OWL, description graphs and rules}, author = {Hastings, Janna and Dumontier, Michel and Hull, Duncan and Horridge, Matthew and Steinbeck, Christoph and Sattler, Ulrike and Stevens, Robert and H{\"o}rne, Tertia and Britz, Katarina}, url = {http://researchspace.csir.co.za/dspace/handle/10204/4919}, year = {2010}, date = {2010-01-01}, journal = {CEUR Workshop Proceedings}, volume = {614}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Kuhn, Thomas ; Willighagen, Egon L; Zielesny, Achim ; Steinbeck, Christoph CDK-Taverna: an open workflow environment for cheminformatics. Journal Article BMC Bioinformatics, 11 (1), pp. 159, 2010. @article{kuhn2010cdk, title = {CDK-Taverna: an open workflow environment for cheminformatics.}, author = {Kuhn, Thomas and Willighagen, Egon L and Zielesny, Achim and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=20346188&retmode=ref&cmd=prlinks}, doi = {10.1186/1471-2105-11-159}, year = {2010}, date = {2010-01-01}, journal = {BMC Bioinformatics}, volume = {11}, number = {1}, pages = {159}, publisher = {BioMed Central}, abstract = {BACKGROUND:Small molecules are of increasing interest for bioinformatics in areas such as metabolomics and drug discovery. The recent release of large open access chemistry databases generates a demand for flexible tools to process them and discover new knowledge. To freely support open science based on these data resources, it is desirable for the processing tools to be open source and available for everyone. RESULTS:Here we describe a novel combination of the workflow engine Taverna and the cheminformatics library Chemistry Development Kit (CDK) resulting in a open source workflow solution for cheminformatics. We have implemented more than 160 different workers to handle specific cheminformatics tasks. We describe the applications of CDK-Taverna in various usage scenarios. CONCLUSIONS:The combination of the workflow engine Taverna and the Chemistry Development Kit provides the first open source cheminformatics workflow solution for the biosciences. 
With the Taverna-community working towards a more powerful workflow engine and a more user-friendly user interface, CDK-Taverna has the potential to become a free alternative to existing proprietary workflow tools.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND:Small molecules are of increasing interest for bioinformatics in areas such as metabolomics and drug discovery. The recent release of large open access chemistry databases generates a demand for flexible tools to process them and discover new knowledge. To freely support open science based on these data resources, it is desirable for the processing tools to be open source and available for everyone. RESULTS:Here we describe a novel combination of the workflow engine Taverna and the cheminformatics library Chemistry Development Kit (CDK) resulting in a open source workflow solution for cheminformatics. We have implemented more than 160 different workers to handle specific cheminformatics tasks. We describe the applications of CDK-Taverna in various usage scenarios. CONCLUSIONS:The combination of the workflow engine Taverna and the Chemistry Development Kit provides the first open source cheminformatics workflow solution for the biosciences. With the Taverna-community working towards a more powerful workflow engine and a more user-friendly user interface, CDK-Taverna has the potential to become a free alternative to existing proprietary workflow tools. |
De Matos, Paula ; Alcantara, Rafael ; Dekker, Adriano ; Ennis, Marcus ; Hastings, Janna ; Haug, Kenneth ; Spiteri, Inmaculada ; Turner, Steve ; Steinbeck, Christoph Chemical Entities of Biological Interest: an update. Journal Article Nucleic Acids Research, 38 (Database issue), pp. D249–54, 2010. @article{de2010chemical, title = {Chemical Entities of Biological Interest: an update.}, author = {De Matos, Paula and Alcantara, Rafael and Dekker, Adriano and Ennis, Marcus and Hastings, Janna and Haug, Kenneth and Spiteri, Inmaculada and Turner, Steve and Steinbeck, Christoph}, url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=19854951&retmode=ref&cmd=prlinks}, doi = {10.1093/nar/gkp886}, year = {2010}, date = {2010-01-01}, journal = {Nucleic Acids Research}, volume = {38}, number = {Database issue}, pages = {D249--54}, abstract = {Chemical Entities of Biological Interest (ChEBI) is a freely available dictionary of molecular entities focused on 'small' chemical compounds. The molecular entities in question are either natural products or synthetic products used to intervene in the processes of living organisms. Genome-encoded macromolecules (nucleic acids, proteins and peptides derived from proteins by cleavage) are not as a rule included in ChEBI. In addition to molecular entities, ChEBI contains groups (parts of molecular entities) and classes of entities. ChEBI includes an ontological classification, whereby the relationships between molecular entities or classes of entities and their parents and/or children are specified. ChEBI is available online at http://www.ebi.ac.uk/chebi/. 
This article reports on new features in ChEBI since the last NAR report in 2007, including substructure and similarity searching, a submission tool for authoring of ChEBI datasets by the community and a 30-fold increase in the number of chemical structures stored in ChEBI.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Chemical Entities of Biological Interest (ChEBI) is a freely available dictionary of molecular entities focused on 'small' chemical compounds. The molecular entities in question are either natural products or synthetic products used to intervene in the processes of living organisms. Genome-encoded macromolecules (nucleic acids, proteins and peptides derived from proteins by cleavage) are not as a rule included in ChEBI. In addition to molecular entities, ChEBI contains groups (parts of molecular entities) and classes of entities. ChEBI includes an ontological classification, whereby the relationships between molecular entities or classes of entities and their parents and/or children are specified. ChEBI is available online at http://www.ebi.ac.uk/chebi/. This article reports on new features in ChEBI since the last NAR report in 2007, including substructure and similarity searching, a submission tool for authoring of ChEBI datasets by the community and a 30-fold increase in the number of chemical structures stored in ChEBI. |