Open Babel 2.3 C++ API Quick Reference Guide

Total Page:16

File Type:pdf, Size:1020Kb

Open Babel 2.3 C++ API Quick Reference Guide // Automatic format perception // Molecular formula std::ifstream ifs(filename); (std::string) mol.GetFormula(); Open Babel 2.3 OBConversion conv; OBFormat* inFormat = conv.FormatFromExt(filename); // Conformations C++ API OBFormat* outFormat = conv.SetFormat("SDF"); (int) mol.NumConformers(); if (inFormat && outFormat) Quick reference guide { // Smallest set of smallest rings conv.SetInAndOutFormats(inFormat, outFormat); (std::vector<OBRing*>) mol.GetSSSR(); } // Hydrogen manipulations (bool) mol.DeleteHydrogens(); // All H (bool) mol.AddPolarHydrogens(); // Polar H (bool) mol.AddHydrogens(); // All H Namespace Looping over atoms and bonds // Generic data using namespace OpenBabel; #include <openbabel/obiter.h> std::vector<OBGenericData*>::iterator k; #include <openbabel/mol.h> std::vector<OBGenericData*> vdata = mol.GetData(); for (k = vdata.begin(); k != vdata.end(); ++k) using namespace OpenBabel; if ((*k)->GetDataType() == OBGenericDataType::PairData) { Reading and writing OBMol mol; std::cout << "> <" << (*k)->GetAttribute(); OBAtom* atom; std::cout << ">" << std::endl; #include <openbabel/mol.h> OBAtom* nbrAtom; std::cout << ((OBPairData*)(*k))->GetValue(); #include <openbabel/obconversion.h> OBBond* bond; std::cout << std::endl; } int main(int argc, char **argv) // Looping over all atoms { double exactMass(0.0); // Remove all but the largest fragments OBMol mol; FOR_ATOMS_OF_MOL(atom, mol) (bool) mol.StripSalts(0); { // Read from standard input and write to exactMass += atom->GetExactMass(); // Clear molecule for re-use // standard output } (bool) mol.Clear(); OBConversion conv(&std::cin, &std::cout); // Looping over all bonds // Input format is sd-file, output format unsigned int totalBondOrder(0); // is canonical smiles FOR_BONDS_OF_MOL(bond, mol) if (conv.SetInAndOutFormats("sdf", "can")) { Atoms { totalBondOrder += bond->GetBondOrder(); if (conv.Read(&mol)) } #include <openbabel/atom.h> { // Looping over all neighbor atoms of given atom // Print number of atoms unsigned int nAtoms(0); OBAtom atom; std::cerr << "Molecule has: " << FOR_NBORS_OF_ATOM(nbrAtom, atom) ++nAtoms; mol.NumAtoms() << " atoms.\n"; // Properties } (int) atom.GetFormalCharge(); conv->Write(&mol); (unsigned int) atom.GetAtomicNum(); mol.Clear(); (double) atom.GetAtomicMass(); } Element table return 0; // Explicit and maximum expected connections } #include <openbabel/data.h> (unsigned int) atom.GetValence(); (unsigned int) atom.GetImplicitValence(); OBElementTable etab; // Non-hydrogen connections (char*) etab.GetSymbol(6); (unsigned int) atom.GetHvyValence(); Conversion (int) etab.GetAtomicNum("C"); (double) etab.GetVdwRad(7); // Implicit and explicit hydrogens #include <openbabel/obconversion.h> (double) etab.GetCovalentRad(8); (unsigned int) atom.ImplicitHydrogenCount(); #include <openbabel/mol.h> (double) etab.GetMass(1); (unsigned int) atom.ExplicitHydrogenCount(); // Create molecule from SMILES string // Is atom in ring of size 6? std::string SmilesString("c1ccccc1"); (bool) atom.IsInRing() && atom.IsInRingSize(6); OBMol mol; stringstream ss(SmilesString) Molecules // Size of smallest ring that contains the atom OBConversion conv(&ss); (unsigned int) atom.MemberOfRingSize(); if (conv.SetInFormat("smi") && conv.Read(&mol)) #include <openbabel/mol.h> { /* ... */ } #include <openbabel/generic.h> // Number of rings that contain the atom (unsigned int) atom.MemberOfRingCount(); // Conversion without manipulation OBMol mol; OBConversion conv(&is, &os); if (conv.SetInAndOutFormats("SMI", "MOL")) // Number of atoms and bonds { (unsigned int) mol.NumAtoms(); // All atoms // Option "h" adds explicit hydrogens (unsigned int) mol.NumHvyAtoms(); // Non-H atoms Bonds conv.AddOption("h", OBConversion::GENOPTIONS); (unsigned int) mol.NumBonds(); // All bonds conv.Convert(); #include <openbabel/bond.h> } // Molecular weight with implicit hydrogens #include <openbabel/atom.h> (double) mol.GetMolWt(true); www.silicos.com www.silicos.com www.silicos.com OBBond bond; // Set calculation parameters spec.SetResolution(3.0); Energy calculations // Properties spec.SetAccuracy(AngStepSize20); (unsigned int) bond.GetBondOrder(); spec.SetStereo(NoStereoSpecificProbes); #include <openbabel/forcefield.h> (bool) bond.IsPrimaryAmide(); // and 2 and 3... spec.SetNormalization(NormalizationTowardsZeroMean); #include <openbabel/mol.h> (bool) bond.IsSingle(); // and 2 and 3... (bool) bond.IsRotor(); // Calculate and print OBMol mol; std::vector<double> sphore; /* ... */ // Flanking atoms sphore = spec.GetSpectrophore(&mol); (OBAtom*) bond.GetBeginAtom(); for (int i(0); i < sphore.size(); ++i) // Select the MMFF94 forcefield (OBAtom*) bond.GetEndAtom(); { OBForceField* pFF; OBAtom* atom; (OBAtom*) bond.GetNbrAtom(atom); std::cout << sphore[i] << "\t"; pFF = OBForceField::FindForceField("MMFF94"); } if (!pFF) exit(1); // Is bond in ring? std::cout << std::endl; (bool) bond.IsInRing(); // Set the logfile pFF->SetLogFile(&std::cerr); Stereochemistry // Assign atom types, parameters, ... Substructure search pFF->Setup(mol); #include <openbabel/mol.h> // Calculate the energy #include <openbabel/parsmart.h> #include <openbabel/obconversion.h> pFF->Energy(); #include <openbabel/mol.h> #include <openbabel/stereo/tetrahedral.h> #include <openbabel/atom.h> // Perform maximum 1000 steps of minimization OBMol mol; pFF->ConjugateGradients(1000); OBMol mol; OBConversion conv; OBAtom* atom; conv.SetInFormat("smi"); /* ... */ conv.ReadString(&mol, "C[C@H](Cl)Br"); // Create a SMARTS pattern of a phenyl ring // Stereofacade object OBSmartsPattern sp; OBStereoFacade facade(&mol); Open Babel sp.Init("c1ccccc1"); (unsigned int) facade.NumTetrahedralStereo(); (unsigned int) facade.NumCisTransStereo(); This documentation is part of the Open Babel project. For more information, // Properties of the substructure (unsigned int) facade.SquarePlanarStereo(); see www.openbabel.org. Open Babel is free software; you can redistribute if (sp.IsValid()) it and/or modify it under the terms of the GNU General Public License as { // Loop over all atoms to check if stereocenter published by the Free Software Foundation version 2 of the License. Open std::cout << sp.NumAtoms(); FOR_ATOMS_OF_MOL(atom, mol) Babel is distributed in the hope that it will be useful, but without any std::cout << sp.NumBonds(); { std::cout << sp.GetSmarts(); std::cout << atom->GetId() << ": "; warranty; without even the implied warranty of merchantability or fitness for } if (facade.HasTetrahedralStereo(atom->GetId())) a particular purpose. See the GNU General Public License for more details. std::cout << ": stereo"; // Matching else (bool) sp.Match(mol, true); // Single matching std::cout << ": no stereo"; Spectrophore™ (bool) sp.Match(mol, false); // Complete matching std::cout << std::endl; } Spectrophore™ is a trademark of Silicos NV and the technology is part of // Substructure mapping the Open Babel project. std::vector<std::vector<int> > mapListA; mapListA = sp.GetMapList(); // Non-unique matches std::vector<std::vector<int> > mapListU; Silicos NV mapListU = sp.GetUMapList(); // Unique matches Rings for (int m(0); m < mapListU.size(); ++m) Silicos is a fee-for-service company empowering open source chemo- { #include <openbabel/mol.h> informatics virtual screening technologies for the discovery of novel lead std::cout << "Unique match " << m << std::endl; #include <openbabel/ring.h> compounds and database characterization. Silicos fully endorses the for (int a(0); a < mapListU[m].size(); ++a) #include <openbabel/math/vector3.h> concept of open innovation and open source software development, and { provides its clients with a wide variety of computational chemistry-based atom = mol.GetAtom(mapListU[m][a]); OBMol mol; lead discovery services, including Open Babel support, training and code std::cout << atom->GetAtomicNum() << std::endl; /* ... */ } development. Please visit www.silicos.com for more details. } // Get the smallest-set-of-smallest-rings std::vector<OBRing*> rings = mol.GetSSSR(); vector3 center; vector3 normal_up; Spectrophores™ vector3 normal_down; Copyright © 2010 Silicos NV for (int i(0); i < rings.size(); ++i) { Silicos NV #include <openbabel/obspectrophore.h> (bool) rings[i].IsAromatic(); #include <openbabel/mol.h> (int) rings[i].Size(); Wetenschapspark 7, (bool) rings[i].findCenterAndNormal(center, B-3590 Diepenbeek OBMol mol; normal_up, Belgium /* ... */ normal_down); } www.silicos.com // Create a Spectrophore object www.openbabel.org OBSpectrophore spec; www.silicos.com www.silicos.com .
Recommended publications
  • Open Babel Documentation Release 2.3.1
    Open Babel Documentation Release 2.3.1 Geoffrey R Hutchison Chris Morley Craig James Chris Swain Hans De Winter Tim Vandermeersch Noel M O’Boyle (Ed.) December 05, 2011 Contents 1 Introduction 3 1.1 Goals of the Open Babel project ..................................... 3 1.2 Frequently Asked Questions ....................................... 4 1.3 Thanks .................................................. 7 2 Install Open Babel 9 2.1 Install a binary package ......................................... 9 2.2 Compiling Open Babel .......................................... 9 3 obabel and babel - Convert, Filter and Manipulate Chemical Data 17 3.1 Synopsis ................................................. 17 3.2 Options .................................................. 17 3.3 Examples ................................................. 19 3.4 Differences between babel and obabel .................................. 21 3.5 Format Options .............................................. 22 3.6 Append property values to the title .................................... 22 3.7 Filtering molecules from a multimolecule file .............................. 22 3.8 Substructure and similarity searching .................................. 25 3.9 Sorting molecules ............................................ 25 3.10 Remove duplicate molecules ....................................... 25 3.11 Aliases for chemical groups ....................................... 26 4 The Open Babel GUI 29 4.1 Basic operation .............................................. 29 4.2 Options .................................................
    [Show full text]
  • Open Data, Open Source, and Open Standards in Chemistry: the Blue Obelisk Five Years On" Journal of Cheminformatics Vol
    Oral Roberts University Digital Showcase College of Science and Engineering Faculty College of Science and Engineering Research and Scholarship 10-14-2011 Open Data, Open Source, and Open Standards in Chemistry: The lueB Obelisk five years on Andrew Lang Noel M. O'Boyle Rajarshi Guha National Institutes of Health Egon Willighagen Maastricht University Samuel Adams See next page for additional authors Follow this and additional works at: http://digitalshowcase.oru.edu/cose_pub Part of the Chemistry Commons Recommended Citation Andrew Lang, Noel M O'Boyle, Rajarshi Guha, Egon Willighagen, et al.. "Open Data, Open Source, and Open Standards in Chemistry: The Blue Obelisk five years on" Journal of Cheminformatics Vol. 3 Iss. 37 (2011) Available at: http://works.bepress.com/andrew-sid-lang/ 19/ This Article is brought to you for free and open access by the College of Science and Engineering at Digital Showcase. It has been accepted for inclusion in College of Science and Engineering Faculty Research and Scholarship by an authorized administrator of Digital Showcase. For more information, please contact [email protected]. Authors Andrew Lang, Noel M. O'Boyle, Rajarshi Guha, Egon Willighagen, Samuel Adams, Jonathan Alvarsson, Jean- Claude Bradley, Igor Filippov, Robert M. Hanson, Marcus D. Hanwell, Geoffrey R. Hutchison, Craig A. James, Nina Jeliazkova, Karol M. Langner, David C. Lonie, Daniel M. Lowe, Jerome Pansanel, Dmitry Pavlov, Ola Spjuth, Christoph Steinbeck, Adam L. Tenderholt, Kevin J. Theisen, and Peter Murray-Rust This article is available at Digital Showcase: http://digitalshowcase.oru.edu/cose_pub/34 Oral Roberts University From the SelectedWorks of Andrew Lang October 14, 2011 Open Data, Open Source, and Open Standards in Chemistry: The Blue Obelisk five years on Andrew Lang Noel M O'Boyle Rajarshi Guha, National Institutes of Health Egon Willighagen, Maastricht University Samuel Adams, et al.
    [Show full text]
  • Computer-Assisted Catalyst Development Via Automated Modelling of Conformationally Complex Molecules
    www.nature.com/scientificreports OPEN Computer‑assisted catalyst development via automated modelling of conformationally complex molecules: application to diphosphinoamine ligands Sibo Lin1*, Jenna C. Fromer2, Yagnaseni Ghosh1, Brian Hanna1, Mohamed Elanany3 & Wei Xu4 Simulation of conformationally complicated molecules requires multiple levels of theory to obtain accurate thermodynamics, requiring signifcant researcher time to implement. We automate this workfow using all open‑source code (XTBDFT) and apply it toward a practical challenge: diphosphinoamine (PNP) ligands used for ethylene tetramerization catalysis may isomerize (with deleterious efects) to iminobisphosphines (PPNs), and a computational method to evaluate PNP ligand candidates would save signifcant experimental efort. We use XTBDFT to calculate the thermodynamic stability of a wide range of conformationally complex PNP ligands against isomeriation to PPN (ΔGPPN), and establish a strong correlation between ΔGPPN and catalyst performance. Finally, we apply our method to screen novel PNP candidates, saving signifcant time by ruling out candidates with non‑trivial synthetic routes and poor expected catalytic performance. Quantum mechanical methods with high energy accuracy, such as density functional theory (DFT), can opti- mize molecular input structures to a nearby local minimum, but calculating accurate reaction thermodynamics requires fnding global minimum energy structures1,2. For simple molecules, expert intuition can identify a few minima to focus study on, but an alternative approach must be considered for more complex molecules or to eventually fulfl the dream of autonomous catalyst design 3,4: the potential energy surface must be frst surveyed with a computationally efcient method; then minima from this survey must be refned using slower, more accurate methods; fnally, for molecules possessing low-frequency vibrational modes, those modes need to be treated appropriately to obtain accurate thermodynamic energies 5–7.
    [Show full text]
  • Designing Universal Chemical Markup (UCM) Through the Reusable Methodology Based on Analyzing Existing Related Formats
    Designing Universal Chemical Markup (UCM) through the reusable methodology based on analyzing existing related formats Background: In order to design concepts for a new general-purpose chemical format we analyzed the strengths and weaknesses of current formats for common chemical data. While the new format is discussed more in the next article, here we describe our software s t tools and two stage analysis procedure that supplied the necessary information for the n i r development. The chemical formats analyzed in both stages were: CDX, CDXML, CML, P CTfile and XDfile. In addition the following formats were included in the first stage only: e r P CIF, InChI, NCBI ASN.1, NCBI XML, PDB, PDBx/mmCIF, PDBML, SMILES, SLN and Mol2. Results: A two stage analysis process devised for both XML (Extensible Markup Language) and non-XML formats enabled us to verify if and how potential advantages of XML are utilized in the widely used general-purpose chemical formats. In the first stage we accumulated information about analyzed formats and selected the formats with the most general-purpose chemical functionality for the second stage. During the second stage our set of software quality requirements was used to assess the benefits and issues of selected formats. Additionally, the detailed analysis of XML formats structure in the second stage helped us to identify concepts in those formats. Using these concepts we came up with the concise structure for a new chemical format, which is designed to provide precise built-in validation capabilities and aims to avoid the potential issues of analyzed formats.
    [Show full text]
  • Open Data, Open Source and Open Standards in Chemistry: the Blue Obelisk five Years On
    Open Data, Open Source and Open Standards in chemistry: The Blue Obelisk ¯ve years on Noel M O'Boyle¤1 , Rajarshi Guha2 , Egon L Willighagen3 , Samuel E Adams4 , Jonathan Alvarsson5 , Richard L Apodaca6 , Jean-Claude Bradley7 , Igor V Filippov8 , Robert M Hanson9 , Marcus D Hanwell10 , Geo®rey R Hutchison11 , Craig A James12 , Nina Jeliazkova13 , Andrew SID Lang14 , Karol M Langner15 , David C Lonie16 , Daniel M Lowe4 , J¶er^omePansanel17 , Dmitry Pavlov18 , Ola Spjuth5 , Christoph Steinbeck19 , Adam L Tenderholt20 , Kevin J Theisen21 , Peter Murray-Rust4 1Analytical and Biological Chemistry Research Facility, Cavanagh Pharmacy Building, University College Cork, College Road, Cork, Co. Cork, Ireland 2NIH Center for Translational Therapeutics, 9800 Medical Center Drive, Rockville, MD 20878, USA 3Division of Molecular Toxicology, Institute of Environmental Medicine, Nobels vaeg 13, Karolinska Institutet, 171 77 Stockholm, Sweden 4Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lens¯eld Road, CB2 1EW, UK 5Department of Pharmaceutical Biosciences, Uppsala University, Box 591, 751 24 Uppsala, Sweden 6Metamolecular, LLC, 8070 La Jolla Shores Drive #464, La Jolla, CA 92037, USA 7Department of Chemistry, Drexel University, 32nd and Chestnut streets, Philadelphia, PA 19104, USA 8Chemical Biology Laboratory, Basic Research Program, SAIC-Frederick, Inc., NCI-Frederick, Frederick, MD 21702, USA 9St. Olaf College, 1520 St. Olaf Ave., North¯eld, MN 55057, USA 10Kitware, Inc., 28 Corporate Drive, Clifton Park, NY 12065, USA 11Department of Chemistry, University of Pittsburgh, 219 Parkman Avenue, Pittsburgh, PA 15260, USA 12eMolecules Inc., 380 Stevens Ave., Solana Beach, California 92075, USA 13Ideaconsult Ltd., 4.A.Kanchev str., So¯a 1000, Bulgaria 14Department of Engineering, Computer Science, Physics, and Mathematics, Oral Roberts University, 7777 S.
    [Show full text]
  • Conformational Analysis 3D Structures, Conformations and Molecular Surfaces
    Conformational Analysis 3D Structures, Conformations and Molecular Surfaces Christof H. Schwab Molecular Networks GmbH Altamira LLC Henkestr. 91 1455 Candlewood Drive 91052 Erlangen, Germany mn-am.com Columbus, Ohio 43235, USA Molecular Networks and Altamira MN-AM Erlangen, Germany Columbus, Ohio, USA Friedrich-Alexander-Universität The Ohio State University 1997 2008 Chemoinformatics 3D structure generation Physicochemical and reaction properties Metabolic reaction knowledge Computational toxicology and risk assessment Database and knowledgebase Predictive models Consulting services 2 Product Lines – Chemoinformatics CORINA Classic CORINA Classic Industry-standard 3D structure generation CORINA Symphony Profiling and managing of chemical datasets Workflows Structure cleaning/processing Descriptor generation (properties and fragments) SYLVIA Estimation of synthetic accessibility of compounds Public tools ChemoTyper (with ToxPrint Chemotypes) https://chemotyper.org, https://toxprint.org 3 Product Lines – Computational Toxicology and Risk Assessment ChemTunes Platform to support decision making in human health and regulatory critical endpoints Toxicity database (all endpoints) "Inventory" concept for compound location ChemTunes ToxGPS Prediction models All human health related endpoints Workflows TTC (thresholds of toxicological concern) Genotoxic Impurities Read-Across (in development) Workflow ICH M7 GTI (in development) 4 3D Structures, Conformations and Molecular Surfaces – Overview 3D structures – why,
    [Show full text]
  • Bringing Open Source to Drug Discovery
    Bringing Open Source to Drug Discovery Chris Swain Cambridge MedChem Consulting Standing on the shoulders of giants • There are a huge number of people involved in writing open source software • It is impossible to acknowledge them all individually • The slide deck will be available for download and includes 25 slides of details and download links – Copy on my website www.cambridgemedchemconsulting.com Why us Open Source software? • Allows access to source code – You can customise the code to suit your needs – If developer ceases trading the code can continue to be developed – Outside scrutiny improves stability and security What Resources are available • Toolkits • Databases • Web Services • Workflows • Applications • Scripts Toolkits • OpenBabel (htttp://openbabel.org) is a chemical toolbox – Ready-to-use programs, and complete programmer's toolkit – Read, write and convert over 110 chemical file formats – Filter and search molecular files using SMARTS and other methods, KNIME add-on – Supports molecular modeling, cheminformatics, bioinformatics – Organic chemistry, inorganic chemistry, solid-state materials, nuclear chemistry – Written in C++ but accessible from Python, Ruby, Perl, Shell scripts… Toolkits • OpenBabel • R • CDK • OpenCL • RDkit • SciPy • Indigo • NumPy • ChemmineR • Pandas • Helium • Flot • FROWNS • GNU Octave • Perlmol • OpenMPI Toolkits • RDKit (http://www.rdkit.org) – A collection of cheminformatics and machine-learning software written in C++ and Python. – Knime nodes – The core algorithms and data structures are written in C ++. Wrappers are provided to use the toolkit from either Python or Java. – Additionally, the RDKit distribution includes a PostgreSQL-based cartridge that allows molecules to be stored in relational database and retrieved via substructure and similarity searches.
    [Show full text]
  • Open Source Molecular Modeling
    Accepted Manuscript Title: Open Source Molecular Modeling Author: Somayeh Pirhadi Jocelyn Sunseri David Ryan Koes PII: S1093-3263(16)30118-8 DOI: http://dx.doi.org/doi:10.1016/j.jmgm.2016.07.008 Reference: JMG 6730 To appear in: Journal of Molecular Graphics and Modelling Received date: 4-5-2016 Accepted date: 25-7-2016 Please cite this article as: Somayeh Pirhadi, Jocelyn Sunseri, David Ryan Koes, Open Source Molecular Modeling, <![CDATA[Journal of Molecular Graphics and Modelling]]> (2016), http://dx.doi.org/10.1016/j.jmgm.2016.07.008 This is a PDF file of an unedited manuscript that has been accepted for publication. As a service to our customers we are providing this early version of the manuscript. The manuscript will undergo copyediting, typesetting, and review of the resulting proof before it is published in its final form. Please note that during the production process errors may be discovered which could affect the content, and all legal disclaimers that apply to the journal pertain. Open Source Molecular Modeling Somayeh Pirhadia, Jocelyn Sunseria, David Ryan Koesa,∗ aDepartment of Computational and Systems Biology, University of Pittsburgh Abstract The success of molecular modeling and computational chemistry efforts are, by definition, de- pendent on quality software applications. Open source software development provides many advantages to users of modeling applications, not the least of which is that the software is free and completely extendable. In this review we categorize, enumerate, and describe available open source software packages for molecular modeling and computational chemistry. 1. Introduction What is Open Source? Free and open source software (FOSS) is software that is both considered \free software," as defined by the Free Software Foundation (http://fsf.org) and \open source," as defined by the Open Source Initiative (http://opensource.org).
    [Show full text]
  • 3Dmolnet: a Generative Network for Molecular Structures
    3DMolNet: A Generative Network for Molecular Structures Vitali Nesterov, Mario Wieser, Volker Roth Department of Mathematics and Computer Science University of Basel, Switzerland [email protected] Abstract With the recent advances in machine learning for quan- tum chemistry, it is now possible to predict the chemical properties of compounds and to generate novel molecules. Existing generative models mostly use a string- or graph- based representation, but the precise three-dimensional co- ordinates of the atoms are usually not encoded. First at- tempts in this direction have been proposed, where autore- gressive or GAN-based models generate atom coordinates. Those either lack a latent space in the autoregressive setting, such that a smooth exploration of the compound space is not possible, or cannot generalize to varying chemical compo- sitions. We propose a new approach to efficiently generate molecular structures that are not restricted to a fixed size or composition. Our model is based on the variational autoen- coder which learns a translation-, rotation-, and permutation- invariant low-dimensional representation of molecules. Our experiments yield a mean reconstruction error below 0.05 A,˚ outperforming the current state-of-the-art methods by a fac- tor of four, and which is even lower than the spatial quanti- zation error of most chemical descriptors. The compositional and structural validity of newly generated molecules has been confirmed by quantum chemical methods in a set of experi- Figure 1: A schematic illustration of the decoding process of ments. the 3DMolNet. The model learns a low-dimensional repre- sentation Z of molecules (red plane). Each latent encoding can be decoded to a nuclear charge matrix C, an Euclidean Introduction distance matrix D, and a bond matrix B using neural net- An exploration of the chemical compound space (CCS) is works (blue dotted arrows).
    [Show full text]
  • SOMA2 –Application Oriented Molecular Modelling
    SOMA2 SOMA2 – Application Oriented Molecular Modelling Workflows in WWW-Browser EGI Community Forum 2012 Pinja Koskinen, Kimmo Mattila and Dr. Tapani Kinnunen (tapani.kinnunen [at] csc.fi) CSC – IT Center for Science Ltd., Espoo, Finland SOMA2 CSC at a glance – Founded in 1971 as a technical support unit for Univac 1108 – Connected Finland to the Internet in 1988 – Reorganized as a company, CSC – Scientific Computing Ltd. in 1993 – All shares to the Ministry of Education and Culture of Finland in 1997 – Operates on a non-profit principle – Facilities in Espoo, close to Otaniemi campus (of 15,000 students and 16,000 technology professionals) and Kajaani – Staff 230 – Turnover 2011 27,3 million euros SOMA2 CSC’s mission – CSC, as part of the Finnish national research structure, develops and offers high-quality information technology services. CSC’s services – Funet Services – Computing Services – Application Services – Data Services for Science and Culture – Information Management Services SOMA2 SOMA2 is a modelling environment for computational drug discovery and molecular modelling – SOMA2 is operated with web browser Intuitive WWW interface provides an easy access to computational tools. Offers a full scale environment from data input to result analysis. System is operated with user’s own user account and access rights. – SOMA2 makes use of scientific applications available in the computing system Uniform interface tools for applications. Automatic configuration and execution of applications. Different applications and tools can be integrated into application workflows. Applications can be executed locally or remotely and as interactive or batch jobs Jobs in grids are supported via suitable middleware (since version 1.4 Aluminium) – SOMA2 Software is open source Released in May 2007 under GNU General Public License (GPL).
    [Show full text]
  • Fast, Efficient Fragment-Based Coordinate Generation for Open Babel
    Yoshikawa and Hutchison J Cheminform (2019) 11:49 https://doi.org/10.1186/s13321-019-0372-5 Journal of Cheminformatics SOFTWARE Open Access Fast, efcient fragment-based coordinate generation for Open Babel Naruki Yoshikawa1 and Geofrey R. Hutchison2* Abstract Rapidly predicting an accurate three dimensional geometry of a molecule is a crucial task for cheminformatics and across a wide range of molecular modeling. Consequently, developing a fast, accurate, and open implementation of structure prediction is necessary for reproducible cheminformatics research. We introduce a fragment-based coordinate generation implementation for Open Babel, a widely-used open source toolkit for cheminformatics. The new implementation improves speed and stereochemical accuracy, while retaining or improving accuracy of bond lengths, bond angles, and dihedral torsions. Input molecules are broken into fragments by cutting at rotatable bonds. The coordinates of fragments are set according to a fragment library, prepared from open crystallographic databases. Since the coordinates of multiple atoms are decided at once, coordinate prediction is accelerated over the previous rules-based implementation in Open Babel, as well as the widely-used distance geometry methods in RDKit. This new implementation will be benefcial for a wide range of applications, including computational property prediction in polymers, molecular materials and drug design. Keywords: Coordinate generation, Fragments, Molecular geometry Introduction coordinatte generation, including BALL [19], FROG [20, Accurate prediction of the three-dimensional structure of 21], RDKit [22], and Open Babel [23]. Te latter, which is a molecule is critical to a wide range of cheminformatics highly popular, has used a rule-based coordinate builder and molecular modeling tasks, since electrostatic, inter- with a small set of ring fragments, followed by force feld molecular, and other conformation-driven properties minimization.
    [Show full text]
  • Open Babel Access and Interconvert Chemical Information
    Open Babel Access and interconvert chemical information Noel M. O’Boyle Open Babel development team and NextMove Software, Cambridge, UK Dec 2013 EMBL-EBI/Wellcome Trust Course: Resources for Computational Drug Discovery Image credit: AJ Cann (AJC1 on Flickr) Image credit: Jon Osborne (jonno101101 on Flickr) • Volunteer effort, an open source success story – Originally a fork from OpenEye’s OELib in 2001 – Lead is Geoff Hutchison (Uni of Pittsburgh) – 4 or 5 active developers – I got involved in late 2005 • http://openbabel.org • Associated paper: (Open Access) – Open Babel: An open chemical toolbox, J. Cheminf., 2011, 3, 33. 5 Does anyone else use Open Babel? • 40K downloads (from SF) in last 12 months – 1.4K downloads of Windows Python bindings • Paper #1 most accessed in last year – Cited 60 times in 1 year • In short, very widely-used Features • Multiple chemical file formats (+ options) and utility formats • 2D coordinate generation and depiction (PNG and SVG) • 3D coordinate generation, forcefield minimisation, conformer generation • Binary fingerprints (path-based, substructure- based) and associated “fast search” database • Bond perception, aromaticity detection and atom- typing • Canonical labelling, automorphisms, alignment • Plugin architecture • Several command-line applications, but also a software library • Written in C++ but bindings in several languages obabel and file conversion • Basic usage: obabel infile.extn –O outfile.extn • Can also read from stdin, write to stdout, read from a SMILES string, specify the input and
    [Show full text]