Now showing 1 - 2 of 2
  • 2009Journal Article
    [["dc.bibliographiccitation.firstpage","W101"],["dc.bibliographiccitation.journal","Nucleic Acids Research"],["dc.bibliographiccitation.lastpage","W105"],["dc.bibliographiccitation.volume","37"],["dc.contributor.author","Hoff, Katharina J."],["dc.contributor.author","Lingner, Thomas"],["dc.contributor.author","Meinicke, Peter"],["dc.contributor.author","Tech, Maike"],["dc.date.accessioned","2018-11-07T08:28:27Z"],["dc.date.available","2018-11-07T08:28:27Z"],["dc.date.issued","2009"],["dc.description.abstract","Metagenomic sequencing projects yield numerous sequencing reads of a diverse range of uncultivated and mostly yet unknown microorganisms. In many cases, these sequencing reads cannot be assembled into longer contigs. Thus, gene prediction tools that were originally developed for whole-genome analysis are not suitable for processing metagenomes. Orphelia is a program for predicting genes in short DNA sequences that is available through a web server application (http://orphelia.gobics.de). Orphelia utilizes prediction models that were created with machine learning techniques on the basis of a wide range of annotated genomes. In contrast to other methods for metagenomic gene prediction, Orphelia has fragment length-specific prediction models for the two most popular sequencing techniques in metagenomics, chain termination sequencing and pyrosequencing. 
These models ensure highly specific gene predictions."],["dc.identifier.doi","10.1093/nar/gkp327"],["dc.identifier.isi","000267889100019"],["dc.identifier.pmid","19429689"],["dc.identifier.purl","https://resolver.sub.uni-goettingen.de/purl?gs-1/5949"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/16421"],["dc.notes.intern","Merged from goescholar"],["dc.notes.status","zu prüfen"],["dc.notes.submitter","Najko"],["dc.publisher","Oxford Univ Press"],["dc.relation.issn","0305-1048"],["dc.rights","Goescholar"],["dc.rights.uri","https://goescholar.uni-goettingen.de/licenses"],["dc.title","Orphelia: predicting genes in metagenomic sequencing reads"],["dc.type","journal_article"],["dc.type.internalPublication","yes"],["dc.type.peerReviewed","yes"],["dc.type.status","published"],["dc.type.version","published_version"],["dspace.entity.type","Publication"]]
    Details DOI PMID PMC WOS
  • 2008Journal Article
    [["dc.bibliographiccitation.artnumber","217"],["dc.bibliographiccitation.journal","BMC Bioinformatics"],["dc.bibliographiccitation.volume","9"],["dc.contributor.author","Hoff, Katharina J."],["dc.contributor.author","Tech, Maike"],["dc.contributor.author","Lingner, Thomas"],["dc.contributor.author","Daniel, Rolf"],["dc.contributor.author","Morgenstern, Burkhard"],["dc.contributor.author","Meinicke, Peter"],["dc.date.accessioned","2018-11-07T11:15:57Z"],["dc.date.available","2018-11-07T11:15:57Z"],["dc.date.issued","2008"],["dc.description.abstract","Background: Metagenomics is an approach to the characterization of microbial genomes via the direct isolation of genomic sequences from the environment without prior cultivation. The amount of metagenomic sequence data is growing fast while computational methods for metagenome analysis are still in their infancy. In contrast to genomic sequences of single species, which can usually be assembled and analyzed by many available methods, a large proportion of metagenome data remains as unassembled anonymous sequencing reads. One of the aims of all metagenomic sequencing projects is the identification of novel genes. Short length, for example, Sanger sequencing yields on average 700 bp fragments, and unknown phylogenetic origin of most fragments require approaches to gene prediction that are different from the currently available methods for genomes of single species. In particular, the large size of metagenomic samples requires fast and accurate methods with small numbers of false positive predictions. Results: We introduce a novel gene prediction algorithm for metagenomic fragments based on a two-stage machine learning approach. In the first stage, we use linear discriminants for monocodon usage, dicodon usage and translation initiation sites to extract features from DNA sequences. 
In the second stage, an artificial neural network combines these features with open reading frame length and fragment GC-content to compute the probability that this open reading frame encodes a protein. This probability is used for the classification and scoring of gene candidates. With large scale training, our method provides fast single fragment predictions with good sensitivity and specificity on artificially fragmented genomic DNA. Additionally, this method is able to predict translation initiation sites accurately and distinguishes complete from incomplete genes with high reliability. Conclusion: Large scale machine learning methods are well-suited for gene prediction in metagenomic DNA fragments. In particular, the combination of linear discriminants and neural networks is promising and should be considered for integration into metagenomic analysis pipelines. The data sets can be downloaded from the URL provided (see Availability and requirements section)."],["dc.identifier.doi","10.1186/1471-2105-9-217"],["dc.identifier.isi","000256421900002"],["dc.identifier.pmid","18442389"],["dc.identifier.purl","https://resolver.sub.uni-goettingen.de/purl?gs-1/8429"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/54482"],["dc.notes.intern","Merged from goescholar"],["dc.notes.status","zu prüfen"],["dc.notes.submitter","Najko"],["dc.publisher","Biomed Central Ltd"],["dc.relation.issn","1471-2105"],["dc.rights","Goescholar"],["dc.rights.uri","https://goescholar.uni-goettingen.de/licenses"],["dc.title","Gene prediction in metagenomic fragments: A large scale machine learning approach"],["dc.type","journal_article"],["dc.type.internalPublication","yes"],["dc.type.peerReviewed","yes"],["dc.type.status","published"],["dc.type.version","published_version"],["dspace.entity.type","Publication"]]
    Details DOI PMID PMC WOS