Now showing 1 - 10 of 12
  • 2018Lecture
    [["dc.contributor.author","Wiese, Lena"],["dc.contributor.author","Wangmo, Chimi"],["dc.contributor.author","Steuernagel, Lukas"],["dc.contributor.author","Schmitt, Armin O."],["dc.contributor.author","Gültas, Mehmet"],["dc.date.accessioned","2020-05-25T13:25:09Z"],["dc.date.available","2020-05-25T13:25:09Z"],["dc.date.issued","2018"],["dc.description.abstract","Genome analysis is a major precondition for future advances in the life sciences. The complex organization of genome data and the interactions between genomic components can often be modeled and visualized in graph structures. In this paper we propose the integration of several data sets into a graph database. We study the aptness of the database system in terms of analysis and visualization of a genome regulatory network (GRN) by running a benchmark on it. Major advantages of using a database system are the modifiability of the data set, the immediate visualization of query results as well as built-in indexing and caching features."],["dc.identifier.doi","10.5446/38888"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/65961"],["dc.language.iso","en"],["dc.relation.conference","Data Integration in the Life Sciences (DILS2018)"],["dc.relation.date","2018"],["dc.relation.eventlocation","Hannover"],["dc.title","Construction and Visualization of Dynamic Biological Networks: Benchmarking the Neo4J Graph Database"],["dc.type","lecture"],["dc.type.internalPublication","unknown"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2020Journal Article
    [["dc.bibliographiccitation.firstpage","101732"],["dc.bibliographiccitation.journal","Data & knowledge engineering"],["dc.bibliographiccitation.volume","126"],["dc.contributor.author","Wiese, Lena"],["dc.contributor.author","Waage, Tim"],["dc.contributor.author","Brenner, Michael"],["dc.date.accessioned","2020-12-10T14:23:24Z"],["dc.date.available","2020-12-10T14:23:24Z"],["dc.date.issued","2020"],["dc.identifier.doi","10.1016/j.datak.2019.101732"],["dc.identifier.issn","0169-023X"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/71919"],["dc.language.iso","en"],["dc.notes.intern","DOI Import GROB-354"],["dc.title","CloudDBGuard: A framework for encrypted data storage in NoSQL wide column stores"],["dc.type","journal_article"],["dc.type.internalPublication","yes"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2018Book Chapter
    [["dc.bibliographiccitation.firstpage","207"],["dc.bibliographiccitation.lastpage","220"],["dc.contributor.author","Wiese, Lena"],["dc.contributor.author","Homann, Daniel"],["dc.contributor.author","Waage, Tim"],["dc.contributor.author","Brenner, Michael"],["dc.date.accessioned","2020-05-25T13:50:15Z"],["dc.date.available","2020-05-25T13:50:15Z"],["dc.date.issued","2018"],["dc.description.abstract","Auslagerung von Daten in Cloud-Datenbanken verspricht eine Reihe von Vorteilen wie reduzierte Wartungskosten, Flexibilität der Ressourcenverteilung und einfache Zugreifbarkeit von nahezu überall. Diese Datenbanken bieten dabei eine Vielzahl von Funktionalitäten, um Berechnungen auf Daten auszuführen. Datensicherheit (einschließlich dem Schutz persönlicher Daten) ist in Cloud-Datenbanken jedoch noch nicht angemessen umgesetzt worden. Konventionelle Verschlüsselungsverfahren garantieren zwar hohe Sicherheit, verhindern aber auch weitere Berechnungen auf den Daten. Modernere homomorphe Verschlüsselungsverfahren versprechen dagegen sowohl Datensicherheit als auch die Möglichkeit, auf verschlüsselten Daten zu rechnen. Das bestehende System FamilyGuard kombiniert bisher eigenschaftsbewahrende Verschlüsselungsverfahren. Um die Funktionalität auf Aggregationsfunktionen zu erweitern, soll in Zukunft auch homomorphe Verschlüsselung eingesetzt werden. In diesem Artikel geben wir eine Übersicht über diverse Kategorien homomorpher Verschlüsselungsverfahren und ihre Sicherheitsgrundlagen. 
Im Anschluss stellen wir Anforderungen für den Einsatz homomorpher Verfahren in Cloud-Datenbanken auf."],["dc.identifier.doi","10.18420/sicherheit2018_17"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/65964"],["dc.language.iso","de"],["dc.relation.ispartof","Sicherheit 2018"],["dc.title","Homomorphe Verschlüsselung für Cloud-Datenbanken: Übersicht und Anforderungsanalyse"],["dc.type","book_chapter"],["dc.type.internalPublication","yes"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2019Journal Article
    [["dc.bibliographiccitation.firstpage","1"],["dc.bibliographiccitation.journal","Data & knowledge engineering"],["dc.bibliographiccitation.lastpage","44"],["dc.bibliographiccitation.volume","120"],["dc.contributor.author","Biskup, Joachim"],["dc.contributor.author","Dahn, Christine"],["dc.contributor.author","Diekmann, Katharina"],["dc.contributor.author","Menzel, Ralf"],["dc.contributor.author","Schalge, Dirk"],["dc.contributor.author","Wiese, Lena"],["dc.date.accessioned","2020-12-10T14:23:23Z"],["dc.date.available","2020-12-10T14:23:23Z"],["dc.date.issued","2019"],["dc.identifier.doi","10.1016/j.datak.2018.11.001"],["dc.identifier.issn","0169-023X"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/71918"],["dc.language.iso","en"],["dc.notes.intern","DOI Import GROB-354"],["dc.title","Publishing inference–proof relational data: An implementation and experiments"],["dc.type","journal_article"],["dc.type.internalPublication","yes"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2019Conference Paper
    [["dc.bibliographiccitation.firstpage","83"],["dc.bibliographiccitation.lastpage","92"],["dc.contributor.author","Tashkandi, Araek"],["dc.contributor.author","Wiese, Lena"],["dc.date.accessioned","2020-05-25T13:59:36Z"],["dc.date.available","2020-05-25T13:59:36Z"],["dc.date.issued","2019"],["dc.description.abstract","The efficiency of Machine Learning (ML) models has widely been acknowledged in the healthcare area. However, the quality of the underlying medical data is a major challenge when applying ML in medical decision making. In particular, the imbalanced class distribution problem causes the ML model to be biased towards the majority class. Furthermore, the accuracy will be biased, too, which produces the Accuracy Paradox. In this paper, we identify an optimal ML model for predicting mortality risk for Intensive Care Units (ICU) patients. We comprehensively assess an approach that leverages the efficiency of ML ensemble learning (in particular, Gradient Boosting Decision Tree) and clustering-based data sampling to handle the imbalanced data problem that this model faces. 
We comprehensively compare different competitors (in terms of ML models as well as clustering methods) on a big real-world ICU dataset achieving a maximum area under the curve value of 0.956."],["dc.identifier.doi","10.1145/3366030.3366040"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/65965"],["dc.language.iso","en"],["dc.relation.conference","iiWAS2019: The 21st International Conference on Information Integration and Web-based Applications & Services"],["dc.relation.eventend","2019-12"],["dc.relation.eventlocation","München"],["dc.relation.eventstart","2019-12"],["dc.relation.ispartof","Proceedings of the 21st International Conference on Information Integration and Web-based Applications & Services"],["dc.title","A Hybrid Machine Learning Approach for Improving Mortality Risk Prediction on Imbalanced Data"],["dc.type","conference_paper"],["dc.type.internalPublication","yes"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2019Journal Article
    [["dc.bibliographiccitation.artnumber","e12465"],["dc.bibliographiccitation.issue","1"],["dc.bibliographiccitation.journal","Expert Systems"],["dc.bibliographiccitation.volume","37"],["dc.contributor.author","Bahra, Guryash"],["dc.contributor.author","Wiese, Lena"],["dc.date.accessioned","2020-05-25T14:01:00Z"],["dc.date.available","2020-05-25T14:01:00Z"],["dc.date.issued","2019"],["dc.description.abstract","Neural networks are one option to implement decision support systems for health care applications. In this paper, we identify optimal settings of neural networks for medical diagnoses: The study involves the application of supervised machine learning using an artificial neural network to distinguish between gout and leukaemia patients. With the objective to improve the base accuracy (calculated from the initial set‐up of the neural network model), several enhancements are analysed, such as the use of hyperbolic tangent activation function instead of the sigmoid function, the use of two hidden layers instead of one, and transforming the measurements with linear regression to obtain a smoothened data set. Another setting we study is the impact on the accuracy when using a data set of reduced size but with higher data quality. We also discuss the tradeoff between accuracy and runtime efficiency."],["dc.identifier.doi","10.1111/exsy.12465"],["dc.identifier.purl","https://resolver.sub.uni-goettingen.de/purl?gs-1/16837"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/65966"],["dc.language.iso","en"],["dc.notes.intern","Merged from goescholar"],["dc.relation.eissn","1468-0394"],["dc.relation.issn","0266-4720"],["dc.rights","CC BY-NC 4.0"],["dc.rights.uri","https://creativecommons.org/licenses/by-nc/4.0"],["dc.title","Parameterizing neural networks for disease classification"],["dc.type","journal_article"],["dc.type.internalPublication","yes"],["dc.type.version","published_version"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2018Journal Article
    [["dc.bibliographiccitation.firstpage","297"],["dc.bibliographiccitation.issue","2"],["dc.bibliographiccitation.journal","Distributed and Parallel Databases"],["dc.bibliographiccitation.lastpage","321"],["dc.bibliographiccitation.volume","37"],["dc.contributor.author","Wiese, Ingmar"],["dc.contributor.author","Sarna, Nicole"],["dc.contributor.author","Wiese, Lena"],["dc.contributor.author","Tashkandi, Araek"],["dc.contributor.author","Sax, Ulrich"],["dc.date.accessioned","2020-05-25T13:20:07Z"],["dc.date.available","2020-05-25T13:20:07Z"],["dc.date.issued","2018"],["dc.description.abstract","Efficient identification of cohorts of similar patients is a major precondition for personalized medicine. In order to train prediction models on a given medical data set, similarities have to be calculated for every pair of patients—which results in a roughly quadratic data blowup. In this paper we discuss the topic of in-database patient similarity analysis ranging from data extraction to implementing and optimizing the similarity calculations in SQL. In particular, we introduce the notion of chunking that uniformly distributes the workload among the individual similarity calculations. Our benchmark comprises the application of one similarity measure (Cosine similarity) and one distance metric (Euclidean distance) on two real-world data sets; it compares the performance of a column store (MonetDB) and a row store (PostgreSQL) with two external data mining tools (ELKI and Apache Mahout)."],["dc.identifier.doi","10.1007/s10619-018-7249-x"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/65959"],["dc.language.iso","en"],["dc.relation.issn","0926-8782"],["dc.title","Concept acquisition and improved in-database similarity analysis for medical data"],["dc.type","journal_article"],["dc.type.internalPublication","yes"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2019Conference Paper
    [["dc.bibliographiccitation.firstpage","1"],["dc.bibliographiccitation.lastpage","5"],["dc.contributor.author","Doan, Triet"],["dc.contributor.author","Bingert, Sven"],["dc.contributor.author","Wiese, Lena"],["dc.contributor.author","Yahyapour, Ramin"],["dc.contributor.editor","Jäschke, Robert"],["dc.contributor.editor","Weidlich, Matthias"],["dc.date.accessioned","2021-10-26T13:12:10Z"],["dc.date.available","2021-10-26T13:12:10Z"],["dc.date.issued","2019"],["dc.identifier.doi","10.15488/9817"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/91394"],["dc.relation.conference","LWDA 2019"],["dc.relation.eventlocation","Berlin"],["dc.relation.eventstart","2019"],["dc.relation.ispartof","Proceedings of the Conference on \"Lernen, Wissen, Daten, Analysen\""],["dc.relation.orgunit","Gesellschaft für wissenschaftliche Datenverarbeitung"],["dc.rights","CC BY 4.0"],["dc.title","A Graph Database for Persistent Identifiers"],["dc.type","conference_paper"],["dc.type.internalPublication","yes"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2018Journal Article
    [["dc.bibliographiccitation.firstpage","52"],["dc.bibliographiccitation.journal","Big Data Research"],["dc.bibliographiccitation.lastpage","64"],["dc.bibliographiccitation.volume","13"],["dc.contributor.author","Tashkandi, Araek"],["dc.contributor.author","Wiese, Ingmar"],["dc.contributor.author","Wiese, Lena"],["dc.date.accessioned","2020-05-25T13:22:11Z"],["dc.date.available","2020-05-25T13:22:11Z"],["dc.date.issued","2018"],["dc.description.abstract","Patient similarity analysis is a precondition to apply machine learning technology on medical data. In this sense, patient similarity analysis harnesses the information wealth of electronic medical records (EMRs) to support medical decision making. A pairwise similarity computation can be used as the basis for personalized health prediction. With n patients the amount of n(n-1)/2 similarity calculations is required. Thus, analyzing patient similarity leads to data explosion when exploiting big data. By increasing the data size the computational burden of this analysis increases. A real-life medical application may exceed the limits of current hardware in a fairly short amount of time. Finding ways to optimize patient similarity analysis and handling this data explosion is the topic of this paper. Current implementations for patient similarity analysis require their users to have knowledge of complex data analysis tools. Moreover, data pre-processing and analysis are performed in synthetic conditions: the data are extracted from the EMR database and then the data preparation and analysis are processed in external tools. After all of this effort the users might not experience a superior performance of the patient similarity analysis. We propose methods to optimize the patient similarity analysis in order to make it scalable to big data. Our method was tested against two real datasets and a low execution time was accomplished. Our result hence benefits a comprehensive medical decision support system. 
Moreover, our implementation comprises a balance between performance and applicability: the majority of the workload is processed within a database management system to enable a direct implementation on an EMR database."],["dc.identifier.doi","10.1016/j.bdr.2018.05.001"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/65960"],["dc.language.iso","en"],["dc.relation.issn","2214-5796"],["dc.title","Efficient In-Database Patient Similarity Analysis for Personalized Medical Decision Support Systems"],["dc.type","journal_article"],["dc.type.internalPublication","yes"],["dspace.entity.type","Publication"]]
    Details DOI
  • 2017Journal Article
    [["dc.bibliographiccitation.firstpage","308"],["dc.bibliographiccitation.issue","3"],["dc.bibliographiccitation.journal","The Computer Journal"],["dc.bibliographiccitation.lastpage","328"],["dc.bibliographiccitation.volume","60"],["dc.contributor.author","Wiese, Lena"],["dc.contributor.author","Waage, Tim"],["dc.contributor.author","Bollwein, Ferdinand"],["dc.date.accessioned","2018-11-07T10:26:30Z"],["dc.date.available","2018-11-07T10:26:30Z"],["dc.date.issued","2017"],["dc.description.abstract","In this article, we introduce a replication procedure in a distributed database system that supports several fragmentations of the same data table. One application that requires multiple fragmentations is flexible (similarity-based) query answering. The major feature of our replication procedure is that replication and recovery respect the overlaps of fragments stemming from different fragmentations. In this paper we extend the data replication problem by not only considering hard constraints to ensure a fixed replication factor but also adding soft constraints that express desired data locality of fragments. 
We furthermore analyze the case that there are more fragmentations (leading to the situation that some replication conditions are optional); and we study the influences of data updates (insertions and deletions) on the data distribution."],["dc.description.sponsorship","German Research Foundation [WI 4086/2-1]"],["dc.identifier.doi","10.1093/comjnl/bxw041"],["dc.identifier.isi","000397056700003"],["dc.identifier.uri","https://resolver.sub.uni-goettingen.de/purl?gro-2/43057"],["dc.notes.status","zu prüfen"],["dc.notes.submitter","PUB_WoS_Import"],["dc.publisher","Oxford Univ Press"],["dc.relation.issn","1460-2067"],["dc.relation.issn","0010-4620"],["dc.title","A Replication Scheme for Multiple Fragmentations with Overlapping Fragments"],["dc.type","journal_article"],["dc.type.internalPublication","yes"],["dc.type.peerReviewed","yes"],["dc.type.status","published"],["dspace.entity.type","Publication"]]
    Details DOI WOS