Now showing 1–10 of 10
  • 2016 Journal Article
    Herbold, Steffen; Trautsch, Alexander; Grabowski, Jens
    Global vs. local models for cross-project defect prediction: A replication study
    Empirical Software Engineering 22(4), pp. 1866–1902, 2016
    DOI: 10.1007/s10664-016-9468-y · ISSN: 1382-3256 · eISSN: 1573-7616
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/71101
  • 2020 Conference Paper
    Trautsch, Alexander Richard (ORCID: 0000-0001-5236-7953); Herbold, Steffen; Grabowski, Jens
    Static source code metrics and static analysis warnings for fine-grained just-in-time defect prediction
    In: 2020 IEEE International Conference on Software Maintenance and Evolution (ICSME), Adelaide, 27 September – 3 October 2020. IEEE, pp. 127–138 (status: accepted)
    DOI: 10.1109/icsme46990.2020.00022 · ISBN: 978-1-7281-5619-4
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/69417
    Abstract: Software quality evolution and predictive models to support decisions about resource distribution in software quality assurance tasks are an important part of software engineering research. Recently, a fine-grained just-in-time defect prediction approach was proposed that can find bug-inducing files within changes instead of only complete changes. In this work, we use this approach and improve it in multiple places: data collection, labeling, and features. We include manually validated issue types and an improved SZZ algorithm that discards comments, whitespace, and refactorings. Additionally, we include static source code metrics as well as static analysis warnings and warning-density-derived metrics as features. To assess whether we can save cost, we incorporate a specialized defect prediction cost model. To evaluate our proposed improvements of the fine-grained just-in-time defect prediction approach, we conduct a case study that encompasses 38 Java projects, 492,241 file changes in 73,598 commits, and spans 15 years. We find that static source code metrics and static analysis warnings are correlated with bugs and that they can improve the quality and cost-saving potential of just-in-time defect prediction models. (A small illustrative sketch of a warning-density feature follows this entry.)
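A note on the entry above: the abstract mentions warning density among the derived features. The following Python snippet is a minimal, illustrative sketch (not the paper's actual tooling) of how such a feature could be computed per file and combined with a size metric and a raw warning count into a feature vector; the FileSnapshot class and its field names are hypothetical.

from dataclasses import dataclass

@dataclass
class FileSnapshot:
    path: str
    lines_of_code: int    # static size metric for the file (assumed available)
    static_warnings: int  # count of static analysis findings (assumed available)

def warning_density(snapshot: FileSnapshot) -> float:
    """Warnings normalised by file size; defined as 0 for empty files."""
    if snapshot.lines_of_code == 0:
        return 0.0
    return snapshot.static_warnings / snapshot.lines_of_code

def feature_vector(snapshot: FileSnapshot) -> list[float]:
    """Combine a size metric, a raw warning count and the warning density
    into one feature vector for a just-in-time defect prediction model."""
    return [
        float(snapshot.lines_of_code),
        float(snapshot.static_warnings),
        warning_density(snapshot),
    ]

if __name__ == "__main__":
    print(feature_vector(FileSnapshot("Foo.java", 250, 7)))  # -> [250.0, 7.0, 0.028]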
  • 2019 Poster
    Trautsch, Alexander Richard (ORCID: 0000-0001-5236-7953)
    Effects of Automated Static Analysis Tools: A Multidimensional View on Quality Evolution
    International Conference on Software Engineering (ICSE), Montreal, 2019
    DOI: 10.1109/icse-companion.2019.00075
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/69418
  • 2018 Journal Article
    Herbold, Steffen; Trautsch, Alexander; Grabowski, Jens
    A Comparative Study to Benchmark Cross-Project Defect Prediction Approaches
    IEEE Transactions on Software Engineering 44(9), pp. 811–833, 2018
    DOI: 10.1109/TSE.2017.2724538 · ISSN: 0098-5589 · eISSN: 1939-3520, 2326-3881
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/76062
    Erratum available: see the 2019 correction in this list (/handle/2/76064)
  • 2022 Journal Article
    Herbold, Steffen; Trautsch, Alexander; Ledel, Benjamin; Aghamohammadi, Alireza; Ghaleb, Taher A.; Chahal, Kuljit Kaur; Bossenmaier, Tim; Nagaria, Bhaveet; Makedonski, Philip; Ahmadabadi, Matin Nili; Erbel, Johannes
    A fine-grained data set and analysis of tangling in bug fixing commits
    Empirical Software Engineering 27(6), 2022
    DOI: 10.1007/s10664-021-10083-5 · ISSN: 1382-3256 · eISSN: 1573-7616 · License: CC BY 4.0 (https://creativecommons.org/licenses/by/4.0)
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/113398
    Abstract: Context: Tangled commits are changes to software that address multiple concerns at once. For researchers interested in bugs, tangled commits mean that they actually study not only bugs, but also other concerns irrelevant for the study of bugs. Objective: We want to improve our understanding of the prevalence of tangling and the types of changes that are tangled within bug fixing commits. Methods: We use a crowd sourcing approach for manual labeling to validate which changes contribute to bug fixes for each line in bug fixing commits. Each line is labeled by four participants. If at least three participants agree on the same label, we have consensus. Results: We estimate that between 17% and 32% of all changes in bug fixing commits modify the source code to fix the underlying problem. However, when we only consider changes to the production code files, this ratio increases to 66% to 87%. We find that about 11% of lines are hard to label, leading to active disagreements between participants. Due to confirmed tangling and the uncertainty in our data, we estimate that 3% to 47% of data is noisy without manual untangling, depending on the use case. Conclusion: Tangled commits have a high prevalence in bug fixes and can lead to a large amount of noise in the data. Prior research indicates that this noise may alter results. As researchers, we should be skeptics and assume that unvalidated data is likely very noisy, until proven otherwise. (A short sketch of the consensus rule follows this entry.)
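A note on the entry above: the abstract describes the consensus rule used for manual labeling (each changed line labeled by four participants, consensus if at least three agree). The Python sketch below illustrates only that voting rule under assumed label names and data layout; it is not the study's actual tooling.

from collections import Counter
from typing import Optional

def consensus_label(labels: list[str], required: int = 3) -> Optional[str]:
    """Return the label at least `required` participants agreed on, else None."""
    label, count = Counter(labels).most_common(1)[0]
    return label if count >= required else None

if __name__ == "__main__":
    # One bug-fixing commit: one entry per changed line, four labels each
    # (label names here are placeholders, not the study's label set).
    lines = {
        "src/Foo.java:42": ["bugfix", "bugfix", "bugfix", "refactoring"],
        "src/Foo.java:43": ["bugfix", "test", "refactoring", "whitespace"],
    }
    for line, labels in lines.items():
        result = consensus_label(labels)
        print(line, "->", result if result else "no consensus (disagreement)")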
  • 2022 Journal Article
    Herbold, Steffen; Trautsch, Alexander; Trautsch, Fabian; Ledel, Benjamin
    Problems with SZZ and features: An empirical study of the state of practice of defect prediction data collection
    Empirical Software Engineering 27(2), article 42, 2022
    DOI: 10.1007/s10664-021-10092-4 · ISSN: 1382-3256 · eISSN: 1573-7616 · License: CC BY 4.0 (https://creativecommons.org/licenses/by/4.0)
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/98985
    Abstract: Context: The SZZ algorithm is the de facto standard for labeling bug fixing commits and finding inducing changes for defect prediction data. Recent research uncovered potential problems in different parts of the SZZ algorithm. Most defect prediction data sets provide only static code metrics as features, while research indicates that other features are also important. Objective: We provide an empirical analysis of the defect labels created with the SZZ algorithm and the impact of commonly used features on results. Method: We used a combination of manual validation and adopted or improved heuristics for the collection of defect data. We conducted an empirical study on 398 releases of 38 Apache projects. Results: We found that only half of the bug fixing commits determined by SZZ are actually bug fixing. If a six-month time frame is used in combination with SZZ to determine which bugs affect a release, one file is incorrectly labeled as defective for every file that is correctly labeled as defective. In addition, two defective files are missed. We also explored the impact of the relatively small set of features that are available in most defect prediction data sets, as there are multiple publications that indicate that, e.g., churn-related features are important for defect prediction. We found that the difference of using more features is not significant. Conclusion: Problems with inaccurate defect labels are a severe threat to the validity of the state of the art of defect prediction. Small feature sets seem to be a less severe threat. (A minimal sketch of the SZZ idea follows this entry.)
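A note on the entry above: the abstract refers to the SZZ algorithm for finding bug-inducing changes. The Python sketch below shows only the core SZZ idea, assuming a hypothetical VCS backend: the non-trivial lines deleted by a bug-fixing commit are blamed to find the commits that last touched them, which become candidate inducing changes. The deleted_lines and blame callables are placeholders, and the trivial-line filter is a simplified stand-in for the improved filtering discussed in the paper (which also discards refactorings).

from typing import Callable, Iterable

def is_trivial(line: str) -> bool:
    """Skip blank lines and (simplistically) comment-only lines."""
    stripped = line.strip()
    return not stripped or stripped.startswith(("//", "*", "/*"))

def szz_candidates(
    fix_commit: str,
    deleted_lines: Callable[[str], Iterable[tuple[str, int, str]]],
    blame: Callable[[str, str, int], str],
) -> set[str]:
    """Return commits that last touched the non-trivial lines deleted by the fix.

    deleted_lines(fix) yields (path, line number in the parent, line content);
    blame(fix, path, line_no) returns the commit that introduced that line.
    """
    inducing = set()
    for path, line_no, content in deleted_lines(fix_commit):
        if is_trivial(content):
            continue
        inducing.add(blame(fix_commit, path, line_no))
    return inducing

if __name__ == "__main__":
    # Fake backend for demonstration only.
    fake_deleted = lambda fix: [("Foo.java", 10, "return x;"), ("Foo.java", 11, "// old comment")]
    fake_blame = lambda fix, path, line_no: "c0ffee1"
    print(szz_candidates("deadbeef", fake_deleted, fake_blame))  # -> {'c0ffee1'}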
  • 2020 Journal Article · Research Paper
    Trautsch, Alexander; Herbold, Steffen; Grabowski, Jens
    A longitudinal study of static analysis warning evolution and the effects of PMD on software quality in Apache open source projects
    Empirical Software Engineering 25(6), pp. 5137–5192, 2020
    DOI: 10.1007/s10664-020-09880-1 · ISSN: 1382-3256 · eISSN: 1573-7616
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/69416
  • 2019 Journal Article · Erratum
    Herbold, Steffen; Trautsch, Alexander; Grabowski, Jens
    Correction of “A Comparative Study to Benchmark Cross-Project Defect Prediction Approaches”
    IEEE Transactions on Software Engineering 45(6), pp. 632–636, 2019
    DOI: 10.1109/TSE.32 · ISSN: 0098-5589 · eISSN: 1939-3520, 2326-3881
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/76064
    Erratum of: A Comparative Study to Benchmark Cross-Project Defect Prediction Approaches (/handle/2/76062)
  • 2020 Journal Article · Erratum
    Herbold, Steffen; Trautsch, Alexander; Trautsch, Fabian
    Correction to: On the feasibility of automated prediction of bug and non-bug issues
    Empirical Software Engineering 25(6), pp. 5370–5373, 2020
    DOI: 10.1007/s10664-020-09888-7 · ISSN: 1382-3256 · eISSN: 1573-7616
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/83824
    Erratum of: On the feasibility of automated prediction of bug and non-bug issues (/handle/2/80971)
  • 2020 Journal Article
    Herbold, Steffen; Trautsch, Alexander; Trautsch, Fabian
    On the feasibility of automated prediction of bug and non-bug issues
    Empirical Software Engineering 25(6), pp. 5333–5369, 2020
    DOI: 10.1007/s10664-020-09885-w · ISSN: 1382-3256 · eISSN: 1573-7616
    URI: https://resolver.sub.uni-goettingen.de/purl?gro-2/80971
    Erratum available: see the 2020 correction in this list (/handle/2/83824)