title = {A {Behavioral} {Approach} to {Understanding} the {Git} {Experience}},
url = {http://hdl.handle.net/10125/71493},
doi = {10.24251/HICSS.2021.872},
abstract = {The Investigating and Archiving the Scholarly Git Experience (IASGE) project is a multi-track study focused on understanding the uses of Git by students, faculty, and staff working in academic research institutions as well as the ways source code repositories and their associated contextual ephemera can be better preserved. This research, in turn, has implications regarding how to support Git in the scholarly process, how version control systems contribute to reproducibility, and how Library and Information Science (LIS) professionals can support Git through instruction and sustainability efforts. In this paper, we focus on a subset of our larger project and take a deep look at what code hosting platforms offer researchers in terms of productivity and collaboration. For this portion, a survey, focus groups, and user experience interviews were conducted to gain an understanding of how and why scholarly researchers use Version Control Systems (VCS) as well as some of the pain points in learning and using VCS for daily work.},
author = {Milliken, Genevieve and Nguyen, Sarah and Steeves, Vicky},
booktitle = {{Hawaii International Conference on System Sciences}},
title = {{ReproServer}: {Making} {Reproducibility} {Easier} and {Less} {Intensive}},
shorttitle = {{ReproServer}},
url = {http://arxiv.org/abs/1808.01406},
abstract = {Reproducibility in the computational sciences has been stymied because of the complex and rapidly changing computational environments in which modern research takes place. While many will espouse reproducibility as a value, the challenge of making it happen (both for themselves and testing the reproducibility of others' work) often outweigh the benefits. There have been a few reproducibility solutions designed and implemented by the community. In particular, the authors are contributors to ReproZip, a tool to enable computational reproducibility by tracing and bundling together research in the environment in which it takes place (e.g. one's computer or server). In this white paper, we introduce a tool for unpacking ReproZip bundles in the cloud, ReproServer. ReproServer takes an uploaded ReproZip bundle (.rpz file) or a link to a ReproZip bundle, and users can then unpack them in the cloud via their browser, allowing them to reproduce colleagues' work without having to install anything locally. This will help lower the barrier to reproducing others' work, which will aid reviewers in verifying the claims made in papers and reusing previously published research.},
journal = {arXiv:1808.01406 [cs]},
author = {Rampin, Remi and Chirigati, Fernando and Steeves, Vicky and Freire, Juliana},
title = {Reproducible experiments on dynamic resource allocation in cloud data centers},
volume = {59},
copyright = {All rights reserved},
issn = {0306-4379},
url = {https://arxiv.org/abs/1703.00042},
doi = {10.1016/j.is.2015.12.004},
abstract = {In Wolke et al. [1] we compare the efficiency of different resource allocation strategies experimentally. We focused on dynamic environments where virtual machines need to be allocated and deallocated to servers over time. In this companion paper, we describe the simulation framework and how to run simulations to replicate experiments or run new experiments within the framework.},
language = {en},
journal = {Information Systems},
author = {Wolke, Andreas and Bichler, Martin and Chirigati, Fernando and Steeves, Victoria},
abstract={{ReproZip is a tool aimed at simplifying the process of creating reproducible experiments. After finishing an experiment, writing a website, constructing a database, or creating an interactive environment, users can run ReproZip to create reproducible packages, archival snapshots, and an easy way for reviewers to validate their work.}},
abstract = {{Over the past few years, research reproducibility has been increasingly highlighted as a multifaceted challenge across many disciplines. There are socio-cultural obstacles as well as a constantly changing technical landscape that make replicating and reproducing research extremely difficult. Researchers face challenges in reproducing research across different operating systems and different versions of software, to name just a few of the many technical barriers. The prioritization of citation counts and journal prestige has undermined incentives to make research reproducible. While libraries have been building support around research data management and digital scholarship, reproducibility is an emerging area that has yet to be systematically addressed. To respond to this, New York University (NYU) created the position of Librarian for Research Data Management and Reproducibility (RDM \& R), a dual appointment between the Center for Data Science (CDS) and the Division of Libraries. This report will outline the role of the RDM \& R librarian, paying close attention to the collaboration between the CDS and Libraries to bring reproducible research practices into the norm.}},
abstract={{Objective: This paper aims to inform on opportunities for librarians to assist faculty with research data management by examining practices and attitudes among life sciences faculty at a tier one research university. Methods: The authors issued a survey to estimate actual and perceived research data management needs of New York University (NYU) life sciences faculty in order to understand how the library could best contribute to the research life cycle. Results: Survey responses indicate that over half of the respondents were aware of publisher and funder mandates, and most are willing to share their data, but many indicated they do not utilize data repositories. Respondents were largely unaware of data services available through the library, but the majority were open to considering such services. Survey results largely mimic those of similar studies, in that storing data (and the subsequent ability to share it) is the most easily recognized barrier to sound data management practices. Conclusions: At NYU, as with other institutions, the library is not immediately recognized as a valuable partner in managing research output. This study suggests that faculty are largely unaware of, but are open to, existent library services, indicating that immediate outreach efforts should be aimed at promoting them.}},
abstract={{The adoption of reproducibility remains low, despite incentives becoming increasingly common in different domains, conferences, and journals. The truth is, reproducibility is technically difficult to achieve due to the complexities of computational environments. To address these technical challenges, we created ReproZip, an open-source tool that automatically packs research along with all the necessary information to reproduce it, including data files, software, OS version, and environment variables. Everything is then bundled into an rpz file, which users can use to reproduce the work with ReproZip and a suitable unpacker (e.g.: using Vagrant or Docker). The rpz file is general and contains rich metadata: more unpackers can be added as needed, better guaranteeing long-term preservation. However, installing the unpackers can still be burdensome for secondary users of ReproZip bundles. In this paper, we will discuss how ReproZip and our new tool, ReproServer, can be used together to facilitate access to well-preserved, reproducible work. ReproServer is a web application that allows users to upload or provide a link to a ReproZip bundle, and then interact with/reproduce the contents from the comfort of their browser. Users are then provided a persistent link to the unpacked work on ReproServer which they can share with reviewers or colleagues.}},
abstract={{Achieving research reproducibility is challenging in many ways: there are social and cultural obstacles as well as a constantly changing technical landscape that makes replicating and reproducing research difficult. Users face challenges in reproducing research across different operating systems, in using different versions of software across long projects and among collaborations, and in using publicly available work. The dependencies required to reproduce the computational environments in which research happens can be exceptionally hard to track – in many cases, these dependencies are hidden or nested too deeply to discover, and thus impossible to install on a new machine, which means adoption remains low. In this paper, we present ReproZip, an open source tool to help overcome the technical difficulties involved in preserving and replicating research, applications, databases, software, and more. We will examine the current use cases of ReproZip, ranging from digital humanities to machine learning. We also explore potential library use cases for ReproZip, particularly in digital libraries and archives, liaison librarianship, and other library services. We believe that libraries and archives can leverage ReproZip to deliver more robust reproducibility services, repository services, as well as enhanced discoverability and preservation of research materials, applications, software, and computational environments.}},
title = {Saving {Data} {Journalism}: {Using} {ReproZip}-{Web} to {Capture} {Dynamic} {Websites} for {Future} {Reuse}},
shorttitle = {Saving {Data} {Journalism}},
url = {https://osf.io/preprints/lissa/khtdr/},
doi = {10.31229/osf.io/khtdr},
abstract = {While dynamic and interactive Web applications are becoming increasingly common to convey news and stories to people all around the world, their technological complexity makes it hard to archive and preserve such applications, and as such, they are being lost. We present ReproZip-Web, an open-source prototype aimed at saving these news applications from extinction. ReproZip-Web leverages ReproZip, a computational reproducibility tool, and Webrecorder, a tool for recording Web resources, to automatically and transparently capture and replay dynamic Websites. The prototype creates a bundle that contains all the information needed to reproduce a news application, and its lightweight nature makes it ideal for distribution and preservation. We will present our ongoing work on the prototype, and also discuss some use cases and avenues for future development.},
publisher = {LIS Scholarship Archive},
booktitle = {iPres 2019},
author = {Boss, Katherine E. and Steeves, Vicky and Rampin, Remi and Chirigati, Fernando and Hoffman, Brian},
title = {What makes a digital steward: {A} competency profile based on the {National} {Digital} {Stewardship} {Residencies}},
shorttitle = {What makes a digital steward},
url = {https://osf.io/preprints/lissa/tnmra/},
booktitle = {{iPres} 2016},
doi = {10.31229/osf.io/tnmra},
abstract = {Digital stewardship is a rapidly maturing field within library and information science. This domain engages in the active and long-term management of digital objects towards their preservation for and unencumbered access by future generations. Although this field is growing quickly, it lacks a competency profile for practitioners that is commonplace in LIS (example: the American Library Association's Core Competences of Librarianship). This study sought to fill that gap by creating a profile of the skills, responsibilities, and knowledge areas that define competency in digital stewardship, based on three key datasets: 1) literature in the field through a literature review (to define the scope of the profile), 2) NDSR project descriptions, qualitatively analyzed to get a baseline understanding of expected competencies, and 3) the results of a survey given to current and past NDSR residents, quantitatively evaluated to illustrate competencies’ importance to professional success.},
publisher = {LIS Scholarship Archive},
author = {Blumenthal, Karl-Rainer and Griesinger, Peggy and Kim, Julia and Peltzman, Shira and Steeves, Vicky},
title = {What’s {Wrong} with {Digital} {Stewardship}: {Evaluating} the {Organization} of {Digital} {Preservation} {Programs} from {Practitioners}’ {Perspectives}},
volume = {7},
issn = {2380-8845},
shorttitle = {What’s {Wrong} with {Digital} {Stewardship}},
abstract={{The National Digital Stewardship Alliance surveyed practitioners in 2012 and again in 2017 to gauge, among other things, how satisfied they were with their organizations’ digital preservation function. This study seeks to understand what causes the high and rising levels of dissatisfaction that practitioners reported. We interviewed 21 digital stewards and asked them to describe the organizational context in which they work; to reflect on what aspects of their organizations’ approach to digital preservation are working well and which are not; and to evaluate necessary areas of improvement. We identified experiences that were common among participants using a qualitative research methodology based on phenomenology. These conversations revealed that practitioners largely consider digital stewardship values and goals to be misunderstood at an organizational level, and demonstrated that the absence of a long-term vision for digital stewardship disempowers practitioners.}},