@phdthesis{2021,
  author      = {Harandi, Mahboobeh},
  title       = {Occasional Groups in Crowdsourcing Platforms},
  school      = {Syracuse University},
  type        = {Doctoral Thesis},
  address     = {Syracuse, NY, USA},
  year        = {2021},
  month       = jul,
  abstract    = {Contributors to online crowdsourcing systems generally work independently on pieces of the product but in some cases, task interdependencies may require collaboration to develop a final product. These collaborations though take a distinctive form because of the nature of crowdsourced work. Collaboration may be implicit instead of explicit. Individuals engaged in a group conversation may not stay with the group for long, i.e., the group is an {\textquotedblleft}occasional group.{\textquotedblright} Occasional group interactions are often not well supported by systems, as they are not designed for team work. This dissertation examines the characteristics and work of occasional groups in the Gravity Spy citizen science project. Occasional groups in this system form to reach agreement about the description of novel categories of data that volunteers identify in the system. The author first employed virtual ethnography over six months to investigate volunteers{\textquoteright} interactions and to identify features of the occasional groups in this setting. Most groups were transient, interacting only for a short time to develop one product, but a few worked together repeatedly. To describe how the overall process of finding new categories brings individuals to work together, the author interviewed nine active volunteers about their work practices. Volunteers individually or collectively use tools such as hashtags, collections and a search tool to identify examples of a new category and to agree on a name and description. Finally, the author investigated the details of the processes of developing proposals for four new categories over three years. She employed virtual and trace ethnography to collect messages from several discussion threads and boards to identify the analytical moves made by occasional group members in developing a new category. Volunteers would speculate on a new pattern and its causes, discuss how different categories are related and split or merge descriptions. They employed techniques such as detailed descriptions of data to create common ground, @-mention of other volunteers to increase the visibility of their work to each other and use of the category proposal as a vehicle to coordinate their actions. Findings contribute to the group literature by recognizing that groups with no formal formation and work processes are capable of doing work that would not otherwise be possible. The results advance our understanding of group categorization literature by showing how the analytical moves are different when group members work occasionally. The thesis also provides some suggestions for better support of occasional groups in crowdsourcing platforms.},
  attachments = {https://crowston.syr.edu/sites/crowston.syr.edu/files/Dissertation_MH.pdf},
}

@inproceedings{2018,
  author      = {Harandi, Mahboobeh and Crowston, Kevin and Jackson, Corey and {\O}sterlund, Carsten},
  title       = {The Genie in the Bottle: Different Stakeholders, Different Interpretations of Machine Learning},
  booktitle   = {Proceedings of the Hawaii International Conference on System Sciences ({HICSS})},
  year        = {2020},
  address     = {Wailea, HI},
  abstract    = {Machine learning (ML) constitutes an algorithmic phenomenon with some distinctive characteristics (e.g., being trained, probabilistic). Our understanding of such systems is limited when it comes to how these unique characteristics play out in organizational settings and what challenges different groups of users will face in working with them. We explore how people developing or using an ML system come to understand its capabilities and challenges. We draw on the social construction of technology tradition to frame our analysis of interviews and discussion board posts involving designers and users of an ML-supported citizen-science crowdsourcing project named Gravity Spy. Our findings reveal some of the challenges facing different relevant social groups. We find that the type of understandings achieved by groups having less interaction with the technology is shaped by outside influences rather than the specifics of the system and its role in the project. Notably, some users mistake human input for ML input. This initial understanding of how different participants understand and engage with ML points to challenges that need to be overcome to help participants deal with the opaque position ML often holds in a work system.},
  doi         = {10.24251/HICSS.2020.719},
  attachments = {https://crowston.syr.edu/sites/crowston.syr.edu/files/Social_Construction_of_ML_in_GS_HICCS2020.pdf},
}

@article{9998,
  author      = {Crowston, Kevin and {\O}sterlund, Carsten and Lee, Tae Kyoung and Jackson, Corey Brian and Harandi, Mahboobeh and Allen, Sarah and Bahaadini, Sara and Coughlin, Scott and Katsaggelos, Aggelos and Larson, Shane and Rohani, Neda and Smith, Joshua and Trouille, Laura and Zevin, Michael},
  title       = {Knowledge Tracing to Model Learning in Online Citizen Science Projects},
  journal     = {IEEE Transactions on Learning Technologies},
  volume      = {13},
  year        = {2020},
  pages       = {123--134},
  abstract    = {We present the design of a citizen science system that uses machine learning to guide the presentation of image classification tasks to newcomers to help them more quickly learn how to do the task while still contributing to the work of the project. A Bayesian model for tracking volunteer learning for training with tasks with uncertain outcomes is presented and fit to data from 12,986 volunteer contributors. The model can be used both to estimate the ability of volunteers and to decide the classification of an image. A simulation of the model applied to volunteer promotion and image retirement suggests that the model requires fewer classifications than the current system.},
  issn        = {1939-1382},
  doi         = {10.1109/TLT.2019.2936480},
  attachments = {https://crowston.syr.edu/sites/crowston.syr.edu/files/transaction\%20paper\%20final\%20figures\%20in\%20text.pdf},
}

@article{jackson2020shifting,
  author      = {Jackson, Corey Brian and {\O}sterlund, Carsten and Harandi, Mahboobeh and Crowston, Kevin and Trouille, Laura},
  title       = {Shifting Forms of Engagement: Volunteer Learning in Online Citizen Science},
  journal     = {Proceedings of the {ACM} on Human-Computer Interaction},
  year        = {2020},
  pages       = {36},
  abstract    = {Open collaboration platforms involve people in many tasks, from editing articles to analyzing datasets. To facilitate mastery of these practices, communities offer a number of learning resources, ranging from project-defined FAQs to individually-oriented search tools and communal discussion boards. However, it is not clear which project resources best support participant learning, overall and at different stages of engagement with the project. We draw on S{\o}rensen{\textquoteright}s framework of forms of presence to distinguish three forms of engagement with learning resources: authoritative, agent-centered and communal. We analyzed trace data from the Gravity Spy citizen-science project using a mixed-effects logistic regression with volunteer performance as an outcome variable. The findings suggest that engagement with authoritative resources (e.g., those constructed by project organizers) facilitates performance initially. However, as tasks become more difficult, volunteers seek and benefit from engagement with their own agent-centered resources and community-generated resources. These findings suggest a broader scope for the design of learning resources for online communities.},
  doi         = {10.1145/3392841},
  attachments = {https://crowston.syr.edu/sites/crowston.syr.edu/files/3392841.pdf},
}

@article{jackson2020teaching,
  author      = {Jackson, Corey and {\O}sterlund, Carsten and Crowston, Kevin and Harandi, Mahboobeh and Allen, Sarah and Bahaadini, Sara and Coughlin, Scott and Kalogera, Vicky and Katsaggelos, Aggelos and Larson, Shane and Rohani, Neda and Smith, Joshua and Trouille, Laura and Zevin, Michael},
  title       = {Teaching Citizen Scientists to Categorize Glitches using Machine-Learning-Guided Training},
  journal     = {Computers in Human Behavior},
  volume      = {105},
  year        = {2020},
  pages       = {106198},
  abstract    = {Training users in online communities is important for making high performing contributors. However, several conundrums exist in choosing the most effective approaches to training users. For example, if it takes time to learn to do the task correctly, then the initial contributions may not be of high enough quality to be useful. We conducted an online field experiment where we recruited users (N = 386) in a web-based citizen-science project to evaluate the two training approaches. In one training regime, users received one-time training and were asked to learn and apply twenty classes to the data. In the other approach, users were gradually exposed to classes of data that were selected by trained machine learning algorithms as being members of particular classes. The results of our analysis revealed that the gradual training produced {\textquotedblleft}high performing contributors{\textquotedblright}. In our comparison of the treatment and control groups we found users who experienced gradual training performed significantly better on the task (an average accuracy of 90\% vs. 54\%), contributed more work (an average of 228 vs. 121 classifications), and were retained in the project for a longer period of time (an average of 2.5 vs. 2 sessions). The results suggest online production communities seeking to train newcomers would benefit from training regimes that gradually introduce them to the work of the project using real tasks.},
  doi         = {10.1016/j.chb.2019.106198},
  attachments = {https://crowston.syr.edu/sites/crowston.syr.edu/files/MLGT-preprint.pdf},
}

@article{2019,
  author      = {Coughlin, Scott and Bahaadini, Sara and Rohani, Neda and Zevin, Michael and Patane, Oli and Harandi, Mahboobeh and Jackson, Corey Brian and Noroozi, V. and Allen, Sarah and Areeda, J. and Coughlin, M. and Ruiz, P. and Berry, C. P. L. and Crowston, Kevin and Katsaggelos, Aggelos and Lundgren, Andrew and {\O}sterlund, Carsten and Smith, Joshua and Trouille, Laura and Kalogera, Vicky},
  title       = {Classifying the Unknown: Discovering Novel Gravitational-Wave Detector Glitches using Similarity Learning},
  journal     = {Physical Review D},
  volume      = {99},
  year        = {2019},
  pages       = {082002},
  abstract    = {The observation of gravitational waves from compact binary coalescences by LIGO and Virgo has begun a new era in astronomy. A critical challenge in making detections is determining whether loud transient features in the data are caused by gravitational waves or by instrumental or environmental sources. The citizen-science project Gravity Spy has been demonstrated as an efficient infrastructure for classifying known types of noise transients (glitches) through a combination of data analysis performed by both citizen volunteers and machine learning. We present the next iteration of this project, using similarity indices to empower citizen scientists to create large data sets of unknown transients, which can then be used to facilitate supervised machine-learning characterization. This new evolution aims to alleviate a persistent challenge that plagues both citizen-science and instrumental detector work: the ability to build large samples of relatively rare events. Using two families of transient noise that appeared unexpectedly during LIGO{\textquoteright}s second observing run, we demonstrate the impact that the similarity indices could have had on finding these new glitch types in the Gravity Spy program.},
  issn        = {2470-0010},
  doi         = {10.1103/PhysRevD.99.082002},
}

@inproceedings{jackson2019linguistic,
  author      = {Jackson, Corey Brian and {\O}sterlund, Carsten and Harandi, Mahboobeh and Kharwar, Dhruv and Crowston, Kevin},
  title       = {Linguistic Adoption in Online Citizen Science: A Structurational Perspective},
  booktitle   = {Proceedings of the International Conference on Information Systems ({ICIS})},
  year        = {2019},
  address     = {Munich, Germany},
  abstract    = {For peer-production projects to be successful, members must develop a specific and common language that enables them to cooperate. We address the question of what factors affect the development of shared language in open peer production communities. Answering this question is important because we want the communities to be productive even when self-managed, which requires understanding how shared language emerges. We examine this question using a structurational lens in the setting of a citizen science project. Examining the use of words in the Gravity Spy citizen science project, we find that many words are reused and that most novel words that are introduced are not picked up, showing reproduction of structure. However, some novel words are used by others, showing an evolution of the structure. Participants with roles closer to the science are more likely to have their words reused, showing the mutually reinforcing nature of structures of signification, legitimation and domination.},
  url         = {https://aisel.aisnet.org/icis2019/crowds_social/crowds_social/28/},
  attachments = {https://crowston.syr.edu/sites/crowston.syr.edu/files/Linguistic\%20Adoption\%20\%28ICIS\%29\%20final.pdf},
}

@article{lee2018appealing,
  author      = {Lee, Tae Kyoung and Crowston, Kevin and Harandi, Mahboobeh and {\O}sterlund, Carsten and Miller, Grant},
  title       = {Appealing to Different Motivations in a Message to Recruit Citizen Scientists: Results of a Field Experiment},
  journal     = {Journal of Science Communication},
  volume      = {17},
  number      = {1},
  year        = {2018},
  pages       = {A02},
  abstract    = {This study examines the relative efficacy of citizen science recruitment messages appealing to four motivations that were derived from previous research on motives for participation in citizen-science projects. We report on an experiment (N=36,513) that compared the response to email messages designed to appeal to these four motives for participation. We found that the messages appealing to the possibility of contributing to science and learning about science attracted more attention than did one about helping scientists but that one about helping scientists generated more initial contributions. Overall, the message about contributing to science resulted in the largest volume of contributions and joining a community, the lowest. The results should be informative to those managing citizen-science projects.},
  keywords    = {Citizen Science},
  doi         = {10.22323/2.17010202},
  url         = {https://jcom.sissa.it/archive/17/01/JCOM_1701_2018_A02},
  attachments = {https://crowston.syr.edu/sites/crowston.syr.edu/files/JCOM_1701_2018_A02.pdf},
}

@article{jackson2018folksonomies,
  author      = {Jackson, Corey Brian and Crowston, Kevin and {\O}sterlund, Carsten and Harandi, Mahboobeh},
  title       = {Folksonomies to Support Coordination and Coordination of Folksonomies},
  journal     = {Computer Supported Cooperative Work},
  volume      = {27},
  year        = {2018},
  pages       = {647--678},
  abstract    = {Members of highly-distributed groups in online production communities face challenges in achieving coordinated action. Existing CSCW research highlights the importance of shared language and artifacts when coordinating actions in such settings. To better understand how such shared language and artifacts are, not only a guide for, but also a result of collaborative work we examine the development of folksonomies (i.e., volunteer-generated classification schemes) to support coordinated action. Drawing on structuration theory, we conceptualize a folksonomy as an interpretive schema forming a structure of signification. Our study is set in the context of an online citizen-science project, Gravity Spy, in which volunteers label "glitches" (noise events recorded by a scientific instrument) to identify and name novel classes of glitches. Through a multi-method study combining virtual and trace ethnography, we analyze folksonomies and the work of labelling as mutually constitutive, giving folksonomies a dual role: an emergent folksonomy supports the volunteers in labelling images at the same time that the individual work of labelling images supports the development of a folksonomy. However, our analysis suggests that the lack of supporting norms and authoritative resources (structures of legitimation and domination) undermines the power of the folksonomy and so the ability of volunteers to coordinate their decisions about naming novel glitch classes. These results have implications for design. If we hope to support the development of emergent folksonomies, online production communities need to facilitate 1) tag gardening, a process of consolidating overlapping terms of artifacts; 2) demarcate a clear home for discourses around folksonomy disagreements; 3) highlight clearly when decisions have been reached; and 4) inform others about those decisions.},
  doi         = {10.1007/s10606-018-9327-z},
  url         = {https://rdcu.be/NZ7E},
  attachments = {https://crowston.syr.edu/sites/crowston.syr.edu/files/ECSCW-Paper-Final.pdf},
}