Creating Fake ODK Data

@janna I usually hack together a Python script to do this. For example, here is a post-covid-submissions.py script that I used to generate realistic submissions for the demo of the WHO COVID-19 Contact Tracing Form.

It'd be awesome if someone could build a friendly web UI to generate fake data. @kayr's CLI tool would be a great place to start.


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import uuid
import random
import requests

# Submission XML taken from a minimal submission. Every %s is a positional
# placeholder that gets filled in when a record is built.
template = (
    "<?xml version='1.0' ?>"
    '<data id="covid-19_A0" version="2020032802"'
    ' xmlns:ev="http://www.w3.org/2001/xml-events"'
    ' xmlns:h="http://www.w3.org/1999/xhtml"'
    ' xmlns:jr="http://openrosa.org/javarosa"'
    ' xmlns:odk="http://www.opendatakit.org/xforms"'
    ' xmlns:orx="http://openrosa.org/xforms"'
    ' xmlns:xsd="http://www.w3.org/2001/XMLSchema">'
    "<device_id>%s</device_id>"
    "<start_time>%s</start_time>"
    "<end_time>%s</end_time>"
    "<case><case_id>%s</case_id><status>%s</status></case>"
    "<data_collector><dc_name>%s</dc_name><dc_institution>%s</dc_institution></data_collector>"
    "<case_info><sex>%s</sex></case_info>"
    "<case_id_dob><age_years>%s</age_years><age_months>%s</age_months></case_id_dob>"
    "<case_status>%s</case_status>"
    "<symptoms_1><fever>%s</fever><sore_throat>%s</sore_throat><runny_nose>%s</runny_nose><cough>%s</cough></symptoms_1>"
    "<symptoms_2><shortness_of_breath>%s</shortness_of_breath><vomiting>%s</vomiting><nausea>%s</nausea><diarrhoea>%s</diarrhoea></symptoms_2>"
    "<meta><instanceID>%s</instanceID></meta>"
    "</data>"
)

# The bearer token is taken from the app user URL.
post_headers = {
    "Authorization": "Bearer ABC123",
    "Content-Type": "application/xml",
}

# Allowed answers for the yes/no symptom questions.
yes_no = ["yes", "no"]

def _timestamp(second_of_day):
    """Format a second-of-day offset (0-86399) as a timestamp on 2020-03-29."""
    minutes, seconds = divmod(second_of_day, 60)
    hours, minutes = divmod(minutes, 60)
    return "2020-03-29T%02d:%02d:%02d.000-07:00" % (hours, minutes, seconds)


# Build and submit 100 random records, one HTTP POST per record.
for _ in range(100):

    # generate fake data
    device_id = "%015d" % random.randint(1, 999999999999999)
    # Draw the end time from [start, end of day] so that a record never
    # ends before it starts.
    start_second = random.randint(0, 86399)
    end_second = random.randint(start_second, 86399)
    start_time = _timestamp(start_second)
    end_time = _timestamp(end_second)
    case_id = "%05d" % random.randint(1, 99999)
    status = random.choice(["alive", "dead"])
    dc_name = random.choice([
        "Alexander", "Alice", "Ayesha", "Benjamin", "Charlotte", "Do Yoon",
        "Emilia", "Emily", "Emma", "Francesco", "Gabriel", "Ha Yoon",
        "Hiroshi", "Hugo", "Jakob", "James", "Jose", "Junior", "Li", "Liam",
        "Louise", "Lucia", "Maria", "Mohammed", "Muhammed", "Noah", "Nozomi",
        "Oliver", "Olivia", "Precious", "Saanvi", "Sofia", "Sofie", "Tamar",
        "Wei", "William",
    ])
    dc_institution = random.choice(["WHO", "CDC", "MOH", "Hospital", "Red Cross", "Clinic"])
    sex = random.choice(["male", "female"])
    age_years = random.randint(0, 99)
    age_months = random.randint(0, 11)
    case_status = random.choice(["suspected", "probable", "confirmed"])
    # One independent yes/no answer per symptom, in template order:
    # fever, sore_throat, runny_nose, cough,
    # shortness_of_breath, vomiting, nausea, diarrhoea.
    symptoms = tuple(random.choice(yes_no) for _symptom in range(8))
    instanceid = uuid.uuid4()

    # insert data into template (values must follow the order of its %s slots)
    instance = template % (
        (device_id, start_time, end_time, case_id, status, dc_name,
         dc_institution, sex, age_years, age_months, case_status)
        + symptoms
        + (instanceid,)
    )

    # post to server; the timeout keeps an unresponsive server from hanging
    # the script forever (a timeout raises and stops the run)
    result = requests.post(
        "https://demo.example.com/v1/projects/1/forms/covid-19_A0/submissions",
        data=instance,
        headers=post_headers,
        timeout=30,
    )
    # Report rejected submissions instead of silently dropping them, but keep
    # going so one bad record does not abort the remaining ones.
    if not result.ok:
        print("submission %s failed: HTTP %s" % (instanceid, result.status_code))
7 Likes