Initial recut of data import
This commit is contained in:
		
							parent
							
								
									6162bd984f
								
							
						
					
					
						commit
						75829eb4a1
					
				
					 5 changed files with 69 additions and 302 deletions
				
			
		
							
								
								
									
										69
									
								
								ccdb/utils/data.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								ccdb/utils/data.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,69 @@
 | 
			
		|||
import sqlite3
 | 
			
		||||
import os
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
 | 
			
		||||
import requests
 | 
			
		||||
import dateutil.parser as dp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_data_sources():
    """Return the legacy data sources, downloading them first if needed.

    Reads ``MANIFEST_URL`` from the Django settings. When the setting is
    missing or empty, returns ``None``. Otherwise makes sure the local
    ``data/`` directory exists, fetches whatever the manifest lists that
    is not on disk yet, and returns a dict keyed by source name.

    Returns:
        ``None`` when no manifest URL is configured, else a dict whose
        ``'db0'`` entry is whatever ``_get_db0()`` returns.
    """
    # getattr: a project that never defines MANIFEST_URL is treated the
    # same as one that sets it to an empty value.
    manifest_url = getattr(settings, 'MANIFEST_URL', None)
    if not manifest_url:
        return None
    data_dir = 'data/'
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    # Reconcile against the manifest on every call rather than only when
    # the directory was just created: _fetch_data checks each file for
    # existence before downloading, so this also repairs a directory that
    # exists but is missing some of its files.
    _fetch_data(data_dir, manifest_url)
    return {
        'db0': _get_db0(),
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _fetch_data(data_dir, url):
    """Cache the manifest at *url* and every file it lists under *data_dir*.

    The manifest is stored locally under its own basename and parsed as
    JSON; each entry of its ``files`` value is treated as a URL to fetch.
    Anything that already exists on disk is left untouched.
    """
    manifest_path = _filename(data_dir, url)
    if not os.path.exists(manifest_path):
        _write_url(url, manifest_path)

    with open(manifest_path) as manifest_file:
        listing = json.load(manifest_file)

    for file_url in listing['files']:
        target = _filename(data_dir, file_url)
        if os.path.exists(target):
            continue
        _write_url(file_url, target)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _filename(data_dir, url):
    """Return the local path under *data_dir* for the resource at *url*.

    The file name is the text after the last ``/`` in *url*. The pieces
    are combined with ``os.path.join`` so *data_dir* works with or
    without a trailing separator (``'data/'`` and ``'data'`` both yield
    ``data/<name>``).
    """
    basename = url.split('/')[-1]
    return os.path.join(data_dir, basename)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _write_url(url, filename):
    """Stream the resource at *url* into the local file *filename*.

    Raises:
        requests.HTTPError: when the server answers with an error status.
            Without this check the body of an error page would be saved
            and later mistaken for the real file.
    """
    response = requests.get(url, stream=True)
    try:
        response.raise_for_status()
        try:
            with open(filename, 'wb') as outfile:
                for chunk in response.iter_content(chunk_size=65536):
                    outfile.write(chunk)
        except Exception:
            # Callers treat "file exists" as "file is complete", so a
            # truncated download must not be left behind.
            if os.path.exists(filename):
                os.remove(filename)
            raise
    finally:
        # stream=True keeps the connection open until it is released.
        response.close()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _get_db0():
    """Return what ``setup_sqlite`` gives for the ``db0`` database file."""
    return setup_sqlite(
        'data/Replica_Hibernators_Back_UAF_Laptop_29_June_2015.sqlite'
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def dtdt(s):
    """
        This lets us parse whatever crazy date/time formats that
        come our way (looking at you, MS Access)

        sqlite3 passes converters the raw column value as ``bytes``, so
        it is decoded to text before being handed to dateutil. Text input
        is passed through unchanged.
    """
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    return dp.parse(s)


# Columns declared or aliased as "dtdt" (e.g. ``AS "col [dtdt]"``) are run
# through dtdt() on connections opened with the matching detect_types flags.
sqlite3.register_converter("dtdt", dtdt)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def setup_sqlite(dbfile):
    """Open the SQLite database at *dbfile* and return a cursor on it.

    The connection is opened with declared-type and column-name type
    detection enabled, and rows are returned as ``sqlite3.Row`` so they
    can be indexed by position or by column name.

    Returns:
        A cursor, or ``None`` when *dbfile* is not an existing file. A
        directory at that path also yields ``None`` instead of an
        ``OperationalError`` from ``sqlite3.connect``.
    """
    if not os.path.isfile(dbfile):
        return None
    detect = sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
    db = sqlite3.connect(dbfile, detect_types=detect)
    db.row_factory = sqlite3.Row
    # The connection stays reachable through cursor.connection.
    return db.cursor()
 | 
			
		||||
| 
						 | 
				
			
			@ -1,25 +0,0 @@
 | 
			
		|||
import sqlite3
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
import dateutil.parser as dp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def dtdt(s):
    """
        This lets us parse whatever crazy date/time formats that
        come our way (looking at you, MS Access)

        sqlite3 passes converters the raw column value as ``bytes``, so
        it is decoded to text before being handed to dateutil. Text input
        is passed through unchanged.
    """
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    return dp.parse(s)


# Columns declared or aliased as "dtdt" (e.g. ``AS "col [dtdt]"``) are run
# through dtdt() on connections opened with the matching detect_types flags.
sqlite3.register_converter("dtdt", dtdt)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def setup_sqlite(dbfile='data/CC_Database_101314.sqlite'):
    """Open a legacy SQLite database and return a cursor on it.

    Args:
        dbfile: Path of the database file. Defaults to the path this
            function used to hard-code, so existing no-argument calls
            behave as before.

    Returns:
        A cursor whose rows are ``sqlite3.Row`` objects, or ``None`` when
        *dbfile* is not an existing file. A directory at that path also
        yields ``None`` instead of an ``OperationalError``.
    """
    if not os.path.isfile(dbfile):
        return None
    # Both flags are needed: PARSE_COLNAMES is what makes the
    # ``AS "name [dtdt]"`` aliases trigger the registered converter.
    detect = sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
    db = sqlite3.connect(dbfile, detect_types=detect)
    db.row_factory = sqlite3.Row
    return db.cursor()
 | 
			
		||||
| 
						 | 
				
			
			@ -1,277 +0,0 @@
 | 
			
		|||
import os
 | 
			
		||||
 | 
			
		||||
from django.core.management.base import BaseCommand
 | 
			
		||||
from django.db import IntegrityError
 | 
			
		||||
 | 
			
		||||
import requests
 | 
			
		||||
 | 
			
		||||
from ccdb.utils.data_import import setup_sqlite
 | 
			
		||||
from ccdb.projects.models import Project, Grant, GrantReport
 | 
			
		||||
from ccdb.misc.models import MeasurementUnit, MeasurementType, Container, \
 | 
			
		||||
    Material, Color
 | 
			
		||||
from ccdb.locations.models import Region, Site, MunicipalLocation, \
 | 
			
		||||
    StudyLocation, StorageLocation
 | 
			
		||||
from ccdb.species.models import Species, CollectionSpecies
 | 
			
		||||
from ccdb.processing.models import ProcessType, Reagent, \
 | 
			
		||||
    Flaw as ProcessingFlaw,Processing
 | 
			
		||||
from ccdb.collections_ccdb.models import CollectionType, CollectionMethod, \
 | 
			
		||||
    Flaw as CollectionFlaw, ADFGPermit, Collection
 | 
			
		||||
from ccdb.experiments.models import Flaw as ExperimentFlaw, Experiment, \
 | 
			
		||||
    ProtocolAttachment, TreatmentType, Treatment, TreatmentReplicate, \
 | 
			
		||||
    AliveDeadCount
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Command(BaseCommand):
    """Management command: download the legacy files, then import them."""

    help = 'Imports prior data into the DB'

    def add_arguments(self, parser):
        """Register the single positional ``manifest_url`` argument."""
        parser.add_argument('manifest_url', type=str)

    def handle(self, **options):
        """Fetch everything named by the manifest, then run the import."""
        manifest_url = options['manifest_url']
        _fetch_data(manifest_url, self.stdout.write)
        self.stdout.write('Fetched data')

        _import_admin_data()
        self.stdout.write('Imported data')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _fetch_data(url, write):
    """Download every file listed in the JSON manifest at *url* into ``data/``.

    Args:
        url: Location of a JSON document whose ``files`` value lists the
            URLs to download.
        write: Callable taking one string; called with a progress message
            before each download.

    Raises:
        requests.HTTPError: when the manifest or one of the files comes
            back with an error status.
    """
    data_dir = 'data/'
    manifest = requests.get(url)
    manifest.raise_for_status()
    files = manifest.json()
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    for f in files['files']:
        p = ''.join([data_dir, f.split('/')[-1]])
        if os.path.exists(p):
            # Already downloaded on an earlier run.
            continue
        write('Grabbing {}'.format(p))
        download = requests.get(f, stream=True)
        try:
            download.raise_for_status()
            try:
                with open(p, 'wb') as out_file:
                    for chunk in download.iter_content(chunk_size=65536):
                        out_file.write(chunk)
            except Exception:
                # "File exists" doubles as "file is complete" above, so a
                # truncated download must not be left on disk.
                if os.path.exists(p):
                    os.remove(p)
                raise
        finally:
            download.close()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _import_admin_data():
    """Copy the legacy SQLite tables into the Django models.

    Obtains a cursor from ``setup_sqlite()``; when that returns a falsy
    value nothing is imported. Otherwise each section below reads one
    legacy table and saves one model instance per row, addressing columns
    by position. Sections run in the order written; later ones look up
    rows created by earlier ones (e.g. Project-Grants fetches Projects
    and Grants by id).

    Queries that alias a column as ``"name [dtdt]"`` get that column back
    as an extra trailing field, converted by the ``dtdt`` converter.
    """
    c = setup_sqlite()
    if c:
        # Projects
        for r in c.execute('SELECT * FROM tbl_lu_projects;'):
            p = Project(id=r[0], name=r[1], code=r[2], iacuc_number=r[3],
                description=r[4], sort_order=r[5])
            p.save()

        # Grants
        for r in c.execute('SELECT * FROM tbl_lu_grants;'):
            g = Grant(id=r[0], title=r[1], code=r[2],
                description=r[3], sort_order=r[4])
            g.save()

        # Project-Grants
        for r in c.execute('SELECT * FROM tbl_hash_project_grants;'):
            p = Project.objects.get(id=r[0])
            g = Grant.objects.get(id=r[1])
            p.grants.add(g)
            p.save()

        # Grant Reports
        q = '''
               SELECT *, report_due_date AS "due_date [dtdt]"
               FROM tbl_lu_grant_reports;
            '''
        for r in c.execute(q):
            # No PK field in Andre's file
            # r[8] is the converted due date appended by the alias above.
            gr = GrantReport(grant_id=r[0], title=r[1], report_type=r[2],
                description=r[3], due_date=r[8], submitted_date=r[5],
                attachment=r[6], sort_order=r[7])
            try:
                gr.save()
            except IntegrityError:
                # Deliberate best-effort: rows the database rejects are
                # skipped (presumably duplicates on re-import -- confirm).
                pass

        # Measurement Units
        for r in c.execute('SELECT * FROM tbl_lu_measurement_units;'):
            mu = MeasurementUnit(id=r[0], name=r[1], code=r[2],
                unit_class=r[3], description=r[4], sort_order=r[5])
            mu.save()

        # Measurement Types
        for r in c.execute('SELECT * FROM tbl_lu_measurement_types;'):
            mt = MeasurementType(id=r[0], name=r[1], code=r[2],
                measurement_type_class=r[3], description=r[4],
                default_measurement_unit_id=r[5], sort_order=r[6])
            mt.save()

        # Materials
        for r in c.execute('SELECT * FROM tbl_lu_materials;'):
            m = Material(id=r[0], name=r[1], code=r[2], material_class=r[3],
                description=r[4], sort_order=r[5])
            m.save()

        # Colors
        for r in c.execute('SELECT * FROM tbl_lu_colors;'):
            cl = Color(id=r[0], name=r[1], code=r[2],
                color_number=r[3], sort_order=r[4])
            cl.save()

        # Containers
        for r in c.execute('SELECT * FROM tbl_lu_containers;'):
            cl = Container(id=r[0], name=r[1], code=r[2], application=r[3],
                color_id=r[4], material_id=r[5], volume=r[6],
                measurement_unit_id=r[7], sort_order=r[8])
            cl.save()

        # Regions
        for r in c.execute('SELECT * FROM tbl_lu_regions;'):
            re = Region(id=r[0], name=r[1], code=r[2], sort_order=r[3])
            re.save()

        # Site
        for r in c.execute('SELECT * FROM tbl_lu_sites;'):
            s = Site(region_id=r[0], id=r[1], name=r[2], code=r[3],
                description=r[4], sort_order=r[5])
            s.save()

        # Municipal Locations
        for r in c.execute('SELECT * FROM tbl_lu_municipal_locations;'):
            ml = MunicipalLocation(id=r[1], name=r[2], code=r[3],
                municipal_location_type=r[4], description=r[5], sort_order=r[6])
            ml.save()

        # Study Locations
        for r in c.execute('SELECT * FROM tbl_lu_study_locations;'):
            sl = StudyLocation(site_id=r[0], id=r[1], name=r[2], code=r[3],
                study_location_type=r[4], treatment_type=r[5],
                municipal_location_id=r[6], collecting_location=r[7],
                description=r[13], sort_order=r[14])
            sl.save()

        # Storage Location
        for r in c.execute('SELECT * FROM tbl_lu_storage_locations;'):
            # Code is built as "<building initials> <temp>C <freezer>",
            # with '20' and 'No Freezer' standing in for empty values.
            bldg = "".join(e[0].upper() for e in r[2].split())
            temp_c = '20'
            if r[5]:
                temp_c = r[5]
            freezer = 'No Freezer'
            if r[4]:
                freezer = r[4]
            code = " ".join([bldg, str(temp_c)+'C', str(freezer)])
            # The stored freezer/temp_c fields keep the raw column values;
            # the fallbacks above only affect the generated code.
            sl = StorageLocation(id=r[0], facility=r[1], building=r[2],
                room=r[3], freezer=r[4], temp_c=r[5], code=code,
                description=r[6], sort_order=r[7])
            sl.save()

        # Species
        for r in c.execute('SELECT * FROM tbl_lu_species;'):
            s = Species(id=r[0], common_name=r[1], genus=r[2], species=r[3],
                parasite=r[4], sort_order=r[5])
            s.save()

        # Processing Type
        for r in c.execute('SELECT * FROM tbl_lu_process_types;'):
            pt = ProcessType(id=r[0], name=r[1], code=r[2], description=r[3],
                sort_order=r[4])
            pt.save()

        # Reagent
        for r in c.execute('SELECT * FROM tbl_lu_reagents;'):
            rg = Reagent(id=r[0], name=r[1], code=r[2], reagent_class=r[3],
                sort_order=r[4])
            rg.save()

        # Collection Type
        for r in c.execute('SELECT * FROM tbl_lu_collection_types;'):
            ct = CollectionType(id=r[0], name=r[1], code=r[2], sort_order=r[3])
            ct.save()

        # Collection Method
        for r in c.execute('SELECT * FROM tbl_lu_collection_methods;'):
            cm = CollectionMethod(id=r[0], name=r[1], code=r[2],
                collection_method_class=r[3], sort_order=r[4])
            cm.save()

        # Collection
        for r in c.execute('''
               SELECT *,
                 collection_start_date AS "collection_start_date [dtdt]",
                 collection_start_time AS "collection_start_time [dtdt]",
                 collection_end_date   AS "collection_end_date [dtdt]",
                 collection_end_time   AS "collection_end_time [dtdt]"
               FROM tbl_collections;
            '''):
            # Compare by value: identity against a string literal only
            # works by accident of interning.
            # NOTE(review): a NULL permit (None) still creates a permit,
            # as before -- confirm whether that is intended.
            if r[14] != '':
                permit, _ = ADFGPermit.objects.get_or_create(name=r[14])
            else:
                permit = None
            # r[17]..r[20] are the four converted date/time aliases.
            col = Collection(project_id=r[0], id=r[1], study_location_id=r[2],
                collection_type_id=r[3], collection_method_id=r[4],
                number_of_traps=r[5], collection_start_date=r[17],
                collection_start_time=r[18], collection_end_date=r[19],
                collection_end_time=r[20], storage_location_id=r[10],
                specimen_state=r[11], process_type_id=r[12], reagent_id=r[13],
                adfg_permit=permit)
            col.save()

        # Collection Species
        for r in c.execute('SELECT * FROM tbl_hash_collection_species;'):
            # No PK field in Andre's file
            cs = CollectionSpecies(collection_id=r[0], species_id=r[1],
                sex=r[2], count=r[3], count_estimated=r[4])
            try:
                cs.save()
            except IntegrityError:
                # Deliberate best-effort, same as Grant Reports above.
                pass

        # Experiment
        for r in c.execute('SELECT * FROM tbl_lu_experiments;'):
            e = Experiment(id=r[0], name=r[1], code=r[2],
                description=r[3], sort_order=r[6])
            e.save()

        # Treatment Type
        for r in c.execute('SELECT * FROM tbl_lu_treatment_types;'):
            tt = TreatmentType(experiment_id=r[0], id=r[1], name=r[2], code=r[3],
                treatment_type=r[4], placement=r[5], description=r[6])
            tt.save()

        # Treatment
        for r in c.execute('SELECT * FROM tbl_treatments;'):
            t = Treatment(id=r[0], treatment_type_id=r[1], container_id=r[2],
                study_location_id=r[3], species_id=r[4], sex=r[5])
            t.save()

        # Treatment Replicate
        for r in c.execute('''
                SELECT *, setup_date AS "setup_date [dtdt]"
                FROM tbl_treatment_replicates tr
                LEFT OUTER JOIN tbl_lu_record_flaws f ON f.flawid=tr.flawid;
            '''):
            # r[7] is checked before using r[10] as the flaw name
            # (presumably the replicate's flawid and the joined flaw
            # table's name column -- confirm against the schema).
            if r[7]:
                flaw, _ = ExperimentFlaw.objects.get_or_create(name=r[10])
            else:
                flaw = None
            tr = TreatmentReplicate(treatment_id=r[0], id=r[1], name=r[2],
                setup_date=r[13], setup_sample_size=r[5], mass_g=r[6], flaw=flaw)
            tr.save()

        # Alive-Dead Count
        for r in c.execute('''
                SELECT *,
                  status_date AS "status_date [dtdt]",
                  status_time AS "status_time [dtdt]"
                FROM tbl_alive_dead_counts adc
                LEFT OUTER JOIN tbl_lu_record_flaws f ON f.flawid=adc.flawid;
            '''):
            if r[6]:
                flaw, _ = ExperimentFlaw.objects.get_or_create(name=r[9])
            else:
                flaw = None
            adc = AliveDeadCount(treatment_replicate_id=r[0], id=r[1],
                status_date=r[12], status_time=r[13], count_alive=r[4],
                count_dead=r[5], flaw=flaw)
            adc.save()

        # Experiment-Collection
        for r in c.execute('SELECT * FROM tbl_hash_collection_experiments;'):
            # Use a separate name so the cursor `c` is not rebound while
            # its result set is still being iterated.
            col = Collection.objects.get(id=r[0])
            e = Experiment.objects.get(id=r[1])
            e.collections.add(col)
            e.save()
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue