Initial recut of data import
This commit is contained in:
parent
6162bd984f
commit
75829eb4a1
5 changed files with 69 additions and 302 deletions
69
ccdb/utils/data.py
Normal file
69
ccdb/utils/data.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
import sqlite3
|
||||
import os
|
||||
import json
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
import requests
|
||||
import dateutil.parser as dp
|
||||
|
||||
|
||||
def get_data_sources():
    """
    Download the data files named by the manifest and open the sources.

    Reads the manifest URL from ``settings.MANIFEST_URL``, makes sure the
    local ``data/`` directory exists, fetches the manifest plus every file
    it lists, and returns the opened data sources.

    Returns:
        ``None`` when no manifest URL is configured (setting missing or
        empty); otherwise a dict mapping ``'db0'`` to whatever
        ``_get_db0()`` returns.
    """
    # A missing setting is treated like an empty one instead of raising
    # AttributeError.
    manifest_url = getattr(settings, 'MANIFEST_URL', None)
    if not manifest_url:
        return None
    data_dir = 'data/'
    # EAFP: create the directory directly so there is no window between an
    # existence check and makedirs() in which something else can create it.
    try:
        os.makedirs(data_dir)
    except OSError:
        if not os.path.isdir(data_dir):
            raise
    _fetch_data(data_dir, manifest_url)
    return {
        'db0': _get_db0(),
    }
|
||||
|
||||
|
||||
def _fetch_data(data_dir, url):
    """
    Make sure the manifest at ``url`` and every file it lists exist locally.

    Anything already present under ``data_dir`` is left untouched; only
    missing files are downloaded. The manifest is a JSON document whose
    ``'files'`` entry is iterated as a sequence of URLs.
    """
    manifest_path = _filename(data_dir, url)
    if not os.path.exists(manifest_path):
        _write_url(url, manifest_path)

    with open(manifest_path) as manifest_file:
        listing = json.load(manifest_file)

    for file_url in listing['files']:
        local_path = _filename(data_dir, file_url)
        if os.path.exists(local_path):
            continue
        _write_url(file_url, local_path)
|
||||
|
||||
|
||||
def _filename(data_dir, url):
|
||||
return ''.join([data_dir, url.split('/')[-1]])
|
||||
|
||||
|
||||
def _write_url(url, filename):
    """
    Stream the resource at ``url`` into the local file ``filename``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
            The status is checked before the file is opened, so an error
            page is never saved under ``filename``. This matters because
            the callers in this module skip any file that already exists
            and would therefore keep a bad copy indefinitely.
    """
    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()
        with open(filename, 'wb') as outfile:
            # Explicit chunk size: iterating the response object directly
            # yields very small (128-byte) pieces.
            for chunk in r.iter_content(chunk_size=8192):
                outfile.write(chunk)
    finally:
        # With stream=True the connection is held until the response is
        # closed.
        r.close()
|
||||
|
||||
|
||||
def _get_db0():
    """Open the first source SQLite file via ``setup_sqlite``."""
    return setup_sqlite(
        'data/Replica_Hibernators_Back_UAF_Laptop_29_June_2015.sqlite')
|
||||
|
||||
|
||||
def dtdt(s):
    """
    Parse a date/time value whose layout is not known in advance.

    The incoming data (MS Access exports in particular) is inconsistent
    about how it writes dates, so the work is handed to dateutil's parser.
    """
    parsed = dp.parse(s)
    return parsed
|
||||
|
||||
|
||||
# Make dtdt() the sqlite3 converter for the type name "dtdt": on connections
# opened with detect_types enabled, columns declared or aliased with that
# type name are passed through it.
sqlite3.register_converter("dtdt", dtdt)
|
||||
|
||||
|
||||
def setup_sqlite(dbfile):
    """
    Open the SQLite file ``dbfile`` and hand back a cursor on it.

    The connection is opened with declared-type and column-name detection
    switched on (so registered converters apply) and uses ``sqlite3.Row``
    as its row factory.

    Returns:
        A cursor, or ``None`` when ``dbfile`` does not exist.
    """
    if not os.path.exists(dbfile):
        return None
    flags = sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
    connection = sqlite3.connect(dbfile, detect_types=flags)
    connection.row_factory = sqlite3.Row
    return connection.cursor()
|
|
@ -1,25 +0,0 @@
|
|||
import sqlite3
|
||||
import os
|
||||
|
||||
import dateutil.parser as dp
|
||||
|
||||
|
||||
def dtdt(s):
    """
    Convert a loosely formatted date/time value with dateutil.

    Needed because the source data (hello, MS Access) does not stick to a
    single date/time format.
    """
    result = dp.parse(s)
    return result
|
||||
|
||||
|
||||
# Register dtdt() under the type name "dtdt" so that queries aliasing a
# column as "name [dtdt]" get parsed values back when the connection uses
# PARSE_COLNAMES.
sqlite3.register_converter("dtdt", dtdt)
|
||||
|
||||
|
||||
def setup_sqlite(dbfile='data/CC_Database_101314.sqlite'):
    """
    Open a SQLite database file and return a cursor on it.

    Args:
        dbfile: path of the database to open. Defaults to the file this
            function used to hard-code, so existing no-argument calls
            behave as before.

    Returns:
        A cursor on a connection that has declared-type and column-name
        detection enabled and ``sqlite3.Row`` as its row factory, or
        ``None`` when ``dbfile`` does not exist.
    """
    if not os.path.exists(dbfile):
        return None
    db = sqlite3.connect(
        dbfile,
        detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    db.row_factory = sqlite3.Row
    return db.cursor()
|
|
@ -1,277 +0,0 @@
|
|||
import os
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import IntegrityError
|
||||
|
||||
import requests
|
||||
|
||||
from ccdb.utils.data_import import setup_sqlite
|
||||
from ccdb.projects.models import Project, Grant, GrantReport
|
||||
from ccdb.misc.models import MeasurementUnit, MeasurementType, Container, \
|
||||
Material, Color
|
||||
from ccdb.locations.models import Region, Site, MunicipalLocation, \
|
||||
StudyLocation, StorageLocation
|
||||
from ccdb.species.models import Species, CollectionSpecies
|
||||
from ccdb.processing.models import ProcessType, Reagent, \
|
||||
Flaw as ProcessingFlaw,Processing
|
||||
from ccdb.collections_ccdb.models import CollectionType, CollectionMethod, \
|
||||
Flaw as CollectionFlaw, ADFGPermit, Collection
|
||||
from ccdb.experiments.models import Flaw as ExperimentFlaw, Experiment, \
|
||||
ProtocolAttachment, TreatmentType, Treatment, TreatmentReplicate, \
|
||||
AliveDeadCount
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command: fetch the data files, then import them."""

    help = 'Imports prior data into the DB'

    def add_arguments(self, parser):
        """Declare the one positional argument, the manifest URL."""
        parser.add_argument('manifest_url', type=str)

    def handle(self, **options):
        """Download everything the manifest lists, then run the import."""
        manifest_url = options['manifest_url']
        report = self.stdout.write
        _fetch_data(manifest_url, report)
        report('Fetched data')
        _import_admin_data()
        report('Imported data')
|
||||
|
||||
|
||||
def _fetch_data(url, write):
    """
    Download every file listed in the JSON manifest at ``url`` into ``data/``.

    Args:
        url: address of a JSON document whose ``'files'`` entry is a list
            of file URLs.
        write: callable taking one string; called with a progress message
            before each download.

    Files that already exist locally are skipped.

    Raises:
        requests.HTTPError: if the manifest or any file request comes back
            with a 4xx/5xx status. The check runs before the local file is
            opened, so an error page is never stored as data (it would
            otherwise be skipped as "already present" on every later run).
    """
    data_dir = 'data/'
    manifest = requests.get(url)
    manifest.raise_for_status()
    files = manifest.json()
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    for f in files['files']:
        p = ''.join([data_dir, f.split('/')[-1]])
        if os.path.exists(p):
            continue
        write('Grabbing {}'.format(p))
        download = requests.get(f, stream=True)
        try:
            download.raise_for_status()
            with open(p, 'wb') as out_file:
                # Explicit chunk size: iterating the response object
                # directly yields very small (128-byte) pieces.
                for chunk in download.iter_content(chunk_size=8192):
                    out_file.write(chunk)
        finally:
            # A streamed response holds its connection until closed.
            download.close()
|
||||
|
||||
|
||||
def _import_admin_data():
    """
    Copy the source SQLite tables into the Django models, one table at a time.

    Does nothing when ``setup_sqlite()`` returns no cursor. Rows are read
    positionally (``r[0]``, ``r[1]``, ...), so every mapping below depends
    on the column order of the source tables. Date/time columns are
    re-selected with a ``[dtdt]`` alias so the registered converter parses
    them; those converted values are appended after the ``*`` columns.
    """
    c = setup_sqlite()
    if not c:
        return

    # Projects
    for r in c.execute('SELECT * FROM tbl_lu_projects;'):
        p = Project(id=r[0], name=r[1], code=r[2], iacuc_number=r[3],
                    description=r[4], sort_order=r[5])
        p.save()

    # Grants
    for r in c.execute('SELECT * FROM tbl_lu_grants;'):
        g = Grant(id=r[0], title=r[1], code=r[2],
                  description=r[3], sort_order=r[4])
        g.save()

    # Project-Grants
    for r in c.execute('SELECT * FROM tbl_hash_project_grants;'):
        p = Project.objects.get(id=r[0])
        g = Grant.objects.get(id=r[1])
        p.grants.add(g)
        p.save()

    # Grant Reports
    q = '''
        SELECT *, report_due_date AS "due_date [dtdt]"
        FROM tbl_lu_grant_reports;
    '''
    for r in c.execute(q):
        # No PK field in Andre's file
        gr = GrantReport(grant_id=r[0], title=r[1], report_type=r[2],
                         description=r[3], due_date=r[8],
                         submitted_date=r[5], attachment=r[6],
                         sort_order=r[7])
        try:
            gr.save()
        except IntegrityError:
            # Deliberate best-effort: rows that violate a constraint are
            # skipped (presumably duplicates, since the source has no PK).
            pass

    # Measurement Units
    for r in c.execute('SELECT * FROM tbl_lu_measurement_units;'):
        mu = MeasurementUnit(id=r[0], name=r[1], code=r[2],
                             unit_class=r[3], description=r[4],
                             sort_order=r[5])
        mu.save()

    # Measurement Types
    for r in c.execute('SELECT * FROM tbl_lu_measurement_types;'):
        mt = MeasurementType(id=r[0], name=r[1], code=r[2],
                             measurement_type_class=r[3], description=r[4],
                             default_measurement_unit_id=r[5],
                             sort_order=r[6])
        mt.save()

    # Materials
    for r in c.execute('SELECT * FROM tbl_lu_materials;'):
        m = Material(id=r[0], name=r[1], code=r[2], material_class=r[3],
                     description=r[4], sort_order=r[5])
        m.save()

    # Colors
    for r in c.execute('SELECT * FROM tbl_lu_colors;'):
        cl = Color(id=r[0], name=r[1], code=r[2],
                   color_number=r[3], sort_order=r[4])
        cl.save()

    # Containers
    for r in c.execute('SELECT * FROM tbl_lu_containers;'):
        cn = Container(id=r[0], name=r[1], code=r[2], application=r[3],
                       color_id=r[4], material_id=r[5], volume=r[6],
                       measurement_unit_id=r[7], sort_order=r[8])
        cn.save()

    # Regions
    for r in c.execute('SELECT * FROM tbl_lu_regions;'):
        rgn = Region(id=r[0], name=r[1], code=r[2], sort_order=r[3])
        rgn.save()

    # Site
    for r in c.execute('SELECT * FROM tbl_lu_sites;'):
        s = Site(region_id=r[0], id=r[1], name=r[2], code=r[3],
                 description=r[4], sort_order=r[5])
        s.save()

    # Municipal Locations
    for r in c.execute('SELECT * FROM tbl_lu_municipal_locations;'):
        ml = MunicipalLocation(id=r[1], name=r[2], code=r[3],
                               municipal_location_type=r[4],
                               description=r[5], sort_order=r[6])
        ml.save()

    # Study Locations
    for r in c.execute('SELECT * FROM tbl_lu_study_locations;'):
        sl = StudyLocation(site_id=r[0], id=r[1], name=r[2], code=r[3],
                           study_location_type=r[4], treatment_type=r[5],
                           municipal_location_id=r[6],
                           collecting_location=r[7],
                           description=r[13], sort_order=r[14])
        sl.save()

    # Storage Location
    for r in c.execute('SELECT * FROM tbl_lu_storage_locations;'):
        # The code is built from the building's initials, the temperature
        # (20 when the source has none) and the freezer name.
        bldg = "".join(e[0].upper() for e in r[2].split())
        temp_c = r[5] if r[5] else '20'
        freezer = r[4] if r[4] else 'No Freezer'
        code = " ".join([bldg, str(temp_c) + 'C', str(freezer)])
        sl = StorageLocation(id=r[0], facility=r[1], building=r[2],
                             room=r[3], freezer=r[4], temp_c=r[5],
                             code=code, description=r[6], sort_order=r[7])
        sl.save()

    # Species
    for r in c.execute('SELECT * FROM tbl_lu_species;'):
        s = Species(id=r[0], common_name=r[1], genus=r[2], species=r[3],
                    parasite=r[4], sort_order=r[5])
        s.save()

    # Processing Type
    for r in c.execute('SELECT * FROM tbl_lu_process_types;'):
        pt = ProcessType(id=r[0], name=r[1], code=r[2], description=r[3],
                         sort_order=r[4])
        pt.save()

    # Reagent
    for r in c.execute('SELECT * FROM tbl_lu_reagents;'):
        rg = Reagent(id=r[0], name=r[1], code=r[2], reagent_class=r[3],
                     sort_order=r[4])
        rg.save()

    # Collection Type
    for r in c.execute('SELECT * FROM tbl_lu_collection_types;'):
        ct = CollectionType(id=r[0], name=r[1], code=r[2], sort_order=r[3])
        ct.save()

    # Collection Method
    for r in c.execute('SELECT * FROM tbl_lu_collection_methods;'):
        cm = CollectionMethod(id=r[0], name=r[1], code=r[2],
                              collection_method_class=r[3], sort_order=r[4])
        cm.save()

    # Collection
    for r in c.execute('''
        SELECT *,
          collection_start_date AS "collection_start_date [dtdt]",
          collection_start_time AS "collection_start_time [dtdt]",
          collection_end_date AS "collection_end_date [dtdt]",
          collection_end_time AS "collection_end_time [dtdt]"
        FROM tbl_collections;
    '''):
        # Truthiness test instead of the former identity comparison with
        # '' (which only worked by interning accident); an empty or NULL
        # permit name means "no permit".
        if r[14]:
            permit, _ = ADFGPermit.objects.get_or_create(name=r[14])
        else:
            permit = None
        col = Collection(project_id=r[0], id=r[1], study_location_id=r[2],
                         collection_type_id=r[3], collection_method_id=r[4],
                         number_of_traps=r[5], collection_start_date=r[17],
                         collection_start_time=r[18],
                         collection_end_date=r[19],
                         collection_end_time=r[20],
                         storage_location_id=r[10], specimen_state=r[11],
                         process_type_id=r[12], reagent_id=r[13],
                         adfg_permit=permit)
        col.save()

    # Collection Species
    for r in c.execute('SELECT * FROM tbl_hash_collection_species;'):
        # No PK field in Andre's file
        cs = CollectionSpecies(collection_id=r[0], species_id=r[1],
                               sex=r[2], count=r[3], count_estimated=r[4])
        try:
            cs.save()
        except IntegrityError:
            # Deliberate best-effort, same as for grant reports above.
            pass

    # Experiment
    for r in c.execute('SELECT * FROM tbl_lu_experiments;'):
        e = Experiment(id=r[0], name=r[1], code=r[2],
                       description=r[3], sort_order=r[6])
        e.save()

    # Treatment Type
    for r in c.execute('SELECT * FROM tbl_lu_treatment_types;'):
        tt = TreatmentType(experiment_id=r[0], id=r[1], name=r[2],
                           code=r[3], treatment_type=r[4], placement=r[5],
                           description=r[6])
        tt.save()

    # Treatment
    for r in c.execute('SELECT * FROM tbl_treatments;'):
        t = Treatment(id=r[0], treatment_type_id=r[1], container_id=r[2],
                      study_location_id=r[3], species_id=r[4], sex=r[5])
        t.save()

    # Treatment Replicate
    for r in c.execute('''
        SELECT *, setup_date AS "setup_date [dtdt]"
        FROM tbl_treatment_replicates tr
        LEFT OUTER JOIN tbl_lu_record_flaws f ON f.flawid=tr.flawid;
    '''):
        # r[7] is tested to decide whether a flaw is attached; r[10] is
        # used as the flaw's name (both come from the join's column order).
        if r[7]:
            flaw, _ = ExperimentFlaw.objects.get_or_create(name=r[10])
        else:
            flaw = None
        tr = TreatmentReplicate(treatment_id=r[0], id=r[1], name=r[2],
                                setup_date=r[13], setup_sample_size=r[5],
                                mass_g=r[6], flaw=flaw)
        tr.save()

    # Alive-Dead Count
    for r in c.execute('''
        SELECT *,
          status_date AS "status_date [dtdt]",
          status_time AS "status_time [dtdt]"
        FROM tbl_alive_dead_counts adc
        LEFT OUTER JOIN tbl_lu_record_flaws f ON f.flawid=adc.flawid;
    '''):
        if r[6]:
            flaw, _ = ExperimentFlaw.objects.get_or_create(name=r[9])
        else:
            flaw = None
        adc = AliveDeadCount(treatment_replicate_id=r[0], id=r[1],
                             status_date=r[12], status_time=r[13],
                             count_alive=r[4], count_dead=r[5], flaw=flaw)
        adc.save()

    # Experiment-Collection
    for r in c.execute('SELECT * FROM tbl_hash_collection_experiments;'):
        # Use a separate name here: rebinding `c` would replace the cursor
        # that is still being iterated.
        col = Collection.objects.get(id=r[0])
        e = Experiment.objects.get(id=r[1])
        e.collections.add(col)
        e.save()
|
Loading…
Add table
Reference in a new issue