import collections
import inspect
import json
import jsonschema
import os
import sys
from pprint import pprint
from datacatalog import settings
from datacatalog.extensible import ExtensibleAttrDict
from ...dicthelpers import data_merge
from ...jsonschemas import DateTimeEncoder, formatChecker, DateTimeConverter
from ...jsonschemas import validate as jsonschema_validate
from ...utils import safen_path, normalize, normpath
from ...stores import abspath
from ...filetypes import infer_filetype
from ...identifiers.typeduuid import uuid_to_hashid, catalog_uuid
from ..basestore import AgaveClient, LinkedStore, linkages
from ..basestore import HeritableDocumentSchema, JSONSchemaCollection
from ..basestore import CatalogUpdateFailure
FILE_ID_PREFIX = settings.FILE_ID_PREFIX
DEFAULT_LINK_FIELDS = [linkages.CHILD_OF, linkages.DERIVED_FROM,
linkages.DERIVED_USING, linkages.GENERATED_BY]
[docs]class FileUpdateFailure(CatalogUpdateFailure):
pass
[docs]class FileDocument(HeritableDocumentSchema):
"""Defines experiment-linked metadata for a file"""
def __init__(self, inheritance=True, **kwargs):
super(FileDocument, self).__init__(inheritance, **kwargs)
self.update_id()
[docs]class FileRecord(ExtensibleAttrDict):
"""New document for FileStore with schema enforcement"""
PARAMS = [
# ('uuid', False, 'uuid', None),
# ('child_of', False, 'child_of', []),
# ('generated_by', False, 'generated_by', []),
# ('derived_using', False, 'derived_using', []),
# ('derived_from', False, 'derived_from', []),
# ('notes', False, 'notes', []),
('level', False, 'level', 'Unknown'),
('storage_system', False,
'storage_system', settings.STORAGE_SYSTEM)]
def __init__(self, value, *args, **kwargs):
# if 'file_id' not in value:
# value['file_id'] = 'file.tacc.' + uuid.uuid1().hex
ovalue = dict(value)
# Validate incoming document
# value = dict(value)
# schema = FileDocument()
# for k in schema.filter_keys():
# try:
# del value[k]
# except KeyError:
# pass
# vvalue = json.loads(json.dumps(value, default=DateTimeConverter))
# jsonschema_validate(vvalue, schema.to_dict(),
# format_checker=formatChecker())
# Ensure the minimum set of other fields is populated
#
# We use a bespoke process rather than relying on the schema for now
# because file record creation cannot tolerate the overhead of
# materializing a class definition with python_jsonschema_objects
for param, req, attr, default in self.PARAMS:
val = kwargs.get(param, ovalue.get(param, default))
if req and val is not None:
kwargs[param] = val
super().__init__(value, *args, **kwargs)
self['name'] = safen_path(self['name'],
no_unicode=False,
no_spaces=True,
url_quote=False,
no_equals=True)
if self.get('storage_system', None) is None:
self['storage_system'] = settings.STORAGE_SYSTEM
[docs] def set_token(self, value):
self['_update_token'] = str(value)
[docs]class FileStore(AgaveClient, LinkedStore):
"""Manage storage and retrieval of FileDocuments"""
LINK_FIELDS = DEFAULT_LINK_FIELDS
def __init__(self, mongodb, agave=None, config={}, session=None, **kwargs):
super(FileStore, self).__init__(mongodb, config, session, agave=agave)
schema = FileDocument(**kwargs)
super(FileStore, self).update_attrs(schema)
self.setup(update_indexes=kwargs.get('update_indexes', False))
[docs] def add_update_document(self, document_dict, uuid=None, token=None, strategy='merge'):
# if not isinstance(document_dict, FileRecord):
# document_dict = FileRecord(document_dict)
# Generate file_id from name if not present
if 'file_id' not in document_dict:
document_dict['file_id'] = self.generate_string_id(document_dict)
resp = super().add_update_document(document_dict,
uuid=uuid, token=token,
strategy=strategy)
self.logger.info('add_update_document: {}'.format(resp))
new_resp = resp
return new_resp
[docs] @classmethod
def generate_string_id(cls, document_dict):
if 'file_id' not in document_dict:
filepath = normpath('/' + document_dict['name'])
agave_uri = 'agave://' + \
document_dict.get('storage_system', settings.STORAGE_SYSTEM) + filepath
file_id = FILE_ID_PREFIX + uuid_to_hashid(
catalog_uuid(agave_uri, uuid_type='file'))
return file_id
else:
raise KeyError('Unable to find field "name" in document dict')
[docs] @classmethod
def generate_string_id_v2_0(cls, document_dict):
if 'file_id' not in document_dict:
file_id = FILE_ID_PREFIX + uuid_to_hashid(
catalog_uuid(document_dict['name'], uuid_type='file'))
return file_id
[docs] def index(self, filename, storage_system=None, token=None, **kwargs):
"""Capture a skeleton metadata entry for a file
Args:
filename (str): Agave-canonical absolute path to the target
storage_system (str, optional): Agave storage system for the target
Returns:
dict: A LinkedStore document containing file details
"""
# print('FIXITY.STORE.INDEX ' + filename)
if storage_system is None:
storage_system = settings.STORAGE_SYSTEM
self.name = normpath(filename)
self.abs_filename = self._helper.mapped_posix_path(
self.name, storage_system=storage_system)
# self.abs_filename = abspath(self.name, storage_system=storage_system, agave=self._helper)
file_uuid = self.get_typeduuid(self.name)
db_record = self.coll.find_one({'uuid': file_uuid})
file_record = None
if db_record is None:
db_record = {'name': filename,
'storage_system': storage_system,
'uuid': file_uuid,
'type': kwargs.get('type', infer_filetype(
filename, check_exists=False).label),
'child_of': kwargs.get('child_of', []),
'generated_by': kwargs.get('generated_by', [])}
resp = self.add_update_document(
db_record, uuid=file_uuid, token=token, strategy='merge')
file_record = resp
else:
file_record = db_record
return file_record
[docs] def get_typeduuid(self, payload, binary=False):
if isinstance(payload, dict):
if 'name' in payload:
payload['name'] = safen_path(payload['name'])
# identifier_string = self.get_linearized_values(payload)
else:
payload = normpath(str(payload))
# identifier_string = normpath(str(payload))
self.logger.debug('file.payload: {}'.format(payload))
return super().get_typeduuid(payload, binary)
[docs]class StoreInterface(FileStore):
pass