Source code for datacatalog.formats.classify

import os
import sys
import importlib
import inspect
import itertools
from pprint import pprint
from . import *
from .converter import Converter, ConversionError
from ..utils import dynamic_import, detect_encoding

FORMATS = [
    'Transcriptic',
    'Ginkgo',
    'Biofab',
    'SampleAttributes',
    'Caltech',
    'Marshall',
    'Duke_Haase',
    'Duke_Validation',
    'Tulane',
]
"""Names of the converter classes, one per document type that can be
converted to Data Catalog records. ``get_converters`` instantiates them in
the order listed here."""

class NoClassifierError(ConversionError):
    """A document could not be classified, so it cannot be converted."""
def get_converters(options=None):
    """Instantiate one converter for every class named in ``FORMATS``.

    Args:
        options (dict, optional): Passed to each converter's constructor as
            the ``options`` keyword argument. When omitted, a new empty dict
            is created for this call.

    Returns:
        list: One converter instance per entry in ``FORMATS``, in the same
        order.

    Raises:
        KeyError: If a name in ``FORMATS`` is not defined in this module's
            global namespace.
    """
    if options is None:
        # Build a fresh dict per call: a literal ``{}`` default is created
        # once at definition time and would be shared by every call and by
        # every converter that receives it.
        options = {}

    # Converter classes are looked up by name in this module's globals.
    namespace = globals()
    return [namespace[name](options=options) for name in FORMATS]
def get_converter(json_filepath, options=None, expect=None):
    """Return the first converter that accepts the file at ``json_filepath``.

    Each candidate converter's ``validate_input`` is called with the file
    path and the detected encoding; the first candidate whose call does not
    raise is returned.

    Args:
        json_filepath (str): Path to the document to classify.
        options (dict, optional): Passed to the constructor of every
            candidate converter. When omitted, a new empty dict is created
            for this call.
        expect (str, optional): Name of a single converter class to try. When
            ``None``, every class named in ``FORMATS`` is tried in order.

    Returns:
        The first converter instance whose ``validate_input`` call succeeded.

    Raises:
        ValueError: If the detected encoding is not one of ``ascii``,
            ``utf-8`` or ``ISO-8859-1``.
        KeyError: If ``expect`` is not a name defined in this module's
            global namespace.
        NoClassifierError: If every candidate raised during validation. The
            message lists the collected exceptions.
    """
    if options is None:
        # Build a fresh dict per call rather than sharing one default dict
        # across all calls and all converters.
        options = {}

    if expect is None:
        converters = get_converters(options)
    else:
        # Pass ``options`` by keyword, as ``get_converters`` does, so it
        # cannot bind to a different positional constructor parameter.
        converters = [globals()[expect](options=options)]

    encoding = detect_encoding(json_filepath)
    if encoding is None:
        # Detection gave no answer; use a sane default.
        encoding = 'utf-8'
    elif encoding not in ('ascii', 'utf-8', 'ISO-8859-1'):
        raise ValueError("Unknown encoding: {}".format(encoding))

    exceptions = []
    for conv in converters:
        try:
            conv.validate_input(json_filepath, encoding)
        except Exception as exc:
            # Any failure only means "not this format": remember it for the
            # final error message and move on to the next candidate.
            exceptions.append(exc)
        else:
            return conv

    raise NoClassifierError(
        'Classification failed for {}: {}'.format(
            os.path.basename(json_filepath), exceptions))