Source code for biotaphy.client.ot_service_wrapper.open_tree

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Module for Open Tree of Life client."""
import json
from urllib.request import Request, urlopen


[docs]PRODUCTION_SERVER = 'https://api.opentreeoflife.org/v3'
[docs]INDUCED_SUBTREE_BASE_URL = '{}/tree_of_life/induced_subtree'.format(PRODUCTION_SERVER)
[docs]OTT_TAXON_INFO_URL = '{}/tnrs/match_names'.format(PRODUCTION_SERVER)
[docs]MAX_NAMES_PER_REQUEST = 250
# .....................................................................................
[docs]class LABEL_FORMAT: """Represents the label format constants used when calling induced subtree."""
[docs] NAME = 'name'
[docs] ID = 'id'
[docs] NAME_AND_ID = 'name_and_id'
# .....................................................................................
[docs]def sanitize_name(name): """Quick and dirty sanitization of a name string. Args: name (str): A name string to sanitize. Returns: str: A sanitized search name. """ parts = name.split(' ') sanitized_string = parts[0] # Genu if len(parts) > 1: # Check next element if parts[1].lower() in ['cf.', 'cf']: sanitized_string += ' cf. {}'.format(parts[2]) elif parts[1] == '×': # Hybrid sanitized_string += ' × {}'.format(parts[2]) elif parts[1].lower() in ['sp', 'sp.', 'spp', 'spp.']: # Unpublished sanitized_string = name elif parts[1].startswith('(') or parts[1][0].isupper(): # Genus with author pass # pragma: no cover else: # Get species name sanitized_string += ' {}'.format(parts[1]) if len(parts) > 2: if parts[2].lower() in ['subsp.', 'subsp']: # Subspecies sanitized_string += ' subsp. {}'.format(parts[3]) elif parts[2].lower() in ['var.', 'var']: # Variety sanitized_string += ' var. {}'.format(parts[3]) return sanitized_string
# .....................................................................................
[docs]def resolve_names_otol(names_list): """Get information from the OTL taxon match service for a list of names. Args: names_list (:obj:`list` of :obj:`str`): A list of taxon names to get information for. Returns: dict: A dictionary where keys are the searched taxon names and the values are dictionaries of values from Open Tree. """ taxa_info = {} not_found_taxa = [] headers = {'Content-Type': 'application/json'} names_lookup = {} search_names = [] for name in names_list: sanitized_name = sanitize_name(name) search_names.append(sanitized_name) names_lookup[sanitized_name] = name for i in range(0, len(search_names), MAX_NAMES_PER_REQUEST): request_body = { 'names': search_names[i:i+MAX_NAMES_PER_REQUEST], 'do_approximate_matching': True } req = Request( OTT_TAXON_INFO_URL, data=json.dumps(request_body).encode('utf8'), headers=headers ) resp = urlopen(req) resp_json = json.load(resp) # Add taxa that we didn't find to list not_found_taxa.extend(resp_json['unmatched_names']) for result in resp_json['results']: taxon = names_lookup[result['name']] # Get the original search name vals = {} cont = True for match in result['matches']: if cont: if not match['is_synonym']: cont = False vals['ott_id'] = match['taxon']['ott_id'] for tax_source in match['taxon']['tax_sources']: if tax_source.startswith('gbif'): vals['gbif_id'] = int(tax_source[5:]) vals['accepted_name'] = match['taxon']['name'] vals['synonyms'] = match['taxon']['synonyms'] if vals: taxa_info[taxon] = vals elif taxon not in not_found_taxa: # pragma: no cover not_found_taxa.append(taxon) for tax in not_found_taxa: taxa_info[tax] = None return taxa_info
# .....................................................................................
[docs]def induced_subtree(ott_ids, label_format=LABEL_FORMAT.NAME): """Retrieves a Newick tree containing the nodes represented by the ids. Calls the Open Tree 'induced_subtree' service to retrieve a tree, in Newick format, containing the nodes represented by the provided Open Tree IDs. Args: ott_ids (list) : A list of Open Tree IDs. These will be converted to integers in the request. label_format (str) : The label string format to use when creating the tree on the server. (see: LABEL_FORMAT) Returns: dict: A dictionary of the subtree response after JSON processing. """ # Ids need to be integers processed_ids = [int(ottid) for ottid in ott_ids] request_body = { 'ott_ids': processed_ids, 'label_format': label_format } headers = { 'Content-Type': 'application/json' } req = Request( INDUCED_SUBTREE_BASE_URL, data=json.dumps(request_body).encode('utf-8'), headers=headers ) resp_str = urlopen(req).read().decode('utf-8') return json.loads(resp_str)['newick']