Skip to content

Atlas Tags

Atlas allows you to visually and programmatically associate tags with data points. Tags can be added collaboratively by anyone allowed to edit your Atlas Project.

You can access and operate on your assigned tags by using the tags attribute of an AtlasMap.

from nomic import AtlasProject

# Look up the project by name and take the first of its maps.
map = AtlasProject(name='My Project').maps[0]

# The tag state of the map is exposed through its `tags` attribute.
map.tags

AtlasMapTags

Atlas Map Tag State

Tags are shared across all maps in your AtlasProject. You can manipulate tags by filtering over the associated pandas dataframe.

from nomic import AtlasProject

# Look up the project by name and take the first of its maps.
project = AtlasProject(name='My Project')
map = project.maps[0]
# Printing the tags object shows one row per data point and one 0/1 column per tag.
print(map.tags)
      id_  oil  search_engines
0      0A    0               0
1      0g    0               0
2      0Q    0               0
3      0w    0               0
4      1A    1               0
...   ...  ...             ...
9998  JZQ    0               0
9999  JZU    0               0
Source code in nomic/data_operations.py
class AtlasMapTags:
    """
    Atlas Map Tag State

    Tags are shared across all maps in your AtlasProject. You can manipulate tags by filtering over
    the associated pandas dataframe.

    === "Accessing Tags Example"
        ``` py
        from nomic import AtlasProject

        project = AtlasProject(name='My Project')
        map = project.maps[0]
        print(map.tags)
        ```
    === "Output"
        ```
              id_  oil  search_engines
        0      0A    0               0
        1      0g    0               0
        2      0Q    0               0
        3      0w    0               0
        4      1A    1               0
        ...   ...  ...             ...
        9998  JZQ    0               0
        9999  JZU    0               0
        ```
    """

    def __init__(self, projection: "AtlasProjection"):
        self.projection = projection
        self.project = projection.project
        self.id_field = self.projection.project.id_field
        # Only the id column of the tile data is kept here; the tag columns are
        # rebuilt from the server every time `df` is accessed.
        self._tb: pa.Table = projection._fetch_tiles().select([self.id_field])

    @property
    def df(self) -> pd.DataFrame:
        """
        Pandas dataframe mapping each data point to its tags.

        The frame has the project's id column followed by one column per tag holding
        1 where the data point carries that tag and 0 otherwise. Every access calls
        `get_tags`, so the result reflects the tags stored on the server at that time.

        === "Accessing Tags Example"
            ``` py
            from nomic import AtlasProject

            project = AtlasProject(name='My Project')
            map = project.maps[0]
            print(map.tags.df)
            ```
        === "Output"
            ```
                  id_  oil  search_engines
            0      0A    0               0
            1      0g    0               0
            2      0Q    0               0
            3      0w    0               0
            4      1A    1               0
            ...   ...  ...             ...
            9998  JZQ    0               0
            9999  JZU    0               0
            ```
        """
        id_frame = self._tb.to_pandas()
        tag_to_datums = self.get_tags()
        datum_ids = id_frame[self.id_field]

        # Multi-hot encoding: one 0/1 column per tag, one entry per data point.
        encoded = {
            tag: [1 if datum_id in members else 0 for datum_id in datum_ids]
            for tag, members in tag_to_datums.items()
        }
        tag_frame = pd.DataFrame(encoded)

        return pd.concat([id_frame, tag_frame], axis=1)

    def get_tags(self) -> Dict[str, set]:
        '''
        Retrieves all tags stored on the server for this map's project.

        Returns:
            A dictionary mapping each tag to the set of datum ids that carry it.
        '''
        response = requests.post(
            self.project.atlas_api_path + '/v1/project/tag/read/all_by_datum',
            headers=self.project.header,
            json={
                'project_id': self.project.id,
            },
        )
        datums_and_tags = response.json()['results']

        # The server answers per datum; invert that into tag -> {datum ids}.
        label_to_datums = {}
        for item in datums_and_tags:
            for label in item['labels']:
                label_to_datums.setdefault(label, set()).add(item['datum_id'])
        return label_to_datums

    def _post_tag_payload(self, endpoint: str, metadata: dict, ids: List[str], error_message: str) -> None:
        '''
        Serializes `ids` into a zstd-compressed Arrow IPC file and posts it to `endpoint`.

        The request metadata is JSON-encoded and sent as the name of the single
        column that holds the ids.

        Args:
            endpoint: API path appended to the project's `atlas_api_path`.
            metadata: JSON-serializable request description.
            ids: The datum ids the request operates on.
            error_message: Message of the exception raised on a non-200 response.

        Raises:
            Exception: If the server responds with a status code other than 200.
        '''
        colname = json.dumps(metadata)
        payload_table = pa.table([pa.array(ids, type=pa.string())], [colname])
        buffer = io.BytesIO()
        # Leaving the `with` block closes the writer, which finalizes the IPC file.
        with ipc.new_file(buffer, payload_table.schema, options=ipc.IpcWriteOptions(compression='zstd')) as writer:
            writer.write_table(payload_table)
        payload = buffer.getvalue()

        # Copy so the project's shared header dict is not mutated.
        headers = self.project.header.copy()
        headers['Content-Type'] = 'application/octet-stream'
        response = requests.post(self.project.atlas_api_path + endpoint, headers=headers, data=payload)
        if response.status_code != 200:
            raise Exception(error_message)

    def add(self, ids: List[str], tags: List[str]):
        '''
        Adds tags to datapoints.

        Args:
            ids: The datum ids you want to tag
            tags: A list containing the tags you want to apply to these data points.

        Raises:
            AssertionError: If `ids` or `tags` is not a list.
            Exception: If the server responds with a status code other than 200.
        '''
        assert isinstance(ids, list), 'ids must be a list of strings'
        assert isinstance(tags, list), 'tags must be a list of strings'

        metadata = {
            'project_id': self.project.id,
            'atlas_index_id': self.projection.atlas_index_id,
            'type': 'datum_id',
            'tags': tags,
        }
        self._post_tag_payload("/v1/project/tag/add", metadata, ids, "Failed to add tags")

    def remove(self, ids: List[str], tags: List[str], delete_all: bool = False) -> bool:
        '''
        Deletes the specified tags from the given data points.

        Args:
            ids: The datum_ids to delete tags from.
            tags: The list of tags to delete from the data points. Each tag will be removed from all data points in `ids`.
            delete_all: If true, ignores ids parameter and deletes all specified tags from all data points.

        Returns:
            True on success

        Raises:
            AssertionError: If `ids` or `tags` is not a list.
            Exception: If the server responds with a status code other than 200.
        '''
        assert isinstance(ids, list), 'datum_ids must be a list of strings'
        assert isinstance(tags, list), 'tags must be a list of strings'

        metadata = {
            'project_id': self.project.id,
            'atlas_index_id': self.projection.atlas_index_id,
            'type': 'datum_id',
            'tags': tags,
            'delete_all': delete_all,
        }
        self._post_tag_payload("/v1/project/tag/delete", metadata, ids, "Failed to delete tags")
        # Failures raise above, so reaching this point means the delete succeeded.
        return True

    def __repr__(self) -> str:
        return str(self.df)
df: pd.DataFrame property

Pandas dataframe mapping each data point to its tags.

from nomic import AtlasProject

# Look up the project by name and take the first of its maps.
project = AtlasProject(name='My Project')
map = project.maps[0]
# `df` is the pandas dataframe with one row per data point and one 0/1 column per tag.
print(map.tags.df)
      id_  oil  search_engines
0      0A    0               0
1      0g    0               0
2      0Q    0               0
3      0w    0               0
4      1A    1               0
...   ...  ...             ...
9998  JZQ    0               0
9999  JZU    0               0
add(ids, tags)

Adds tags to datapoints.

Parameters:

  • ids (List[str]) –

    The datum ids you want to tag

  • tags (List[str]) –

    A list containing the tags you want to apply to these data points.

Source code in nomic/data_operations.py
def add(self, ids: List[str], tags: List[str]):
    '''
    Tags the given data points.

    Args:
        ids: The datum ids to tag.
        tags: The tags to apply to those data points.

    Raises:
        AssertionError: If `ids` or `tags` is not a list.
        Exception: If the server responds with a status code other than 200.
    '''
    assert isinstance(ids, list), 'ids must be a list of strings'
    assert isinstance(tags, list), 'tags must be a list of strings'

    # The request metadata is JSON-encoded and sent as the name of the single
    # Arrow column that holds the ids.
    request_metadata = {
        'project_id': self.project.id,
        'atlas_index_id': self.projection.atlas_index_id,
        'type': 'datum_id',
        'tags': tags,
    }
    column_name = json.dumps(request_metadata)
    id_column = pa.array(ids, type=pa.string())
    table = pa.table([id_column], [column_name])

    # Write the table as a zstd-compressed Arrow IPC file into memory.
    sink = io.BytesIO()
    write_options = ipc.IpcWriteOptions(compression='zstd')
    arrow_writer = ipc.new_file(sink, table.schema, options=write_options)
    arrow_writer.write_table(table)
    arrow_writer.close()
    body = sink.getvalue()

    # Work on a copy so the project's own header dict stays untouched.
    request_headers = self.project.header.copy()
    request_headers['Content-Type'] = 'application/octet-stream'
    url = self.project.atlas_api_path + "/v1/project/tag/add"
    response = requests.post(url, headers=request_headers, data=body)
    if response.status_code != 200:
        raise Exception("Failed to add tags")
get_tags()

Retrieves all tags made in the web browser for a specific map.

Returns:

  • Dict[str, List[str]]

    A dictionary mapping each tag to the set of datum ids that carry it.

Source code in nomic/data_operations.py
def get_tags(self) -> Dict[str, set]:
    '''
    Retrieves all tags stored on the server for this map's project.

    Returns:
        A dictionary mapping each tag to the set of datum ids that carry it.
    '''
    response = requests.post(
        self.project.atlas_api_path + '/v1/project/tag/read/all_by_datum',
        headers=self.project.header,
        json={
            'project_id': self.project.id,
        },
    )
    datums_and_tags = response.json()['results']

    # The server answers per datum; invert that into tag -> {datum ids}.
    label_to_datums = {}
    for item in datums_and_tags:
        for label in item['labels']:
            label_to_datums.setdefault(label, set()).add(item['datum_id'])
    return label_to_datums
remove(ids, tags, delete_all=False)

Deletes the specified tags from the given data points.

Parameters:

  • ids (List[str]) –

    The datum_ids to delete tags from.

  • tags (List[str]) –

    The list of tags to delete from the data points. Each tag will be removed from all data points in ids.

  • delete_all (bool, default: False ) –

    If true, ignores ids parameter and deletes all specified tags from all data points.

Returns:

  • bool

    True on success

Source code in nomic/data_operations.py
def remove(self, ids: List[str], tags: List[str], delete_all: bool = False) -> bool:
    '''
    Deletes the specified tags from the given data points.

    Args:
        ids: The datum_ids to delete tags from.
        tags: The list of tags to delete from the data points. Each tag will be removed from all data points in `ids`.
        delete_all: If true, ignores ids parameter and deletes all specified tags from all data points.

    Returns:
        True on success

    Raises:
        AssertionError: If `ids` or `tags` is not a list.
        Exception: If the server responds with a status code other than 200.
    '''
    assert isinstance(ids, list), 'datum_ids must be a list of strings'
    assert isinstance(tags, list), 'tags must be a list of strings'

    # The request metadata is JSON-encoded and sent as the name of the single
    # Arrow column that holds the ids.
    colname = json.dumps(
        {
            'project_id': self.project.id,
            'atlas_index_id': self.projection.atlas_index_id,
            'type': 'datum_id',
            'tags': tags,
            'delete_all': delete_all,
        }
    )
    payload_table = pa.table([pa.array(ids, type=pa.string())], [colname])
    buffer = io.BytesIO()
    writer = ipc.new_file(buffer, payload_table.schema, options=ipc.IpcWriteOptions(compression='zstd'))
    writer.write_table(payload_table)
    writer.close()
    payload = buffer.getvalue()

    # Copy so the project's shared header dict is not mutated.
    headers = self.project.header.copy()
    headers['Content-Type'] = 'application/octet-stream'
    response = requests.post(self.project.atlas_api_path + "/v1/project/tag/delete", headers=headers, data=payload)
    if response.status_code != 200:
        raise Exception("Failed to delete tags")
    # Honour the declared `bool` return: failures raise above, so this is success.
    return True