Skip to content

PandasTDF

Low-level classes for direct access to the .tdf SQLite database and the Bruker TimsData C library. Prefer the high-level DDA/DIA API unless you need raw frame or scan data.

tdfpy.PandasTdf dataclass

PandasTdf(db_path: str)

A class for working with TDF (Bruker Data File) using pandas DataFrames.

calibration_info property

calibration_info: pd.DataFrame

The 'CALIBRATION_INFO' table as a pandas DataFrame. :return: table as a pandas DataFrame

dia_frame_msms_info property

dia_frame_msms_info: pd.DataFrame

The 'DIA_FRAME_MSMS_INFO' table as a pandas DataFrame. :return: table as a pandas DataFrame

dia_frame_msms_window_groups property

dia_frame_msms_window_groups: pd.DataFrame

The 'DIA_FRAME_MSMS_WINDOW_GROUPS' table as a pandas DataFrame. :return: table as a pandas DataFrame

dia_frame_msms_windows property

dia_frame_msms_windows: pd.DataFrame

The 'DIA_FRAME_MSMS_WINDOWS' table as a pandas DataFrame. :return: table as a pandas DataFrame

error_log property

error_log: pd.DataFrame

The 'ERROR_LOG' table as a pandas DataFrame. :return: table as a pandas DataFrame

frame_msms_info property

frame_msms_info: pd.DataFrame

The 'FRAME_MSMS_INFO' table as a pandas DataFrame. :return: table as a pandas DataFrame

frame_properties property

frame_properties: pd.DataFrame

The 'FRAME_PROPERTIES' table as a pandas DataFrame. :return: table as a pandas DataFrame

frames property

frames: pd.DataFrame

The 'FRAMES' table as a pandas DataFrame. :return: table as a pandas DataFrame

global_metadata property

global_metadata: pd.DataFrame

The 'GLOBAL_METADATA' table as a pandas DataFrame. :return: table as a pandas DataFrame

group_properties property

group_properties: pd.DataFrame

The 'GROUP_PROPERTIES' table as a pandas DataFrame. :return: table as a pandas DataFrame

mz_calibration property

mz_calibration: pd.DataFrame

The 'MZ_CALIBRATION' table as a pandas DataFrame. :return: table as a pandas DataFrame

pasef_frame_msms_info property

pasef_frame_msms_info: pd.DataFrame

The 'PASEF_FRAME_MSMS_INFO' table as a pandas DataFrame. :return: table as a pandas DataFrame

precursors property

precursors: pd.DataFrame

The 'PRECURSORS' table as a pandas DataFrame. :return: table as a pandas DataFrame

properties property

properties: pd.DataFrame

The 'PROPERTIES' table as a pandas DataFrame. :return: table as a pandas DataFrame

property_definitions property

property_definitions: pd.DataFrame

The 'PROPERTY_DEFINITIONS' table as a pandas DataFrame. :return: table as a pandas DataFrame

property_groups property

property_groups: pd.DataFrame

The 'PROPERTY_GROUPS' table as a pandas DataFrame. :return: table as a pandas DataFrame

segments property

segments: pd.DataFrame

The 'SEGMENTS' table as a pandas DataFrame. :return: table as a pandas DataFrame

tims_calibration property

tims_calibration: pd.DataFrame

The 'TIMS_CALIBRATION' table as a pandas DataFrame. :return: table as a pandas DataFrame

prm_frame_measurement_mode property

prm_frame_measurement_mode: pd.DataFrame

The 'PRM_FRAME_MEASUREMENT_MODE' table as a pandas DataFrame. :return: table as a pandas DataFrame

prm_frame_msms_info property

prm_frame_msms_info: pd.DataFrame

The 'PRM_FRAME_MSMS_INFO' table as a pandas DataFrame. :return: table as a pandas DataFrame

prm_targets property

prm_targets: pd.DataFrame

The 'PRM_TARGETS' table as a pandas DataFrame. :return: table as a pandas DataFrame

is_dda property

is_dda: bool

Checks if the database contains DDA (Data-Dependent Acquisition) data.

Returns:

Name Type Description
bool bool

True if DDA data is present, False otherwise.

is_prm property

is_prm: bool

Checks if the database contains PRM (Parallel Reaction Monitoring) data.

Returns:

Name Type Description
bool bool

True if PRM data is present, False otherwise.

is_dia property

is_dia: bool

Checks if the database contains DIA (Data-Independent Acquisition) data.

Returns:

Name Type Description
bool bool

True if DIA data is present, False otherwise.

is_maldi property

is_maldi: bool

Checks if the database contains MALDI (Matrix-Assisted Laser Desorption/Ionization) data. Not supported in tdfpy, but this method can be used to check for MALDI data if it is added in the future.

Returns:

Name Type Description
bool bool

True if MALDI data is present, False otherwise.

get_table_names

get_table_names() -> list[str]

Retrieves the names of all tables in the SQLite database.

Returns:

Type Description
list[str]

list[str]: A list of table names in the database.

Source code in src/tdfpy/tdf.py
216
217
218
219
220
221
222
223
224
225
226
227
def get_table_names(self) -> list[str]:
    """
    Retrieves the names of all tables in the SQLite database.

    Returns:
        list[str]: A list of table names in the database.
    """
    # NOTE: using sqlite3.connect as a context manager only manages the
    # transaction -- it does NOT close the connection.  Close explicitly in
    # a finally block so repeated calls do not leak file handles.
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        return [table[0] for table in cursor.fetchall()]
    finally:
        conn.close()

tdfpy.TimsData

TimsData(
    analysis_directory: str,
    use_recalibrated_state: bool = False,
    pressure_compensation_strategy: PressureCompensationStrategy = PressureCompensationStrategy.NoPressureCompensation,
)
Source code in src/tdfpy/timsdata.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def __init__(
    self,
    analysis_directory: str,
    use_recalibrated_state: bool = False,
    pressure_compensation_strategy: PressureCompensationStrategy = PressureCompensationStrategy.NoPressureCompensation,
) -> None:
    """Open a Bruker TIMS analysis directory via the native timsdata library.

    Args:
        analysis_directory: Path to the analysis directory containing
            'analysis.tdf' (SQLite metadata) and the binary frame data.
        use_recalibrated_state: Forwarded to 'tims_open_v2' as 1/0;
            presumably selects a recalibrated state if one exists --
            confirm against the timsdata.h documentation.
        pressure_compensation_strategy: Enum whose .value is passed
            straight through to 'tims_open_v2'.

    Raises:
        ValueError: If analysis_directory is not a string.
        ImportError: If the native libtimsdata library failed to load at
            module import time.
    """
    if not isinstance(analysis_directory, str):  # type: ignore[type-var]
        raise ValueError("analysis_directory must be a string.")

    # 'dll' is loaded once at module import; '_dll_load_error' records why
    # loading failed, so surface it here with a chained ImportError.
    if dll is None:
        raise ImportError(
            f"libtimsdata native library could not be loaded: {_dll_load_error}"
        ) from _dll_load_error

    self.analysis_directory = analysis_directory
    self.dll: CDLL = dll
    self.handle: int | None
    self.conn: sqlite3.Connection | None
    self.initial_frame_buffer_size: float

    # A zero handle signals failure; the C library stores the error message,
    # which _throwLastTimsDataError retrieves and raises.
    self.handle = self.dll.tims_open_v2(
        analysis_directory.encode("utf-8"),
        1 if use_recalibrated_state else 0,
        pressure_compensation_strategy.value,
    )
    if self.handle == 0:
        _throwLastTimsDataError(self.dll)

    # SQLite connection to the frame/precursor metadata alongside the binary data.
    self.conn = sqlite3.connect(os.path.join(analysis_directory, "analysis.tdf"))

    self.initial_frame_buffer_size = 128  # may grow in readScans()

readScansDllBuffer

readScansDllBuffer(
    frame_id: int, scan_begin: int, scan_end: int
) -> npt.NDArray[np.uint32]

Read a range of scans from a frame, returning the data in the low-level buffer format defined for the 'tims_read_scans_v2' DLL function (see documentation in 'timsdata.h').

Source code in src/tdfpy/timsdata.py
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
def readScansDllBuffer(
    self, frame_id: int, scan_begin: int, scan_end: int
) -> npt.NDArray[np.uint32]:
    """Read a range of scans from a frame.

    Returns the raw uint32 buffer in the low-level layout defined for the
    'tims_read_scans_v2' DLL function (see documentation in 'timsdata.h').
    """

    # Retry with a larger buffer until the DLL reports the data fit.
    while True:
        capacity = int(self.initial_frame_buffer_size)  # stored size may be a float
        scan_buffer = np.empty(shape=capacity, dtype=np.uint32)
        byte_capacity = 4 * capacity

        required_len = self.dll.tims_read_scans_v2(
            self.handle,
            frame_id,
            scan_begin,
            scan_end,
            scan_buffer.ctypes.data_as(POINTER(c_uint32)),
            byte_capacity,
        )
        if required_len == 0:
            _throwLastTimsDataError(self.dll)

        # Success: everything fit in the buffer we supplied.
        if required_len <= byte_capacity:
            return scan_buffer

        if required_len > 16777216:
            # arbitrary limit for now...
            raise RuntimeError("Maximum expected frame size exceeded.")

        # Remember the larger size so subsequent frames start big enough.
        self.initial_frame_buffer_size = required_len / 4 + 1  # grow buffer

readScans

readScans(
    frame_id: int, scan_begin: int, scan_end: int
) -> list[
    tuple[npt.NDArray[np.uint32], npt.NDArray[np.uint32]]
]

Read a range of scans from a frame, returning a list of scans, each scan being represented as a tuple (index_array, intensity_array).

Source code in src/tdfpy/timsdata.py
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
def readScans(
    self, frame_id: int, scan_begin: int, scan_end: int
) -> list[tuple[npt.NDArray[np.uint32], npt.NDArray[np.uint32]]]:
    """Read scans [scan_begin, scan_end) of a frame.

    Each scan is returned as a tuple (index_array, intensity_array).
    """

    buf = self.readScansDllBuffer(frame_id, scan_begin, scan_end)

    # Buffer layout: one peak count per scan, followed by each scan's
    # index array and intensity array back to back.
    num_scans = scan_end - scan_begin
    offset = num_scans
    scans: list[tuple[npt.NDArray[np.uint32], npt.NDArray[np.uint32]]] = []
    for peak_count in buf[:num_scans]:
        index_array = buf[offset : offset + peak_count]
        offset += peak_count
        intensity_array = buf[offset : offset + peak_count]
        offset += peak_count
        scans.append((index_array, intensity_array))

    return scans

extractChromatograms

extractChromatograms(
    jobs: Iterator[ChromatogramJob],
    trace_sink: Callable[
        [
            int,
            npt.NDArray[np.int64],
            npt.NDArray[np.uint64],
        ],
        None,
    ],
) -> None

Efficiently extract several MS1-only extracted-ion chromatograms.

The argument 'jobs' defines which chromatograms are to be extracted; it must be an iterator (generator) object producing a stream of ChromatogramJob objects. The jobs must be produced in the order of ascending 'time_begin'.

The function 'trace_sink' is called for each extracted trace with three arguments: job ID, numpy array of frame IDs ("x-axis"), numpy array of chromatogram values ("y-axis").

For more information, see the documentation of the C-language API of the timsdata DLL.

Source code in src/tdfpy/timsdata.py
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
def extractChromatograms(
    self,
    jobs: Iterator[ChromatogramJob],
    trace_sink: Callable[
        [int, npt.NDArray[np.int64], npt.NDArray[np.uint64]], None
    ],
) -> None:
    """Efficiently extract several MS1-only extracted-ion chromatograms.

    The argument 'jobs' defines which chromatograms are to be extracted; it must be an iterator
    (generator) object producing a stream of ChromatogramJob objects. The jobs must be produced
    in the order of ascending 'time_begin'.

    The function 'trace_sink' is called for each extracted trace with three arguments: job ID,
    numpy array of frame IDs ("x-axis"), numpy array of chromatogram values ("y-axis").

    For more information, see the documentation of the C-language API of the timsdata DLL.

    """

    @CHROMATOGRAM_JOB_GENERATOR
    def wrap_gen(job: Any, user_data: Any) -> int:
        # Return codes understood by the C library:
        # 1 = job produced, 2 = end of jobs, 0 = error (abort extraction).
        try:
            job[0] = next(jobs)
            return 1
        except StopIteration:
            return 2
        except Exception:
            # BUG FIX: the exception was previously passed as a stray
            # positional argument with no %-placeholder, so logging raised a
            # formatting error internally and the detail was lost.
            # logger.exception records the message plus the full traceback.
            logger.exception("extractChromatograms: generator produced exception")
            return 0

    @CHROMATOGRAM_TRACE_SINK
    def wrap_sink(
        job_id: int, num_points: int, frame_ids: Any, values: Any, user_data: Any
    ) -> int:
        try:
            # Copy out of the C arrays into numpy arrays; the underlying
            # buffers are presumably only valid during the callback --
            # confirm against the timsdata.h documentation.
            trace_sink(
                job_id,
                np.array(frame_ids[0:num_points], dtype=np.int64),
                np.array(values[0:num_points], dtype=np.uint64),
            )
            return 1
        except Exception:
            # Same fix as in wrap_gen: log with traceback instead of a
            # malformed lazy-args call.
            logger.exception("extractChromatograms: sink produced exception")
            return 0

    unused_user_data = 0
    rc = self.dll.tims_extract_chromatograms(
        self.handle, wrap_gen, wrap_sink, unused_user_data
    )

    if rc == 0:
        _throwLastTimsDataError(self.dll)

tdfpy.timsdata_connect

timsdata_connect(analysis_dir: str) -> Iterator[TimsData]
Source code in src/tdfpy/timsdata.py
200
201
202
203
204
205
206
207
208
@contextmanager
def timsdata_connect(analysis_dir: str) -> Iterator["TimsData"]:
    """Context manager yielding a TimsData for *analysis_dir*.

    The handle is closed on exit even if opening partially succeeded and the
    managed block raised.

    Args:
        analysis_dir: Path to the Bruker analysis directory.

    Yields:
        TimsData: An open TimsData instance.
    """
    td: TimsData | None = None
    try:
        td = TimsData(analysis_dir)
        yield td
    finally:
        # Explicit None check: relying on object truthiness (`if td:`) could
        # silently skip cleanup if TimsData ever defined __bool__/__len__.
        if td is not None:
            td.close()