binney

The binney library provides several interfaces

 1from .binney import *
 2from .binney import __all__
 3from .binney import __doc__
 4
 5from .binney import cli as cli
 6
 7from pathlib import Path
 8
 9try:
10    import polars as pl
11    class BinDirectoryDF(BinDirectory):
12        def read_all(self, overwrite=False) -> pl.LazyFrame:
13            """
14                Retrieve a lazy polars dataframe for processing all of the photons in this bin directory
15            """
16            files = self.convert_all(overwrite=overwrite)
17            return pl.scan_parquet(files)
18
19        def read_timerange(self, timerange: TimestampRange, overwrite=False) -> pl.LazyFrame:
20            """
21                Retrieve a lazy polars dataframe for processing a specific timerange of photons in
22                this bin directory
23            """
24            files = self.convert_timerange(timerange, overwrite=overwrite)
25            return pl.scan_parquet(files)
26
27        def read_timeranges(self, timeranges: list[TimestampRange], overwrite=False) -> pl.LazyFrame:
28            """
29                Retrieve a lazy polars dataframe for processing each timerange in timeranges
30            """
31            fileses = self.convert_timeranges(timeranges, overwrite=overwrite)
32            return [pl.scan_parquet(files) for files in fileses]
33
34    __all__ = [BinDirectoryDF] + __all__
35except ImportError:
36    pass
class BinDirectoryDF(binney.BinDirectory):
12    class BinDirectoryDF(BinDirectory):
13        def read_all(self, overwrite=False) -> pl.LazyFrame:
14            """
15                Retrieve a lazy polars dataframe for processing all of the photons in this bin directory
16            """
17            files = self.convert_all(overwrite=overwrite)
18            return pl.scan_parquet(files)
19
20        def read_timerange(self, timerange: TimestampRange, overwrite=False) -> pl.LazyFrame:
21            """
22                Retrieve a lazy polars dataframe for processing a specific timerange of photons in
23                this bin directory
24            """
25            files = self.convert_timerange(timerange, overwrite=overwrite)
26            return pl.scan_parquet(files)
27
28        def read_timeranges(self, timeranges: list[TimestampRange], overwrite=False) -> pl.LazyFrame:
29            """
30                Retrieve a lazy polars dataframe for processing each timerange in timeranges
31            """
32            fileses = self.convert_timeranges(timeranges, overwrite=overwrite)
33            return [pl.scan_parquet(files) for files in fileses]

Represents a folder full of bin files and provides access methods

This is intentionally opaque to force you to use the provided methods for operations which correctly handle counter overflows which may occur up to once during a gen2 observing night. Both sides of the range are inclusive

Instead of using this directly you probably want BinDirectoryDF which provides methods for getting a lazy polars.LazyFrame which supports a powerful and efficient query syntax (Requires the polars feature to be enabled, i.e. "pybinney[polars]" in your pyproject.toml)

Usage

bindir = BinDirectory("/nfs/dark/data/ScienceData/Subaru/20201006/", "./parquet-cache", progress=True)
bindir.convert_all()

Parameters

BinDirectory(bindir: pathlib.Path | str, parquet_dir: pathlib.Path | str, /, *, progress: bool = False) -> BinDirectory

  • bindir: pathlib.Path | str A folder full of gen2 format .bin files
  • parquet_dir: pathlib.Path | str
  • progress: bool Show a progress bar during large conversions (BinDirectory.convert_all)
def read_all(self, overwrite=False) -> polars.lazyframe.frame.LazyFrame:
13        def read_all(self, overwrite=False) -> pl.LazyFrame:
14            """
15                Retrieve a lazy polars dataframe for processing all of the photons in this bin directory
16            """
17            files = self.convert_all(overwrite=overwrite)
18            return pl.scan_parquet(files)

Retrieve a lazy polars dataframe for processing all of the photons in this bin directory

def read_timerange( self, timerange: TimestampRange, overwrite=False) -> polars.lazyframe.frame.LazyFrame:
20        def read_timerange(self, timerange: TimestampRange, overwrite=False) -> pl.LazyFrame:
21            """
22                Retrieve a lazy polars dataframe for processing a specific timerange of photons in
23                this bin directory
24            """
25            files = self.convert_timerange(timerange, overwrite=overwrite)
26            return pl.scan_parquet(files)

Retrieve a lazy polars dataframe for processing a specific timerange of photons in this bin directory

def read_timeranges( self, timeranges: list[TimestampRange], overwrite=False) -> polars.lazyframe.frame.LazyFrame:
28        def read_timeranges(self, timeranges: list[TimestampRange], overwrite=False) -> pl.LazyFrame:
29            """
30                Retrieve a lazy polars dataframe for processing each timerange in timeranges
31            """
32            fileses = self.convert_timeranges(timeranges, overwrite=overwrite)
33            return [pl.scan_parquet(files) for files in fileses]

Retrieve a list of lazy polars dataframes, one for processing each timerange in timeranges

class BinDirectory:

Represents a folder full of bin files and provides access methods

This is intentionally opaque to force you to use the provided methods for operations which correctly handle counter overflows which may occur up to once during a gen2 observing night. Both sides of the range are inclusive

Instead of using this directly you probably want BinDirectoryDF which provides methods for getting a lazy polars.LazyFrame which supports a powerful and efficient query syntax (Requires the polars feature to be enabled, i.e. "pybinney[polars]" in your pyproject.toml)

Usage

bindir = BinDirectory("/nfs/dark/data/ScienceData/Subaru/20201006/", "./parquet-cache", progress=True)
bindir.convert_all()

Parameters

BinDirectory(bindir: pathlib.Path | str, parquet_dir: pathlib.Path | str, /, *, progress: bool = False) -> BinDirectory

  • bindir: pathlib.Path | str A folder full of gen2 format .bin files
  • parquet_dir: pathlib.Path | str
  • progress: bool Show a progress bar during large conversions (BinDirectory.convert_all)
def convert_all(self, /, overwrite):

Convert all bin files in the bin directory into parquet files

If overwrite is set this will overwrite existing parquet files otherwise it will only overwrite a parquet file if the corresponding bin file has changed since the parquet file was last written

def convert_timerange(self, /, trange, overwrite):

Convert the bin files covering a TimestampRange into parquet files

def convert_timeranges(self, /, tranges, overwrite):

Convert a set of TimestampRanges

class TimestampRange:

Represents a range of gen2 timestamps in microseconds (us)

This is intentionally opaque to force you to use the provided methods for operations which correctly handle counter overflows which may occur up to once during a gen2 observing night. Both sides of the range are inclusive

Usage

timerange = TimestampRange(10, 100)
assert(timerange.inside(50))
other = TimestampRange(100, 500)
assert(other.overlaps(timerange))

Parameters

TimestampRange(start: int, stop: int) -> TimestampRange

  • start: int The start time for this timerange in ticks
  • stop: int The stop time for this timerange in ticks
def inside(self, /, timestamp):

Check if a given timestamp is inside the range

def overlaps(self, /, other):

Check if another timestamp range overlaps with this one

def grow(self, /, tolerance):

Grow the range by tolerance ticks on either side, can be negative to shrink a range. This returns a new timestamp range instead of mutating the current one.

class BinSerializer:

Serializes one or more polars dataframes into gen2 .bin files

This expects a polars dataframe with the same schema provided by BinDirectoryDF.read_timerange, namely:

Schema({
    'xy': UInt16,
    'timestamp': Int64,
    'phase': Int32,
    'baseline': Int32,
    'bar': Int64
})

It also expects every cell in the dataframe to be non-empty and every pixel to be present exactly once in the board-to-pixel mapping

Usage

# Board 234 with pixels (x: 1, y: 1) and (x: 10, y: 7)
# Board 236 with pixel (x: 0, y: 0)
# Assume the year part of the timestamp is in 2022
# A skew of -1ms between the nominal bin edges and the serialized ones
serializer = BinSerializer(
    directory="./mybins",
    board2pixel={
        234: [(1 << 8) | 1, (10 << 8) | 7],
        236: [0]
    },
    year=2022,
    skew=-1000
)
serializer.serialize_pydataframe(mydf)
serializer.serialize_pydataframe(mydf)

Parameters

BinSerializer(directory: pathlib.Path | str, board2pixel: dict[int, list[int]], year: int, skew: int) -> BinSerializer

  • directory: pathlib.Path | str The output directory for bin files
  • board2pixel: dict[int, list[int]] A dictionary mapping board numbers to a set of pixel xy numbers which must be uniquely mapped to a board
  • year: int The year to serialize as, gen2 timestamps cover a roughly 390 day range, they are referenced to the microseconds from the start of the year the run was taken in
  • skew: int The skew between the nominal second each bin file covers and the true second it covers in microseconds
def serialize_pydataframe(self, /, df):

Serialize a polars.DataFrame with appropriate schema into bin files

Currently this will overwrite any existing bin files in the same folder if they exist in the same time range and were created by a different BinSerializer method. It will not overwrite bin files written by previous calls to this method, because it refuses to serialize subsequent dataframes if it has previously written a dataframe with a timestamp range coming within 1 second of this one. This requirement may be relaxed in a future release if it is necessary.