binney
The binney library provides several interfaces
from .binney import *
from .binney import __all__
from .binney import __doc__

from .binney import cli as cli

from pathlib import Path

try:
    import polars as pl
except ImportError:
    # polars is an optional dependency; without it only the names re-exported
    # from .binney are available and BinDirectoryDF is simply not defined.
    pass
else:
    class BinDirectoryDF(BinDirectory):
        """
        A BinDirectory with polars-based readers.

        Each ``read_*`` method calls the matching ``convert_*`` method and hands
        the files it returns to ``pl.scan_parquet``, so nothing is loaded until
        the resulting lazy frame is collected.
        """

        def read_all(self, overwrite: bool = False) -> pl.LazyFrame:
            """
            Retrieve a lazy polars dataframe for processing all of the photons in this bin directory

            ``overwrite`` is forwarded unchanged to ``convert_all``.
            """
            files = self.convert_all(overwrite=overwrite)
            return pl.scan_parquet(files)

        def read_timerange(self, timerange: TimestampRange, overwrite: bool = False) -> pl.LazyFrame:
            """
            Retrieve a lazy polars dataframe for processing a specific timerange of photons in
            this bin directory

            ``overwrite`` is forwarded unchanged to ``convert_timerange``.
            """
            files = self.convert_timerange(timerange, overwrite=overwrite)
            return pl.scan_parquet(files)

        def read_timeranges(self, timeranges: list[TimestampRange], overwrite: bool = False) -> list[pl.LazyFrame]:
            """
            Retrieve a list of lazy polars dataframes, one for each entry that
            ``convert_timeranges`` returns for ``timeranges``

            ``overwrite`` is forwarded unchanged to ``convert_timeranges``.
            """
            fileses = self.convert_timeranges(timeranges, overwrite=overwrite)
            return [pl.scan_parquet(files) for files in fileses]

    # __all__ must contain names (strings), not the objects themselves;
    # a class object here makes `from binney import *` raise TypeError.
    __all__ = ["BinDirectoryDF", *__all__]
class BinDirectoryDF(BinDirectory):
    """
    A BinDirectory with polars-based readers.

    Each ``read_*`` method calls the matching ``convert_*`` method and hands
    the files it returns to ``pl.scan_parquet``, so nothing is loaded until
    the resulting lazy frame is collected.
    """

    def read_all(self, overwrite: bool = False) -> pl.LazyFrame:
        """
        Retrieve a lazy polars dataframe for processing all of the photons in this bin directory

        ``overwrite`` is forwarded unchanged to ``convert_all``.
        """
        files = self.convert_all(overwrite=overwrite)
        return pl.scan_parquet(files)

    def read_timerange(self, timerange: TimestampRange, overwrite: bool = False) -> pl.LazyFrame:
        """
        Retrieve a lazy polars dataframe for processing a specific timerange of photons in
        this bin directory

        ``overwrite`` is forwarded unchanged to ``convert_timerange``.
        """
        files = self.convert_timerange(timerange, overwrite=overwrite)
        return pl.scan_parquet(files)

    def read_timeranges(self, timeranges: list[TimestampRange], overwrite: bool = False) -> list[pl.LazyFrame]:
        """
        Retrieve a list of lazy polars dataframes, one for each entry that
        ``convert_timeranges`` returns for ``timeranges``

        ``overwrite`` is forwarded unchanged to ``convert_timeranges``.
        """
        fileses = self.convert_timeranges(timeranges, overwrite=overwrite)
        # One LazyFrame per group of files, so the result is a list rather
        # than a single frame.
        return [pl.scan_parquet(files) for files in fileses]
Represents a folder full of bin files and provides access methods
This is intentionally opaque to force you to use the provided methods for operations which correctly handle counter overflows which may occur up to once during a gen2 observing night. Both sides of the range are inclusive
Instead of using this directly you probably want BinDirectoryDF, which provides methods for getting a lazy polars.LazyFrame that supports a powerful and efficient query syntax (requires the polars feature to be enabled, i.e. "pybinney[polars]" in your pyproject.toml).
Usage
bindir = BinDirectory("/nfs/dark/data/ScienceData/Subaru/20201006/", "./parquet-cache", progress=True)
bindir.convert_all()
Parameters
BinDirectory(bindir: pathlib.Path | str, parquet_dir: pathlib.Path | str, /, *, progress: bool = False) -> BinDirectory
bindir: pathlib.Path | str
A folder full of gen2 format .bin files
parquet_dir: pathlib.Path | str
progress: bool
Show a progress bar during large conversions (BinDirectory.convert_all)
def read_all(self, overwrite=False) -> pl.LazyFrame:
    """
    Build a lazy polars frame covering every photon in this bin directory.

    ``overwrite`` is passed straight through to ``convert_all``; the files
    that call returns are handed to ``pl.scan_parquet``.
    """
    return pl.scan_parquet(self.convert_all(overwrite=overwrite))
Retrieve a lazy polars dataframe for processing all of the photons in this bin directory
def read_timerange(self, timerange: TimestampRange, overwrite=False) -> pl.LazyFrame:
    """
    Build a lazy polars frame for the photons of one timerange in this bin
    directory.

    ``timerange`` and ``overwrite`` are passed straight through to
    ``convert_timerange``; the files that call returns are handed to
    ``pl.scan_parquet``.
    """
    return pl.scan_parquet(self.convert_timerange(timerange, overwrite=overwrite))
Retrieve a lazy polars dataframe for processing a specific timerange of photons in this bin directory
def read_timeranges(self, timeranges: list[TimestampRange], overwrite: bool = False) -> list[pl.LazyFrame]:
    """
    Retrieve a list of lazy polars dataframes, one for each entry that
    ``convert_timeranges`` returns for ``timeranges``

    ``overwrite`` is forwarded unchanged to ``convert_timeranges``.
    """
    fileses = self.convert_timeranges(timeranges, overwrite=overwrite)
    # One LazyFrame per group of files, so the result is a list rather than
    # a single frame.
    return [pl.scan_parquet(files) for files in fileses]
Retrieve a list of lazy polars dataframes, one for processing each timerange in timeranges
Inherited Members
Represents a folder full of bin files and provides access methods
This is intentionally opaque to force you to use the provided methods for operations which correctly handle counter overflows which may occur up to once during a gen2 observing night. Both sides of the range are inclusive
Instead of using this directly you probably want BinDirectoryDF, which provides methods for getting a lazy polars.LazyFrame that supports a powerful and efficient query syntax (requires the polars feature to be enabled, i.e. "pybinney[polars]" in your pyproject.toml).
Usage
bindir = BinDirectory("/nfs/dark/data/ScienceData/Subaru/20201006/", "./parquet-cache", progress=True)
bindir.convert_all()
Parameters
BinDirectory(bindir: pathlib.Path | str, parquet_dir: pathlib.Path | str, /, *, progress: bool = False) -> BinDirectory
bindir: pathlib.Path | str
A folder full of gen2 format .bin files
parquet_dir: pathlib.Path | str
progress: bool
Show a progress bar during large conversions (BinDirectory.convert_all)
Represents a range of us gen2 timestamps
This is intentionally opaque to force you to use the provided methods for operations which correctly handle counter overflows which may occur up to once during a gen2 observing night. Both sides of the range are inclusive
Usage
timerange = TimestampRange(10, 100)
assert(timerange.inside(50))
other = TimestampRange(100, 500)
assert(other.overlaps(timerange))
Parameters
TimestampRange(start: int, stop: int) -> TimestampRange
start: int
The start time for this timerange in ticks
stop: int
The stop time for this timerange in ticks
Serializes one or more polars dataframes into gen2 .bin
files
This expects a polars dataframe with the same schema provided by BinDirectoryDF.read_timerange, namely:
Schema({
'xy': UInt16,
'timestamp': Int64,
'phase': Int32,
'baseline': Int32,
'bar': Int64
})
It also expects every cell in the dataframe to be non-empty and every pixel to be present exactly once in the board-to-pixel mapping
Usage
# Board 234 with pixels (x: 1, y: 1) and (x: 10, y: 7)
# Board 236 with pixel (x: 0, y: 0)
# Assume the year part of the timestamp is in 2022
# A skew of -1ms between the nominal bin edges and the serialized ones
serializer = BinSerializer(
directory="./mybins",
board2pixel={
234: [(1 << 8) | 1, (10 << 8) | 7],
236: [0]
},
year=2022,
skew=-1000
)
serializer.serialize_pydataframe(mydf)
serializer.serialize_pydataframe(mydf)
Parameters
BinSerializer(directory: pathlib.Path | str, board2pixel: dict[int, list[int]], year: int, skew: int) -> BinSerializer
directory: pathlib.Path | str
The output directory for bin files
board2pixel: dict[int, list[int]]
A dictionary mapping board numbers to a set of pixel xy numbers which must be uniquely mapped to a board
year: int
The year to serialize as. gen2 timestamps cover a roughly 390 day range and are referenced in microseconds from the start of the year the run was taken in
skew: int
The skew between the nominal second each bin file covers and the true second it covers in microseconds
Serialize a polars.DataFrame
with appropriate schema into bin files
Currently this will overwrite any existing bin files in the same folder if they exist in the same time range and were created by a different BinSerializer method. It will not overwrite binfiles written by previous calls to this method by virtue of refusing to serialize subsequent dataframes if it has previously written a dataframe with a timestamp range coming within 1 second of this one. This requirement may be relaxed in a future release if it is necessary.