Skip to content

ge.io — API reference

read_csv

Python
read_csv(path, sep: str = ',', skiprows: Optional[int] = None, encoding: str = 'utf-8', units_row: bool = False) -> dict

Read a geotech CSV file.

PARAMETER DESCRIPTION
path

File path.

TYPE: str or Path

sep

Field separator. Default ','.

TYPE: str DEFAULT: ','

skiprows

Number of lines to skip before the header row (i.e. the 0-based index of the header line). If None, auto-detected.

TYPE: int DEFAULT: None

encoding

File encoding. Default 'utf-8'.

TYPE: str DEFAULT: 'utf-8'

units_row

If True, treat the row after the header as a units row (returned in the 'units' dict) and skip it for numeric parsing.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
dict

{'data': dict[col -> ndarray], 'df': pd.DataFrame|None, 'units': dict|None, 'header_row': int, 'n_rows': int}

Source code in geoeq/io/csv_reader.py
Python
def read_csv(path, sep: str = ",", skiprows: Optional[int] = None,
             encoding: str = "utf-8", units_row: bool = False) -> dict:
    """Read a geotech CSV file.

    Parameters
    ----------
    path : str or Path
        File path.
    sep : str
        Field separator. Default ','.
    skiprows : int, optional
        Number of lines to skip before the header row, i.e. the 0-based
        index of the header line. If None, auto-detected from the first
        20 lines (falls back to 0 when no header-like line is found).
    encoding : str
        File encoding. Default 'utf-8'.
    units_row : bool
        If True, treat the row after the header as a units row (returned
        in the 'units' dict) and skip it for numeric parsing. The row is
        only consumed as units when at least one of its fields is
        non-numeric.

    Returns
    -------
    dict
        ``{'data': dict[col -> ndarray], 'df': pd.DataFrame|None,
        'units': dict|None, 'header_row': int, 'n_rows': int}``

        ``'df'`` is None when pandas is not installed; ``'n_rows'`` is the
        number of data rows that were kept.

    Raises
    ------
    ValueError
        If the file has no line at the header position (empty file, or
        ``skiprows`` pointing past the end of the file).
    """
    path = Path(path)
    lines = path.read_text(encoding=encoding).splitlines()

    # Auto-detect header row: the first non-blank line (within the first
    # 20) that has at least two fields, none of which parses as a number.
    if skiprows is None:
        skiprows = 0
        for i, line in enumerate(lines[:20]):
            toks = [t.strip() for t in line.split(sep)]
            if not toks or all(t == "" for t in toks):
                continue
            nonnum = sum(1 for t in toks if not _is_number(t))
            if nonnum == len(toks) and len(toks) >= 2:
                skiprows = i
                break

    # Fail with a clear message instead of an opaque IndexError when there
    # is no header line to read (negative indices keep list semantics).
    if not -len(lines) <= skiprows < len(lines):
        raise ValueError(
            f"No header row at line index {skiprows} in {path} "
            f"(file has {len(lines)} line(s)).")

    header = [t.strip() for t in lines[skiprows].split(sep)]
    data_start = skiprows + 1
    units = None
    if units_row and data_start < len(lines):
        units_toks = [t.strip() for t in lines[data_start].split(sep)]
        # An all-numeric row is real data, not units: leave it in place.
        if not all(_is_number(t) for t in units_toks):
            units = dict(zip(header, units_toks))
            data_start += 1

    rows = []
    for line in lines[data_start:]:
        if not line.strip():
            continue
        toks = [t.strip() for t in line.split(sep)]
        # Best-effort parsing: rows whose width differs from the header
        # are dropped rather than aborting the whole read.
        if len(toks) != len(header):
            continue
        try:
            # Non-numeric cells become NaN so every column stays float.
            rows.append([float(t) if _is_number(t) else np.nan for t in toks])
        except ValueError:
            # Guard for tokens _is_number accepts but float() rejects.
            continue
    arr = np.array(rows, dtype=float) if rows else np.empty((0, len(header)))
    data = {col: arr[:, i] for i, col in enumerate(header)}

    # pandas is optional; without it only the plain dict is returned.
    df = None
    try:
        import pandas as pd
        df = pd.DataFrame(data)
    except ImportError:
        pass

    return {"data": data, "df": df, "units": units,
            "header_row": skiprows, "n_rows": len(rows)}

read_ags

Python
read_ags(path, encoding: str = 'utf-8') -> Dict[str, dict]

Parse an AGS4 file and return all groups as dicts.

PARAMETER DESCRIPTION
path

AGS file path.

TYPE: str or Path

encoding

File encoding (AGS4 specifies UTF-8). Default 'utf-8'.

TYPE: str DEFAULT: 'utf-8'

RETURNS DESCRIPTION
dict

{group_name: {'headings': [...], 'units': [...], 'data': [{...}, ...]}}

Reference

AGS (2017) -- AGS4 data-transfer specification.

Source code in geoeq/io/ags_reader.py
Python
def read_ags(path, encoding: str = "utf-8") -> Dict[str, dict]:
    """Parse an AGS4 file and return all groups as dicts.

    Each line is parsed as a CSV record and dispatched on its first
    field: ``GROUP`` starts a new group, ``HEADING`` and ``UNIT`` set the
    column names and units of the current group, ``DATA`` adds one row
    (a dict keyed by heading), and ``TYPE`` rows are ignored. Lines with
    any other tag are skipped. Undecodable bytes are replaced rather than
    raising.

    Parameters
    ----------
    path : str or Path
        AGS file path.
    encoding : str
        File encoding (AGS4 specifies UTF-8). Default 'utf-8'.

    Returns
    -------
    dict
        ``{group_name: {'headings': [...], 'units': [...], 'data': [{...}, ...]}}``

    Reference
    ---------
    AGS (2017) -- AGS4 data-transfer specification.
    """
    path = Path(path)
    text = path.read_text(encoding=encoding, errors="replace")

    # A UTF-8 byte-order mark decodes to U+FEFF and would stay glued to
    # the first '"GROUP"' token, so the first group would never be
    # recognised and its rows would be dropped. Strip it up front.
    if text.startswith("\ufeff"):
        text = text[1:]

    groups: Dict[str, dict] = {}
    current_group = None
    headings: List[str] = []
    units: List[str] = []
    rows: List[dict] = []

    for line_tokens in csv.reader(text.splitlines(), quotechar='"'):
        if not line_tokens:
            continue
        tag = line_tokens[0]
        if tag == "GROUP":
            # Commit previous group.
            if current_group is not None:
                groups[current_group] = {
                    "headings": headings, "units": units, "data": rows}
            current_group = line_tokens[1] if len(line_tokens) > 1 else None
            headings, units, rows = [], [], []
        elif tag == "HEADING":
            headings = list(line_tokens[1:])
        elif tag == "UNIT":
            units = list(line_tokens[1:])
        elif tag == "TYPE":
            # Field type row -- ignored for read.
            pass
        elif tag == "DATA":
            # zip() truncates to the shorter side, so extra values beyond
            # the declared headings are dropped.
            rows.append(dict(zip(headings, line_tokens[1:])))

    # Commit last.
    if current_group is not None:
        groups[current_group] = {
            "headings": headings, "units": units, "data": rows}

    return groups

read_gef

Python
read_gef(path, encoding: str = 'latin-1') -> Dict

Read a Dutch GEF-CPT file.

PARAMETER DESCRIPTION
path

Path to the .gef file.

TYPE: str or Path

encoding

File encoding (most GEF files are 'latin-1'). Default 'latin-1'.

TYPE: str DEFAULT: 'latin-1'

RETURNS DESCRIPTION
dict

{'header': {...}, 'columns': [...], 'units': [...], 'data': {colname: ndarray, ...}, 'column_info': [{...}, ...]}.

Reference

NEN -- GEF file specification (CPT-Report).

Source code in geoeq/io/gef_reader.py
Python
def read_gef(path, encoding: str = "latin-1") -> Dict:
    """Read a Dutch GEF-CPT file.

    The header (every ``#KEY= value`` line up to ``#EOH``) is parsed for
    the column separator, column descriptions and void values; all other
    header keys are returned verbatim. Lines after ``#EOH`` are parsed as
    numeric rows; rows that cannot be parsed are skipped.

    Parameters
    ----------
    path : str or Path
        Path to the .gef file.
    encoding : str
        File encoding (most GEF files are 'latin-1'). Default 'latin-1'.

    Returns
    -------
    dict
        ``{'header': {...}, 'columns': [...], 'units': [...],
        'data': {colname: ndarray, ...}, 'column_info': [{...}, ...]}``.

        ``'column_info'`` holds one dict per ``#COLUMNINFO`` line with the
        keys ``'index'``, ``'unit'``, ``'name'`` and ``'quantity'``, sorted
        by index. Void values are replaced by NaN in ``'data'``.

    Reference
    ---------
    NEN -- GEF file specification (CPT-Report).
    """
    path = Path(path)
    text = path.read_text(encoding=encoding, errors="replace")

    header: Dict[str, str] = {}
    column_info: List[dict] = []
    sep = " "
    # (1-based column number or None, void value). None means the column
    # number could not be determined and the value applies to all columns.
    void_values: List[tuple] = []
    in_header = True
    data_lines: List[str] = []

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not in_header:
            if line:
                data_lines.append(line)
            continue
        if line.startswith("#EOH"):
            in_header = False
            continue
        # Only "#KEY= value" lines carry header information.
        if not line.startswith("#") or "=" not in line:
            continue
        key, _, value = line[1:].partition("=")
        key = key.strip().upper()
        value = value.strip()
        if key == "COLUMNSEPARATOR":
            sep = value if value else " "
        elif key == "COLUMNINFO":
            # "#COLUMNINFO= index, unit, name, quantity"
            toks = [t.strip() for t in value.split(",")]
            if len(toks) < 3:
                continue
            try:
                idx = int(toks[0])
            except ValueError:
                idx = len(column_info) + 1
            column_info.append({
                "index": idx,
                "unit": toks[1],
                "name": toks[2],
                "quantity": toks[3] if len(toks) > 3 else "",
            })
        elif key == "COLUMNVOID":
            # "#COLUMNVOID= column, value": the void marker belongs to one
            # column only, so remember which one.
            toks = [t.strip() for t in value.split(",")]
            try:
                void = float(toks[-1])
            except ValueError:
                continue
            col = None
            if len(toks) >= 2:
                try:
                    col = int(toks[0])
                except ValueError:
                    col = None
            void_values.append((col, void))
        else:
            header[key] = value

    # Parse data
    rows = []
    for line in data_lines:
        if sep == " ":
            parts = line.split()
        else:
            parts = [p for p in line.replace("\t", " ").split(sep) if p]
        try:
            rows.append([float(p) for p in parts])
        except ValueError:
            continue

    # Rows of unequal width cannot form a 2-D array. Keep the rows that
    # match the declared column count (or, failing that, the first row's
    # width) and drop the rest, in line with the skip-bad-rows policy above.
    if rows:
        widths = {len(r) for r in rows}
        if len(widths) > 1:
            expected = (len(column_info) if len(column_info) in widths
                        else len(rows[0]))
            rows = [r for r in rows if len(r) == expected]

    arr = np.array(rows, dtype=float) if rows else \
        np.empty((0, len(column_info)))
    n_cols = arr.shape[1]

    # Replace void values with NaN, restricted to the declared column so a
    # legitimate reading in another column is not wiped out.
    for col, void in void_values:
        if col is None:
            arr[arr == void] = np.nan
        elif 1 <= col <= n_cols:
            arr[arr[:, col - 1] == void, col - 1] = np.nan

    column_info.sort(key=lambda c: c["index"])
    names = [c["name"] for c in column_info]
    units = [c["unit"] for c in column_info]

    # Use the declared 1-based column numbers when they are unique and fit
    # the data; otherwise fall back to positional order.
    indices = [c["index"] for c in column_info]
    indices_usable = (len(set(indices)) == len(indices)
                      and all(1 <= k <= n_cols for k in indices))
    if indices_usable:
        positions = [k - 1 for k in indices]
    else:
        positions = list(range(len(column_info)))
    data = {n: arr[:, pos] for n, pos in zip(names, positions) if pos < n_cols}

    return {
        "header": header, "columns": names, "units": units,
        "data": data, "column_info": column_info,
    }

CPT

Python
CPT(depth: Sequence[float], qc: Sequence[float], fs: Sequence[float] = None, u2: Sequence[float] = None, title: str = 'CPT')

Container for a single CPT sounding.

ATTRIBUTE DESCRIPTION
depth

Depth (m).

TYPE: ndarray

qc

Cone tip resistance (MPa or kPa -- callers must be consistent).

TYPE: ndarray

fs

Sleeve friction (kPa). Default zeros.

TYPE: ndarray

u2

Pore pressure behind cone (kPa). Default zeros.

TYPE: ndarray

title

Sounding name.

TYPE: str

Source code in geoeq/io/cpt_container.py
Python
def __init__(
    self,
    depth: Sequence[float],
    qc: Sequence[float],
    fs: Sequence[float] = None,
    u2: Sequence[float] = None,
    title: str = "CPT",
):
    """Store the sounding channels as float ndarrays.

    ``depth`` and ``qc`` are always converted with ``np.asarray``.
    ``fs`` and ``u2`` are optional: when omitted (None) they are filled
    with zeros shaped like ``depth``. ``title`` is stored unchanged.
    """
    self.depth = np.asarray(depth, dtype=float)
    self.qc = np.asarray(qc, dtype=float)

    def _channel_or_zeros(values):
        # A missing channel becomes a zero array matching the depth array.
        if values is None:
            return np.zeros_like(self.depth)
        return np.asarray(values, dtype=float)

    self.fs = _channel_or_zeros(fs)
    self.u2 = _channel_or_zeros(u2)
    self.title = title
    # Starts unset; presumably filled in later by a normalisation step
    # elsewhere in the class -- confirm against the rest of the file.
    self._normalized = None

from_gef classmethod

Python
from_gef(path: Union[str, Path]) -> 'CPT'

Build a CPT from a GEF file.

Source code in geoeq/io/cpt_container.py
Python
@classmethod
def from_gef(cls, path: Union[str, Path]) -> "CPT":
    """Build a CPT from a GEF file.

    The file is read with ``read_gef`` and the depth, cone-resistance,
    sleeve-friction and pore-pressure columns are looked up with ``_pick``
    from a short list of candidate column names (English and Dutch).
    Sleeve friction and pore pressure are looked up with ``default=None``;
    the sounding title is the file name without its suffix.
    """
    from geoeq.io.gef_reader import read_gef

    columns = read_gef(path)["data"]
    # Candidate names per channel, e.g. "sondeerlengte" for depth.
    return cls(
        depth=_pick(columns, ("depth", "z", "sondeerlengte")),
        qc=_pick(columns, ("qc", "qc1", "konusw")),
        fs=_pick(columns, ("fs", "fs1", "wrijving"), default=None),
        u2=_pick(columns, ("u2", "u", "waterdruk"), default=None),
        title=Path(path).stem,
    )

from_ags classmethod

Python
from_ags(path: Union[str, Path]) -> 'CPT'

Build a CPT from an AGS4 file (uses the first of the SCPT / STCN / CPTU / CPTC groups present).

Source code in geoeq/io/cpt_container.py
Python
@classmethod
def from_ags(cls, path: Union[str, Path]) -> "CPT":
    """Build a CPT from an AGS4 file.

    Uses the first of the SCPT / STCN / CPTU / CPTC groups present in the
    file. Columns are located by heading name:

    * depth -- first heading containing ``DPTH`` or ``DEPTH``, else the
      first heading of the group;
    * qc -- first heading containing ``QC``, or ``RES`` but not ``FRES``
      (e.g. ``SCPT_RES``);
    * fs -- first heading containing ``FS`` or ``FRES``
      (e.g. ``SCPT_FRES``); optional;
    * u2 -- ``SCPT_U2`` / ``CPTU_U2`` / ``U2`` or any ``*_PWP2`` heading
      (e.g. ``SCPT_PWP2``); optional.

    Cells that are missing or not numeric become NaN. All rows of the
    group are used.

    Raises
    ------
    ValueError
        If no CPT-like group is present, the group has no headings, or no
        cone-resistance heading can be found.
    """
    from geoeq.io.ags_reader import read_ags
    ags = read_ags(path)
    group = next(
        (ags[g] for g in ("SCPT", "STCN", "CPTU", "CPTC") if g in ags),
        None)
    if group is None:
        raise ValueError(
            "No CPT-like group (SCPT/STCN/CPTU/CPTC) in AGS file.")

    headings = group["headings"]
    if not headings:
        # Checked explicitly: the depth fallback below indexes headings[0].
        raise ValueError("CPT group in AGS file has no HEADING row.")

    def find(matches, default=None):
        # First heading whose upper-cased name satisfies ``matches``.
        return next((h for h in headings if matches(h.upper())), default)

    depth_key = find(lambda up: "DPTH" in up or "DEPTH" in up, headings[0])
    # "*_FRES" is sleeve friction; it must not be taken for cone resistance.
    qc_key = find(
        lambda up: "QC" in up or ("RES" in up and "FRES" not in up))
    if qc_key is None:
        # Without this check qc would silently become a 0-d NaN array.
        raise ValueError(
            "No cone-resistance heading (e.g. SCPT_RES) in AGS CPT group.")
    fs_key = find(lambda up: "FS" in up or "FRES" in up)
    u_key = find(
        lambda up: up in ("SCPT_U2", "CPTU_U2", "U2")
        or up.endswith("_PWP2"))

    # NOTE(review): values are passed through without unit conversion.
    # AGS4 files commonly report these fields in MPa -- confirm against
    # group["units"] before mixing with kPa-based data.
    rows = group["data"]

    def col(key):
        if key is None:
            return None
        vals = []
        for r in rows:
            try:
                vals.append(float(r.get(key, "nan")))
            except (TypeError, ValueError):
                vals.append(np.nan)
        return np.asarray(vals)

    return cls(depth=col(depth_key), qc=col(qc_key),
               fs=col(fs_key), u2=col(u_key),
               title=Path(path).stem)