Skip to content

Commit

Permalink
No public description
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 638306209
Change-Id: I29149616d8d244df0e1b28241221518aa8833dab
  • Loading branch information
marcenacp authored and peterjliu committed May 29, 2024
1 parent 6d8eb31 commit ddc9bf1
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 4 deletions.
5 changes: 3 additions & 2 deletions nanodo/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
"""Data pipeline."""

from collections.abc import Mapping, Sequence
import dataclasses
import enum
import functools
Expand Down Expand Up @@ -112,11 +113,11 @@ def get_tokenizer(self) -> spm.SentencePieceProcessor:


def _py_tokenize(
features: dict[str, str],
features: Mapping[str, str],
spt: _SPTokenizer,
pad_len: int | None = None,
pad_id: int = PAD_ID,
) -> list[int]:
) -> Sequence[int]:
"""Tokenizes text into ids, optionally pads or truncates to pad_len."""
text = features['text']
tokenizer = spt.get_tokenizer()
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ dependencies = [
"optax>=0.2.2",
"orbax>=0.1.7",
"sentencepiece>=0.2.0",
"tensorflow-datasets>=4.9.4",
# TODO: Temporary fix: use the nightly build until the next stable TFDS release is out, then switch back to "tensorflow-datasets".
"tfds-nightly",
"tensorflow>=2.16.1",
]

Expand Down
1 change: 0 additions & 1 deletion tests/train_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ def test_train_and_evaluate(self, preprocessing):

c = _get_config(self)
c.checkpoint = True
c.pygrain_worker_count = 0

cfg = model.DoConfig(**c.model, V=c.V)
m = model.TransformerDo(cfg)
Expand Down

0 comments on commit ddc9bf1

Please sign in to comment.