Skip to content

Commit

Permalink
Merge pull request #23 from lgingerich/release/v0.1.6
Browse files Browse the repository at this point in the history
fix: handle bigquery partition time range limits
  • Loading branch information
lgingerich authored Feb 27, 2025
2 parents 110ab3c + 1ea8b8a commit 65283ac
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 15 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Features include:

> ⚠️ **Note**: This project is in active development and may have frequent breaking changes. It is not recommended for production use yet.
> ⚠️ **Note**: BigQuery only supports partitioned tables with a maximum of 10 years of history. Because Ethereum's history begins in 2015 and exceeds (or will soon exceed) that limit, using this indexer with Ethereum is not recommended.

## Prerequisites
- Rust 1.75+
Expand Down
16 changes: 10 additions & 6 deletions src/indexer/rpc/blocks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use crate::models::datasets::transactions::{
ZKsyncRpcTransactionData,
};
use crate::models::errors::BlockError;
use crate::utils::hex_to_u64;
use crate::utils::{hex_to_u64, sanitize_block_time};

pub trait BlockParser {
fn parse_header(&self, chain: Chain) -> Result<Vec<RpcHeaderData>>;
Expand All @@ -30,13 +30,17 @@ impl BlockParser for AnyRpcBlock {
let inner = self.header.inner.clone();
let other = self.other.clone();

// Get the block timestamp and convert to DateTime
let original_time =
DateTime::from_timestamp(inner.timestamp as i64, 0).expect("invalid timestamp");

// Sanitize the block time if it's block 0 with a 1970 date
let block_time = sanitize_block_time(inner.number, original_time);

// Define common fields that exist across all chains
let common = CommonRpcHeaderData {
block_time: DateTime::from_timestamp(inner.timestamp as i64, 0)
.expect("invalid timestamp"),
block_date: DateTime::from_timestamp(inner.timestamp as i64, 0)
.expect("invalid timestamp")
.date_naive(),
block_time,
block_date: block_time.date_naive(),
block_number: inner.number,
block_hash: self.header.hash,
parent_hash: inner.parent_hash,
Expand Down
25 changes: 17 additions & 8 deletions src/indexer/rpc/receipts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::models::datasets::transactions::{
CommonRpcTransactionReceiptData, EthereumRpcTransactionReceiptData, RpcTransactionReceiptData,
ZKsyncRpcTransactionReceiptData,
};
use crate::utils::hex_to_u64;
use crate::utils::{hex_to_u64, sanitize_block_time};

pub trait ReceiptParser {
fn parse_transaction_receipts(&self, chain: Chain) -> Result<Vec<RpcTransactionReceiptData>>;
Expand Down Expand Up @@ -94,14 +94,23 @@ impl ReceiptParser for Vec<AnyTransactionReceipt> {
.clone()
.into_iter()
.map(|log| {
// Get original block time from timestamp if available
let original_time = log
.block_timestamp
.and_then(|ts| DateTime::from_timestamp(ts as i64, 0));

// Sanitize the block time if it's block 0 with a 1970 date
let block_time = if let (Some(block_num), Some(time)) =
(log.block_number, original_time)
{
Some(sanitize_block_time(block_num, time))
} else {
original_time
};

let common = CommonRpcLogReceiptData {
block_time: log
.block_timestamp
.and_then(|ts| DateTime::from_timestamp(ts as i64, 0)),
block_date: log
.block_timestamp
.and_then(|ts| DateTime::from_timestamp(ts as i64, 0))
.map(|dt| dt.date_naive()),
block_time,
block_date: block_time.map(|time| time.date_naive()),
block_number: log.block_number,
block_hash: log.block_hash,
tx_hash: log.transaction_hash,
Expand Down
32 changes: 31 additions & 1 deletion src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,45 @@ pub mod rate_limiter;
pub mod retry;

use anyhow::{Context, Result};
use chrono::{DateTime, NaiveDate, Utc};
use std::{fs, path::Path};
use tracing::info;
use tracing::{info, warn};

use crate::models::common::Config;

/// Parses a hexadecimal string into a `u64`.
///
/// A single leading `0x` prefix is optional and is removed before parsing.
///
/// Returns `None` when the remaining text is empty, contains characters that
/// are not valid base-16 digits, or encodes a value larger than `u64::MAX`.
pub fn hex_to_u64(hex: String) -> Option<u64> {
    // Strip at most one prefix: `trim_start_matches` would remove repeated
    // prefixes and silently accept malformed input such as `0x0x10`.
    let digits = hex.strip_prefix("0x").unwrap_or(hex.as_str());
    u64::from_str_radix(digits, 16).ok()
}

// Sanitizes block dates for block 0 to avoid BigQuery partitioning errors.
// BigQuery only supports partitioning up to 3650 days in the past (i.e. 10 years).
// If block 0 has a date in 1970 (Unix epoch), replaces it with January 1, 2020.
// January 1, 2020 is an arbitrary date which will allow this indexer to work properly
// until the year 2030.
// NOTE: This indexer with BigQuery should not be used for Ethereum. While it could work
// if the sanitized date was set to 2015 when Ethereum was launched, that will soon be
// exceeded and then the indexer will fail for all chains. As this is tailored towards
// usage with L2s, we will focus on safe usage with L2s.

pub fn sanitize_block_time(block_number: u64, datetime: DateTime<Utc>) -> DateTime<Utc> {
// If this is block 0 with a Unix epoch date (or very close to it)
if block_number == 0 && datetime.format("%Y").to_string() == "1970" {
// Use January 1, 2020 as the fallback date
let fallback_date = NaiveDate::from_ymd_opt(2020, 1, 1).unwrap();
let fallback_time = datetime.time();
let fallback_datetime =
DateTime::<Utc>::from_naive_utc_and_offset(fallback_date.and_time(fallback_time), Utc);

warn!(
"Sanitized block 0 time from {} (Unix epoch) to {} to avoid BigQuery partitioning errors",
datetime, fallback_datetime
);
fallback_datetime
} else {
datetime
}
}

pub fn load_config<P: AsRef<Path>>(file_name: P) -> Result<Config> {
// Build the path to the config file
let manifest_dir = env!("CARGO_MANIFEST_DIR").to_string();
Expand Down

0 comments on commit 65283ac

Please sign in to comment.