Skip to content

Commit

Permalink
polars
Browse files Browse the repository at this point in the history
  • Loading branch information
szabgab committed Jan 17, 2025
1 parent fd531ad commit 8e4b1db
Show file tree
Hide file tree
Showing 4 changed files with 976 additions and 0 deletions.
5 changes: 5 additions & 0 deletions python/examples/polars/getting_started.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,birthdate,weight,height
Alice Archer,1997-01-10,57.9,1.56
Ben Brown,1985-02-15,72.5,1.77
Chloe Cooper,1983-03-22,53.6,1.65
Daniel Donovan,1981-04-30,83.1,1.75
303 changes: 303 additions & 0 deletions python/examples/polars/getting_started.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bc5909c3-0e34-46b7-af44-d8b59bbd1817",
"metadata": {},
"outputs": [],
"source": [
"# %pip (unlike !pip) installs into the environment of the running kernel.\n",
"%pip install polars"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0422d14b-b3d5-407b-8594-31633a056594",
"metadata": {},
"outputs": [],
"source": [
"import polars as pl\n",
"import datetime as dt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef584243-839b-47a3-a34a-52b5d8d5d4c2",
"metadata": {},
"outputs": [],
"source": [
"# Sample frame reused by the later cells: one row per person.\n",
"df = pl.DataFrame(\n",
"    data={\n",
"        \"name\": [\"Alice Archer\", \"Ben Brown\", \"Chloe Cooper\", \"Daniel Donovan\"],\n",
"        \"birthdate\": [\n",
"            dt.date(year=1997, month=1, day=10),\n",
"            dt.date(year=1985, month=2, day=15),\n",
"            dt.date(year=1983, month=3, day=22),\n",
"            dt.date(year=1981, month=4, day=30),\n",
"        ],\n",
"        \"weight\": [57.9, 72.5, 53.6, 83.1],  # (kg)\n",
"        \"height\": [1.56, 1.77, 1.65, 1.75],  # (m)\n",
"    },\n",
")\n",
"\n",
"# Plain-text rendering of the frame.\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2943f6c6-2989-4b66-ac3f-37f218d578bb",
"metadata": {},
"outputs": [],
"source": [
"# A bare last expression is shown with the notebook's rich repr instead of print().\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7904a22a-aced-4618-951c-80afaeaf7ba5",
"metadata": {},
"outputs": [],
"source": [
"# List only the public attributes and methods of the frame. A plain dir(df)\n",
"# also returns every underscore-prefixed internal name, which buries the API.\n",
"[attr for attr in dir(df) if not attr.startswith(\"_\")]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6cb1363a-0706-4fb8-9ce1-284c2ce14720",
"metadata": {},
"outputs": [],
"source": [
"# First two rows of the frame.\n",
"df.head(n=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "832f56a2-a543-4db5-ab41-be2c8cfa989a",
"metadata": {},
"outputs": [],
"source": [
"# Last two rows of the frame.\n",
"df.tail(n=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0ffa702-8f7e-488d-9175-82a2ae9c8738",
"metadata": {},
"outputs": [],
"source": [
"# Per-column summary statistics of the frame.\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "362dc935-407a-44be-bbc9-1a7111b852ba",
"metadata": {},
"outputs": [],
"source": [
"# Write the frame to a CSV file in the working directory; the next cell reads it back.\n",
"df.write_csv(file=\"getting_started.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bfde105-95dc-4b3f-9763-1f2e132b9b0e",
"metadata": {},
"outputs": [],
"source": [
"# Load the CSV again, asking Polars to try to parse date-like columns as dates.\n",
"df_csv = pl.read_csv(\n",
"    source=\"getting_started.csv\",\n",
"    try_parse_dates=True,\n",
")\n",
"print(df_csv)"
]
},
{
"cell_type": "markdown",
"id": "70a7e566-33e1-471d-a71d-f7e645787355",
"metadata": {},
"source": [
"## Expressions: select, with_columns, filter and group_by"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58e1ecba-46f7-47f6-ad33-f708f07ee28d",
"metadata": {},
"outputs": [],
"source": [
"# Keep the name and add weight/height scaled by 0.95, rounded to two\n",
"# decimals, with \"-5%\" appended to each of those column names.\n",
"result = df.select(\n",
"    \"name\",\n",
"    (pl.col([\"weight\", \"height\"]) * 0.95).round(decimals=2).name.suffix(\"-5%\"),\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39d7eecf-dea3-409d-b904-5b260de8ca7d",
"metadata": {},
"outputs": [],
"source": [
"# Add two derived columns next to the existing ones:\n",
"# birth_year from the date, and bmi = weight / height^2.\n",
"result = df.with_columns(\n",
"    pl.col(\"birthdate\").dt.year().alias(\"birth_year\"),\n",
"    (pl.col(\"weight\") / pl.col(\"height\").pow(2)).alias(\"bmi\"),\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbfce594-67a9-47d4-bdc1-5488f532163d",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows whose birth year is earlier than 1990.\n",
"result = df.filter(\n",
"    pl.col(\"birthdate\").dt.year().lt(1990),\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1870d98c-d320-4fd3-8f03-4f43c8958a7c",
"metadata": {},
"outputs": [],
"source": [
"# Two predicates passed to filter(): a row is kept only if both hold.\n",
"result = df.filter(\n",
"    pl.col(\"birthdate\").is_between(\n",
"        lower_bound=dt.date(1982, 12, 31),\n",
"        upper_bound=dt.date(1996, 1, 1),\n",
"    ),\n",
"    pl.col(\"height\").lt(1.7),\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac8411e7-5f8a-46ad-aca4-83892af2b80b",
"metadata": {},
"outputs": [],
"source": [
"# Count rows per birth decade (year floored to a multiple of 10),\n",
"# with maintain_order=True so groups follow their order in df.\n",
"result = (\n",
"    df.group_by(\n",
"        ((pl.col(\"birthdate\").dt.year() // 10) * 10).alias(\"decade\"),\n",
"        maintain_order=True,\n",
"    )\n",
"    .len()\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be3c0d55-c16c-4cda-8d04-c0fb56007833",
"metadata": {},
"outputs": [],
"source": [
"# Per birth decade: group size, mean weight (two decimals) and maximum height.\n",
"result = (\n",
"    df.group_by(\n",
"        ((pl.col(\"birthdate\").dt.year() // 10) * 10).alias(\"decade\"),\n",
"        maintain_order=True,\n",
"    )\n",
"    .agg(\n",
"        sample_size=pl.len(),\n",
"        avg_weight=pl.col(\"weight\").mean().round(2),\n",
"        tallest=pl.col(\"height\").max(),\n",
"    )\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29c60262-5dbf-49d6-b27f-fd81e11068fb",
"metadata": {},
"outputs": [],
"source": [
"# Chain several steps: derive the decade and reduce each name to its first\n",
"# word, drop birthdate, then collect names and averages per decade.\n",
"result = (\n",
"    df.with_columns(\n",
"        ((pl.col(\"birthdate\").dt.year() // 10) * 10).alias(\"decade\"),\n",
"        pl.col(\"name\").str.split(\" \").list.first(),\n",
"    )\n",
"    .select(pl.exclude(\"birthdate\"))\n",
"    .group_by(\"decade\", maintain_order=True)\n",
"    .agg(\n",
"        pl.col(\"name\"),\n",
"        pl.col([\"weight\", \"height\"]).mean().round(2).name.prefix(\"avg_\"),\n",
"    )\n",
")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe9d8375-97a1-40f1-8c17-7b4e2994059e",
"metadata": {},
"outputs": [],
"source": [
"# Second frame with the same names as df, listed in a different row order.\n",
"df2 = pl.DataFrame(\n",
"    data={\n",
"        \"name\": [\"Ben Brown\", \"Daniel Donovan\", \"Alice Archer\", \"Chloe Cooper\"],\n",
"        \"parent\": [True, False, False, False],\n",
"        \"siblings\": [1, 2, 3, 4],\n",
"    },\n",
")\n",
"print(df2)\n",
"\n",
"# Left join on the shared \"name\" column.\n",
"print(df.join(other=df2, on=\"name\", how=\"left\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "477e99a9-a13a-457c-bcb2-2f45154a86f7",
"metadata": {},
"outputs": [],
"source": [
"# Third frame with the same four columns as df but different people.\n",
"df3 = pl.DataFrame(\n",
"    data={\n",
"        \"name\": [\"Ethan Edwards\", \"Fiona Foster\", \"Grace Gibson\", \"Henry Harris\"],\n",
"        \"birthdate\": [\n",
"            dt.date(year=1977, month=5, day=10),\n",
"            dt.date(year=1975, month=6, day=23),\n",
"            dt.date(year=1973, month=7, day=22),\n",
"            dt.date(year=1971, month=8, day=3),\n",
"        ],\n",
"        \"weight\": [67.9, 72.5, 57.6, 93.1],  # (kg)\n",
"        \"height\": [1.76, 1.6, 1.66, 1.8],  # (m)\n",
"    },\n",
")\n",
"print(df3)\n",
"\n",
"# Stack the rows of df3 underneath those of df.\n",
"print(pl.concat(items=[df, df3], how=\"vertical\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 8e4b1db

Please sign in to comment.