您可以使用选择器(selectors)和一些基本的字符串操作来实现这一点。
根据您预期这个问题今后的复杂程度,您可以直接跳到
正则表达式方案,或者使用 polars.selectors.ends_with 配合 str.removesuffix。
字符串后缀操作
此方法使用:
- polars.selectors.ends_with # find columns ending with string
- str.removesuffix # remove suffix from end of string
对应的代码如下:
import polars as pl
from polars import selectors as cs
import numpy as np
import re
from functools import partial
# Sample frame: two columns end in "_A0", one ends in "_A2", one has no suffix.
frame_columns = {
    "nrs": [1, 2, 3, None, 5],
    "names_A0": ["foo", "ham", "spam", "egg", None],
    "random_A0": np.random.rand(5),
    "A_A2": [True, True, False, False, False],
}
df = pl.DataFrame(frame_columns)

digit = 0
suffix = f'_A{digit}'


def _strip_suffix(column_name):
    """Return ``column_name`` with the trailing ``suffix`` removed."""
    return column_name.removesuffix(suffix)


# Selector for every column whose name ends with the suffix, plus the same
# columns re-emitted under their suffix-free names.
suffixed_columns = cs.ends_with(suffix)
renamed_columns = suffixed_columns.name.map(_strip_suffix)

# Variant 1: keep the original "_A0" columns and append the renamed copies.
with_originals = df.with_columns(renamed_columns)
# Example output (the random columns differ on every run, no seed is set):
# shape: (5, 6)
# ┌──────┬──────────┬───────────┬───────┬───────┬──────────┐
# │ nrs  ┆ names_A0 ┆ random_A0 ┆ A_A2  ┆ names ┆ random   │
# │ ---  ┆ ---      ┆ ---       ┆ ---   ┆ ---   ┆ ---      │
# │ i64  ┆ str      ┆ f64       ┆ bool  ┆ str   ┆ f64      │
# ╞══════╪══════════╪═══════════╪═══════╪═══════╪══════════╡
# │ 1    ┆ foo      ┆ 0.713324  ┆ true  ┆ foo   ┆ 0.713324 │
# │ 2    ┆ ham      ┆ 0.980031  ┆ true  ┆ ham   ┆ 0.980031 │
# │ 3    ┆ spam     ┆ 0.242768  ┆ false ┆ spam  ┆ 0.242768 │
# │ null ┆ egg      ┆ 0.528783  ┆ false ┆ egg   ┆ 0.528783 │
# │ 5    ┆ null     ┆ 0.583206  ┆ false ┆ null  ┆ 0.583206 │
# └──────┴──────────┴───────────┴───────┴───────┴──────────┘

# Variant 2: drop the original "_A0" columns; the non-matching columns come
# first, followed by the renamed ones.
without_originals = df.select(~suffixed_columns, renamed_columns)
# Example output:
# shape: (5, 4)
# ┌──────┬───────┬───────┬──────────┐
# │ nrs  ┆ A_A2  ┆ names ┆ random   │
# │ ---  ┆ ---   ┆ ---   ┆ ---      │
# │ i64  ┆ bool  ┆ str   ┆ f64      │
# ╞══════╪═══════╪═══════╪══════════╡
# │ 1    ┆ true  ┆ foo   ┆ 0.713324 │
# │ 2    ┆ true  ┆ foo   ┆ 0.980031 │
# │ 3    ┆ false ┆ spam  ┆ 0.242768 │
# │ null ┆ false ┆ egg   ┆ 0.528783 │
# │ 5    ┆ false ┆ null  ┆ 0.583206 │
# └──────┴───────┴───────┴──────────┘

print(with_originals, without_originals, sep='\n\n')
正则表达式
或者,您可以使用正则表达式来检测一系列后缀模式
- polars.selectors.matches # find columns matching a pattern
- re.sub # substitute in string based on pattern
我们需要确保模式以 '$' 结尾,以便把模式锚定到字符串的末尾。
import polars as pl
from polars import selectors as cs
import numpy as np
import re
from functools import partial
# Sample frame: two columns end in "_A0", one ends in "_A2", one has no suffix.
frame_columns = {
    "nrs": [1, 2, 3, None, 5],
    "names_A0": ["foo", "ham", "spam", "egg", None],
    "random_A0": np.random.rand(5),
    "A_A2": [True, True, False, False, False],
}
df = pl.DataFrame(frame_columns)

digit = 0
# The trailing "$" anchors the pattern to the end of the column name.
suffix = fr'_A{digit}$'

# Callable that deletes the suffix pattern from a single column name;
# equivalent to calling re.sub(suffix, '', name).
strip_suffix = partial(re.sub, suffix, '')

# Selector for every column whose name matches the pattern, plus the same
# columns re-emitted under their suffix-free names.
matching_columns = cs.matches(suffix)
renamed_columns = matching_columns.name.map(strip_suffix)

# Variant 1: keep the original "_A0" columns and append the renamed copies.
with_originals = df.with_columns(renamed_columns)
# Example output (the random columns differ on every run, no seed is set):
# shape: (5, 6)
# ┌──────┬──────────┬───────────┬───────┬───────┬──────────┐
# │ nrs  ┆ names_A0 ┆ random_A0 ┆ A_A2  ┆ names ┆ random   │
# │ ---  ┆ ---      ┆ ---       ┆ ---   ┆ ---   ┆ ---      │
# │ i64  ┆ str      ┆ f64       ┆ bool  ┆ str   ┆ f64      │
# ╞══════╪══════════╪═══════════╪═══════╪═══════╪══════════╡
# │ 1    ┆ foo      ┆ 0.713324  ┆ true  ┆ foo   ┆ 0.713324 │
# │ 2    ┆ ham      ┆ 0.980031  ┆ true  ┆ ham   ┆ 0.980031 │
# │ 3    ┆ spam     ┆ 0.242768  ┆ false ┆ spam  ┆ 0.242768 │
# │ null ┆ egg      ┆ 0.528783  ┆ false ┆ egg   ┆ 0.528783 │
# │ 5    ┆ null     ┆ 0.583206  ┆ false ┆ null  ┆ 0.583206 │
# └──────┴──────────┴───────────┴───────┴───────┴──────────┘

# Variant 2: drop the original "_A0" columns; the non-matching columns come
# first, followed by the renamed ones.
without_originals = df.select(~matching_columns, renamed_columns)
# Example output:
# shape: (5, 4)
# ┌──────┬───────┬───────┬──────────┐
# │ nrs  ┆ A_A2  ┆ names ┆ random   │
# │ ---  ┆ ---   ┆ ---   ┆ ---      │
# │ i64  ┆ bool  ┆ str   ┆ f64      │
# ╞══════╪═══════╪═══════╪══════════╡
# │ 1    ┆ true  ┆ foo   ┆ 0.713324 │
# │ 2    ┆ true  ┆ ham   ┆ 0.980031 │
# │ 3    ┆ false ┆ spam  ┆ 0.242768 │
# │ null ┆ false ┆ egg   ┆ 0.528783 │
# │ 5    ┆ false ┆ null  ┆ 0.583206 │
# └──────┴───────┴───────┴──────────┘

print(with_originals, without_originals, sep='\n\n')