您可以尝试先用 `re` 将该字符串"转换"为合法的 JSON,然后再用标准的 `json.loads` 进行解析
(正则表达式演示,Regex demo):
import re
import json
import pandas as pd
# Raw input: a list of records written as unquoted `key=value` pairs, which is
# not valid JSON yet.
s = "[{idEvento.$oid=63ffaec3cdc01e6352729bad, dataHoraEvento.$date=1677690003377, codigoTipoEvento=1, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=63ffb5c8cdc01e6352729bae, dataHoraEvento.$date=1677691800676, codigoTipoEvento=3, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6405cc8711c78c20369b4033, dataHoraEvento.$date=1678090851560, codigoTipoEvento=8, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6422b4c97e45dd75abb4f831, dataHoraEvento.$date=1679985307560, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}, {idEvento.$oid=6422b4c97e45dd75abb4f832, dataHoraEvento.$date=1679985309584, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}]"

# A key or a value is any run of characters other than space, "=", ",",
# square brackets or braces; the two groups capture the key and the value.
pair_pattern = r"([^ =,\[\]\{\}]+)=([^ =,\[\]\{\}]+)"
# Quote both sides and use ":" as the separator. Every value becomes a JSON
# string, so text such as "032023" keeps its leading zero.
quoted_pair = r'"\g<1>":"\g<2>"'
s = re.sub(pair_pattern, quoted_pair, s)

# The rewritten text is plain JSON: a list of objects, one per record.
data = json.loads(s)
df = pd.DataFrame(data)
print(df)
打印:
idEvento.$oid dataHoraEvento.$date codigoTipoEvento mesAnoReferenciaContabilizacao _class
0 63ffaec3cdc01e6352729bad 1677690003377 1 032023 NaN
1 63ffb5c8cdc01e6352729bae 1677691800676 3 032023 NaN
2 6405cc8711c78c20369b4033 1678090851560 8 032023 NaN
3 6422b4c97e45dd75abb4f831 1679985307560 6 032023 br.com.bb.rcp.model.vantagens.HistoricoContabil
4 6422b4c97e45dd75abb4f832 1679985309584 6 032023 br.com.bb.rcp.model.vantagens.HistoricoContabil
注意:此方法适用于本例,但对于实际数据,可能需要调整正则表达式模式(例如,当键或值本身包含空格、逗号、等号或括号时,当前模式将无法正确匹配)。
编辑:若要将此方法应用于 DataFrame 中的某一列,可按如下方式操作。
假设有如下 DataFrame:
# Sample frame for the column-wise variant: every cell of "col1" holds one raw,
# unquoted `key=value` record list. The rows differ in the 01_/02_/03_ prefix
# of their first id.
raw_cells = [
    "[{idEvento.$oid=01_63ffaec3cdc01e6352729bad, dataHoraEvento.$date=1677690003377, codigoTipoEvento=1, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=63ffb5c8cdc01e6352729bae, dataHoraEvento.$date=1677691800676, codigoTipoEvento=3, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6405cc8711c78c20369b4033, dataHoraEvento.$date=1678090851560, codigoTipoEvento=8, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6422b4c97e45dd75abb4f831, dataHoraEvento.$date=1679985307560, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}, {idEvento.$oid=6422b4c97e45dd75abb4f832, dataHoraEvento.$date=1679985309584, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}]",
    "[{idEvento.$oid=02_63ffaec3cdc01e6352729bad, dataHoraEvento.$date=1677690003377, codigoTipoEvento=1, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=63ffb5c8cdc01e6352729bae, dataHoraEvento.$date=1677691800676, codigoTipoEvento=3, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6405cc8711c78c20369b4033, dataHoraEvento.$date=1678090851560, codigoTipoEvento=8, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6422b4c97e45dd75abb4f831, dataHoraEvento.$date=1679985307560, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}, {idEvento.$oid=6422b4c97e45dd75abb4f832, dataHoraEvento.$date=1679985309584, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}]",
    "[{idEvento.$oid=03_63ffaec3cdc01e6352729bad, dataHoraEvento.$date=1677690003377, codigoTipoEvento=1, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=63ffb5c8cdc01e6352729bae, dataHoraEvento.$date=1677691800676, codigoTipoEvento=3, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6405cc8711c78c20369b4033, dataHoraEvento.$date=1678090851560, codigoTipoEvento=8, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6422b4c97e45dd75abb4f831, dataHoraEvento.$date=1679985307560, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}, {idEvento.$oid=6422b4c97e45dd75abb4f832, dataHoraEvento.$date=1679985309584, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}]",
]
df = pd.DataFrame({"col1": raw_cells})
col1
0 [{idEvento.$oid=01_63ffaec3cdc01e6352729bad, dataHoraEvento.$date=1677690003377, codigoTipoEvento=1, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=63ffb5c8cdc01e6352729bae, dataHoraEvento.$date=1677691800676, codigoTipoEvento=3, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6405cc8711c78c20369b4033, dataHoraEvento.$date=1678090851560, codigoTipoEvento=8, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6422b4c97e45dd75abb4f831, dataHoraEvento.$date=1679985307560, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}, {idEvento.$oid=6422b4c97e45dd75abb4f832, dataHoraEvento.$date=1679985309584, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}]
1 [{idEvento.$oid=02_63ffaec3cdc01e6352729bad, dataHoraEvento.$date=1677690003377, codigoTipoEvento=1, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=63ffb5c8cdc01e6352729bae, dataHoraEvento.$date=1677691800676, codigoTipoEvento=3, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6405cc8711c78c20369b4033, dataHoraEvento.$date=1678090851560, codigoTipoEvento=8, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6422b4c97e45dd75abb4f831, dataHoraEvento.$date=1679985307560, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}, {idEvento.$oid=6422b4c97e45dd75abb4f832, dataHoraEvento.$date=1679985309584, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}]
2 [{idEvento.$oid=03_63ffaec3cdc01e6352729bad, dataHoraEvento.$date=1677690003377, codigoTipoEvento=1, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=63ffb5c8cdc01e6352729bae, dataHoraEvento.$date=1677691800676, codigoTipoEvento=3, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6405cc8711c78c20369b4033, dataHoraEvento.$date=1678090851560, codigoTipoEvento=8, mesAnoReferenciaContabilizacao=032023}, {idEvento.$oid=6422b4c97e45dd75abb4f831, dataHoraEvento.$date=1679985307560, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}, {idEvento.$oid=6422b4c97e45dd75abb4f832, dataHoraEvento.$date=1679985309584, codigoTipoEvento=6, mesAnoReferenciaContabilizacao=032023, _class=br.com.bb.rcp.model.vantagens.HistoricoContabil}]
然后:
def fn(x):
    """Parse one raw cell of unquoted ``key=value`` records.

    Args:
        x: Text shaped like ``[{k1=v1, k2=v2}, {k1=v3}]`` in which keys and
            values contain no spaces, ``=``, ``,``, square brackets or braces.

    Returns:
        The object ``json.loads`` produces for the rewritten text. For input
        of the shape above this is a list of dicts whose values are all
        strings, because every value is wrapped in double quotes.

    Raises:
        json.JSONDecodeError: If the rewritten text is still not valid JSON.
    """
    # Quote every key and value and replace "=" with ":" so that the text
    # becomes plain JSON. Only the pairs are touched; brackets, braces and
    # the ", " separators are already JSON-compatible.
    quoted = re.sub(r"([^ =,\[\]\{\}]+)=([^ =,\[\]\{\}]+)", r'"\g<1>":"\g<2>"', x)
    return json.loads(quoted)
out = (
    df["col1"]
    .apply(fn)  # each cell becomes the parsed list of record dicts
    .explode()  # one record per row; the source row's index label is repeated
    .apply(pd.Series)  # dict keys become columns; missing keys show up as NaN
)
print(out)
打印:
idEvento.$oid dataHoraEvento.$date codigoTipoEvento mesAnoReferenciaContabilizacao _class
0 01_63ffaec3cdc01e6352729bad 1677690003377 1 032023 NaN
0 63ffb5c8cdc01e6352729bae 1677691800676 3 032023 NaN
0 6405cc8711c78c20369b4033 1678090851560 8 032023 NaN
0 6422b4c97e45dd75abb4f831 1679985307560 6 032023 br.com.bb.rcp.model.vantagens.HistoricoContabil
0 6422b4c97e45dd75abb4f832 1679985309584 6 032023 br.com.bb.rcp.model.vantagens.HistoricoContabil
1 02_63ffaec3cdc01e6352729bad 1677690003377 1 032023 NaN
1 63ffb5c8cdc01e6352729bae 1677691800676 3 032023 NaN
1 6405cc8711c78c20369b4033 1678090851560 8 032023 NaN
1 6422b4c97e45dd75abb4f831 1679985307560 6 032023 br.com.bb.rcp.model.vantagens.HistoricoContabil
1 6422b4c97e45dd75abb4f832 1679985309584 6 032023 br.com.bb.rcp.model.vantagens.HistoricoContabil
2 03_63ffaec3cdc01e6352729bad 1677690003377 1 032023 NaN
2 63ffb5c8cdc01e6352729bae 1677691800676 3 032023 NaN
2 6405cc8711c78c20369b4033 1678090851560 8 032023 NaN
2 6422b4c97e45dd75abb4f831 1679985307560 6 032023 br.com.bb.rcp.model.vantagens.HistoricoContabil
2 6422b4c97e45dd75abb4f832 1679985309584 6 032023 br.com.bb.rcp.model.vantagens.HistoricoContabil