这是我的代码。我的任务是用代码检测文件夹中的一批文本文件,然后把其中的字符串解析成数据,并输出为CSV文件。你能就如何实现给我一些提示吗?我一直卡在这里。
我的代码的第一步是检测数据在txt文件中的位置。我发现所有数据都以“Read”开头,于是找到了每个文件中数据的起始行。但在那之后,我不知道该如何把数据导出为CSV文件。
import os
import argparse
import csv
from typing import List
def validate_directory(path):
    """Return ``path`` unchanged when it names an existing directory.

    Serves as the ``type=`` callback of the ``--directory`` command-line
    argument.

    Raises:
        NotADirectoryError: if ``path`` is not an existing directory.
    """
    # Guard clause: reject anything that is not a directory up front.
    if not os.path.isdir(path):
        raise NotADirectoryError(path)
    return path
def get_data_from_file(file) -> List[str]:
    """Report every line of ``file`` that starts a ``Read`` data section.

    A line counts as a match when, after stripping surrounding whitespace,
    it starts with ``"Read "`` but not with ``"Read ("``, and it contains
    none of the phrases in ``ignore_list``. Each match is echoed to stdout
    together with the matching line itself.

    Args:
        file: Path of the text file to scan (decoded as latin1).

    Returns:
        One ``'Found "Read" at line N in FILE'`` message per match, in file
        order. An empty list when the file cannot be opened or read.
    """
    ignore_list = ("Read Segment", "Read Disk", "Read a line", "Read in")

    # Opening the file is inside the ``try`` as well, so a missing or
    # unreadable file is reported and skipped exactly like a read failure
    # instead of aborting the whole directory scan.
    try:
        with open(file, "r", encoding="latin1") as f:
            lines = f.readlines()
    except OSError as e:
        print(f"Unable to process {file}: {e}")
        return []

    data = []
    for line_number, line in enumerate(lines, start=1):
        if any(variation in line for variation in ignore_list):
            continue
        stripped = line.strip()
        # TODO: fix this with better regex
        if stripped.startswith("Read ") and not stripped.startswith("Read ("):
            data.append(f'Found "Read" at line {line_number} in {file}')
            print(f'Found "Read" at {file}:{line_number}')
            print(line)
    return data
def list_read_data(directory_path: str) -> List[str]:
    """Collect the ``Read`` findings of every ``.txt`` file under a folder.

    Walks ``directory_path`` recursively with ``os.walk`` and passes each
    file whose name ends in ``.txt`` to ``get_data_from_file``.

    Returns:
        All per-file results concatenated in traversal order.
    """
    collected: List[str] = []
    for current_dir, _subdirs, names in os.walk(directory_path):
        txt_paths = (
            os.path.join(current_dir, name)
            for name in names
            if name.endswith(".txt")
        )
        for txt_path in txt_paths:
            collected += get_data_from_file(txt_path)
    return collected
def write_results_to_csv(output_file: str, data: List[str]):
    """Write ``data`` to ``output_file`` as a single-column CSV.

    The file is created or overwritten, encoded as UTF-8. The first row is
    the header ``Results``; each entry of ``data`` then becomes one row.
    """
    with open(output_file, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(["Results"])
        # One single-cell row per entry.
        out.writerows([entry] for entry in data)
def main(directory_path: str, output_file: str):
    """Scan ``directory_path`` and save the findings to ``output_file``.

    Results come from ``list_read_data`` and are written with
    ``write_results_to_csv``.
    """
    write_results_to_csv(output_file, list_read_data(directory_path))
# Command-line entry point: parse the arguments, then run the scan.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Process the 2020Model folder for input data."
    )
    parser.add_argument(
        "--directory",
        type=validate_directory,
        # Without a value, args.directory would be None and
        # os.path.abspath(None) below would fail with a TypeError.
        required=True,
        help="folder to be processed",
    )
    parser.add_argument(
        "--output",
        type=str,
        help="Output file name (e.g., outputfile.csv)",
        default="outputfile.csv",
    )
    try:
        args = parser.parse_args()
    except NotADirectoryError as exc:
        # argparse only converts ArgumentTypeError/TypeError/ValueError raised
        # by a type callback into a usage error; report this one the same way
        # instead of letting a traceback escape.
        parser.error(f"--directory: not a directory: {exc}")
    main(os.path.abspath(args.directory), args.output)
以下是我理想的CSV输出数据:
1985 | 1986 | 1986 | 1987 | 1988 | 1989 | 1990 | 1991 | 1992 | 1993 | 1994 |
---|---|---|---|---|---|---|---|---|---|---|
37839 | 36962 | 37856 | 41971 | 40838 | 44640.87 | 42826.34 | 44883.03 | 43077.59 | 45006.49 | 46789 |
你能给我一些提示吗?
- 将字符串解析放在哪里?
- 如何输出为CSV文件.
下面是一个示例txt文件:
Select Year(2007-2025)
Read TotPkSav
/2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
00 27 53 78 108 133 151 161 169 177 186 195 205 216 229 242 257 273 288