Data processing
The main ones organized here are .txt
, .json
, .csv
, .excel
, .pkl
Data storage and conversion between files
1. txt part
1.1 List[dict] conversion to txt
specific code
def save_jsonlist_txt(json_data, target_path): """ Save json list to txt file """ with open(target_path, "w", encoding="utf-8") as file: for item in json_data: item = str(item).replace("\ ", "\\ ") file.write(item) file.write("\ ") def load_jsonlist_txt(source_path): """ Load json list from txt file """ json_list = [] try: with open(source_path, "r", encoding="utf-8") as file: for line in file: try: json_data = eval(line.strip()) json_list.append(json_data) except json.JSONDecodeError: print(f"Unable to parse Json data: {<!-- -->line.strip()}") return json_list except FileNotFoundError: print(f"File '{<!-- -->source_path} not found'")
1.2 List[str] mutual conversion txt
def save_str_list_txt(str_list, target_path): """ Save str list to txt file """ with open(target_path, "w", encoding="utf-8") as file: for item in str_list: item = item.replace("\ ", "\\ ") file.write(item) file.write("\ ") def load_strlist_txt(file_path): """ Load str list from txt file """ txt_list = [] try: with open(file_path, "r", encoding="utf-8") as file: for line in file.readlines(): txt_list.append(line.strip()) return txt_list except FileNotFoundError: print(f"File '{<!-- -->file_path} not found'")
1.3 Overall code for txt related operations
import json def save_json_list_txt(json_data, target_path): """ Save json list to txt file """ with open(target_path, "w", encoding="utf-8") as file: for item in json_data: item = str(item).replace("\ ", "\\ ") file.write(item) # file.write(str(item)) file.write("\ ") def load_json_list_txt(source_path): """ Load json list from txt file """ json_list = [] try: with open(source_path, "r", encoding="utf-8") as file: for line in file: try: json_data = eval(line.strip()) json_list.append(json_data) except json.JSONDecodeError: print(f"Unable to parse Json data: {<!-- -->line.strip()}") return json_list except FileNotFoundError: print(f"File '{<!-- -->source_path} not found'") def save_str_list_txt(str_list, target_path): """ Save str list to txt file """ with open(target_path, "w", encoding="utf-8") as file: for item in str_list: item = item.replace("\ ", "\\ ") file.write(item) file.write("\ ") def load_str_list_txt(file_path): """ Load str list from txt file """ txt_list = [] try: with open(file_path, "r", encoding="utf-8") as file: for line in file.readlines(): txt_list.append(line.strip()) return txt_list except FileNotFoundError: print(f"File '{<!-- -->file_path} not found'") if __name__ == '__main__': human_list = [ {<!-- --> "name": "jordan\ ", "age": 30, "city": "Nanchang", "list": [3, 6, 0] }, {<!-- --> "name": "John", "age": 30, "city": "Guangzhou", "list": [3, 6, 1] }, {<!-- --> "name": "JiWang", "age": 30, "city": "Shenzhen", "list": [2, 3, 1] }, ] day_list = ["Monday\ \ ", "Tuesday", "Wednesday", "Thursday", "Week⑤", "Saturday", "Sunday"] save_json_list_txt(human_list, "human_list.txt") new_human_list = load_json_list_txt("human_list.txt") print(new_human_list) # print(type(new_human_list)) # print(type(new_human_list[0])) save_str_list_txt(day_list, "day.txt") new_day_list = load_str_list_txt("day.txt") print(new_day_list)
2. json format part
It is recommended here to understand the differences between json.load, json.loads, json.dump and json.dumps
2.1 List[dict] conversion to json
Specific method code
def save_jsonline_json(json_list, target_path): """ json_list ---> jsonline :param json_list: :param target_path: :return: """ with open(target_path, 'w', encoding="utf-8") as json_file: for item in json_list: json.dump(item, json_file, ensure_ascii=False) json_file.write("\ ") print(f"The Json list has been saved to the {<!-- -->target_path} file. Each line is a json object") def load_jsonline_json(source_file): """ jsonline --> json_list :param source_file: :return: json_list """ json_list = [] with open(source_file, "r", encoding="utf-8") as json_file: for line in json_file: try: json_data = json.loads(line.strip()) json_list.append(json_data) except json.JSONDecodeError as e: print(f"JSON parsing error: {<!-- -->e}") return json_list
2.2 ditc, List[dict] convert json files to each other
scene 1:
Scene 2:
def save_json(json_list, target_path): """save_json""" with open(target_path, "w", encoding="utf-8") as json_file: json.dump(json_list, json_file, indent=4, ensure_ascii=False) print(f"The Json list has been saved to the {<!-- -->target_path} file.") def load_json(source_file): """load_json""" json_list = [] with open(source_file, "r", encoding="utf-8") as json_file: try: json_list = json.load(json_file) except json.JSONDecodeError as e: print(f"JSON parsing error: {<!-- -->e}") return json_list
2.3 List[dict] conversion to excel
Scenes:
Here you need to pay attention to the json converted from excel. It is of str type like list, and the list type will not be retained. For further operations, eval and other operations are required.
def save_jsonlist_excel(json_list, excel_target_file): """jsonlist_trans_excel""" df = pd.DataFrame(json_list) df.to_excel(excel_target_file, index=False) def load_jsonlist_excel(excel_source_path): """excel_trans_jsonlist""" df = pd.read_excel(excel_source_path) json_list = df.to_dict(orient="records") return json_list
2.4 Convert jsonlist to csv
def save_jsonlist_csv(data_list, csv_target_path): """save_jsonlist_csv""" with open(csv_target_path, 'w', newline='', encoding="utf-8") as csv_file: fieldnames = data_list[0].keys() # Use the key of the first dictionary as the column name csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames) csv_writer.writeheader() #Write CSV header (column name) for data in data_list: csv_writer.writerow(data)
2.5 Overall code for json related operations
import json import pandas as pd import csv def save_json(json_list, target_path): """save_json""" with open(target_path, "w", encoding="utf-8") as json_file: json.dump(json_list, json_file, indent=4, ensure_ascii=False) print(f"The Json list has been saved to the {target_path} file.") def load_json(source_file): """load_json""" json_list = [] with open(source_file, "r", encoding="utf-8") as json_file: try: json_list = json.load(json_file) except json.JSONDecodeError as e: print(f"JSON parsing error: {e}") return json_list def save_jsonline_json(json_list, target_path): """ json_list ---> jsonline :param json_list: :param target_path: :return: """ with open(target_path, 'w', encoding="utf-8") as json_file: for item in json_list: json.dump(item, json_file, ensure_ascii=False) json_file.write("\ ") print(f"The Json list has been saved to the {<!-- -->target_path} file. Each line is a json object") def load_jsonline_json(source_file): """ jsonline --> json_list :param source_file: :return: json_list """ json_list = [] with open(source_file, "r", encoding="utf-8") as json_file: for line in json_file: try: json_data = json.loads(line.strip()) json_list.append(json_data) except json.JSONDecodeError as e: print(f"JSON parsing error: {<!-- -->e}") return json_list def save_jsonlist_excel(json_list, excel_target_file): """jsonlist_trans_excel""" df = pd.DataFrame(json_list) df.to_excel(excel_target_file, index=False) def load_jsonlist_excel(excel_source_path): """excel_trans_jsonlist""" df = pd.read_excel(excel_source_path) json_list = df.to_dict(orient="records") return json_list def save_jsonlist_csv(data_list, csv_target_path): """save_jsonlist_csv""" with open(csv_target_path, 'w', newline='', encoding="utf-8") as csv_file: fieldnames = data_list[0].keys() # Use the key of the first dictionary as the column name csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames) csv_writer.writeheader() #Write CSV header (column name) for data in data_list: csv_writer.writerow(data) if __name__ == '__main__': human_list = [{"name": "jordan", "age": 30, "city": "Nanchang", "list": [3, 6, 0]}, {"name": "John", "age": 30, "city": "Guangzhou", "list": [3, 6, 1]}, {"name": "JiWang", "age": 30, "city": "Shenzhen", "list": [2, 3, 1]}] human = { "name": "jordan", "age": 30, "city": "Nanchang", "list": [3, 6, 0] } # json_path = "json/humanline_list.json" # save_jsonline_json(human_list, json_path) # new_human_list = load_jsonline_json(json_path) # print(new_human_list) # save_json(human_list, "json/human_list.json") # human_json = load_json("json/human_list.json") # print(human_json) # save_jsonlist_excel(human_list, "excel/human.xlsx") #dic_list = load_jsonlist_excel("excel/human.xlsx") # print(dic_list) # print(dic_list[0]["list"]) # print(type(dic_list[0]["list"])) save_jsonlist_csv(human_list, "csv/human_list.csv") # # save_json(human, "json/human.json") # human_json = load_json("json/human.json") # print(human_json) # new_json_list = load_json_list(json_path) # print(new_json_list)
3. Excel related code operations
3.1 excel <----> List[dict]
function code
def save_jsonlist_excel(json_list, excel_target_file): """jsonlist_trans_excel""" df = pd.DataFrame(json_list) df.to_excel(excel_target_file, index=False) def load_jsonlist_excel(excel_source_path): """excel_trans_jsonlist""" df = pd.read_excel(excel_source_path) json_list = df.to_dict(orient="records") return json_list
3.2 Load data of a certain column
function code
def load_one_column(excel_source_path, column_name): df = pd.read_excel(excel_source_path) column = df[column_name] return list(column)
3.3 Add a new column
def add_list_column(excel_source_path, data_list, column_name): df = pd.read_excel(excel_source_path) df[column_name] = data_list df.to_excel(excel_source_path, index=False)
3.4 Extract key information columns from the excel table to generate a new excel table
def excel_key_info(excel_source_path, excel_target_path, column_name_list): """ Extract key information columns from the excel table to generate a new excel table :param excel_source_path: source file :param excel_target_path: target file :param column_name_list: required column name :return: no return value """ main_df = pd.read_excel(excel_source_path, usecols=column_name_list) # main_df.to_excel(excel_target_path, index=False)
3.5 Convert excel table content into jsonline file
Transfer all information or key information
def excel_trans_jsonline(excel_source_path, json_target_path, column_name_list=None): """ Convert excel table content to jsonline file If there are values in column_name_list, only the key information will be converted, otherwise all will be converted. """ if column_name_list: main_df = pd.read_excel(excel_source_path, usecols=column_name_list) else: main_df = pd.read_excel(excel_source_path) json_list = main_df.to_dict(orient="records") with open(json_target_path, 'w', encoding="utf-8") as json_file: for item in json_list: json.dump(item, json_file, ensure_ascii=False) json_file.write("\ ") print(f"The Json list has been saved to the {<!-- -->excel_source_path} file. Each line is a json object")
3.6 Read the first n rows of data
def load_top_n_excel(excel_source_path, rows): """Read the first n rows of data""" df = pd.read_excel(excel_source_path).head(rows)
3.7 Merge multiple excel tables
def merge_excel(excel_list_path, excel_target_path): """Merge multiple excel tables""" df_list = [] for current_excel_path in excel_list_path: current_df = pd.read_excel(current_excel_path) df_list.append(current_df) merge_df = pd.concat(df_list) merge_df.to_excel(excel_target_path, index=False)
3.8 Delete/Add qualified rows
def condition_delete_rows(excel_source_path, excel_target_path, column_name, delete_list): """Delete/Add qualifying data""" df = pd.read_excel(excel_source_path) df = df[~df[column_name].isin(delete_list)] # If there is no ~, it means adding df.to_excel(excel_target_path, index=False)
3.9 Overall code
import pandas as pd import json def save_jsonlist_excel(json_list, excel_target_file): """jsonlist_trans_excel""" df = pd.DataFrame(json_list) df.to_excel(excel_target_file, index=False) def load_jsonlist_excel(excel_source_path): """excel_trans_jsonlist""" df = pd.read_excel(excel_source_path) json_list = df.to_dict(orient="records") return json_list def load_one_column(excel_source_path, column_name): df = pd.read_excel(excel_source_path) column = df[column_name] return list(column) def add_list_column(excel_source_path, data_list, column_name): df = pd.read_excel(excel_source_path) df[column_name] = data_list df.to_excel(excel_source_path, index=False) def excel_key_info(excel_source_path, excel_target_path, column_name_list): """ Extract key information columns from the excel table to generate a new excel table :param excel_source_path: source file :param excel_target_path: target file :param column_name_list: required column name :return: no return value """ main_df = pd.read_excel(excel_source_path, usecols=column_name_list) # main_df.to_excel(excel_target_path, index=False) def excel_trans_jsonline(excel_source_path, json_target_path, column_name_list=None): """ Convert excel table content to jsonline file If there are values in column_name_list, only the key information will be converted, otherwise all will be converted. """ if column_name_list: main_df = pd.read_excel(excel_source_path, usecols=column_name_list) else: main_df = pd.read_excel(excel_source_path) json_list = main_df.to_dict(orient="records") with open(json_target_path, 'w', encoding="utf-8") as json_file: for item in json_list: json.dump(item, json_file, ensure_ascii=False) json_file.write("\ ") print(f"The Json list has been saved to the {<!-- -->excel_source_path} file. Each line is a json object") def load_top_n_excel(excel_source_path, rows): """Read the first n rows of data""" df = pd.read_excel(excel_source_path).head(rows) print(df) def merge_excel(excel_list_path, excel_target_path): """Merge multiple excel tables""" df_list = [] for current_excel_path in excel_list_path: current_df = pd.read_excel(current_excel_path) df_list.append(current_df) merge_df = pd.concat(df_list) merge_df.to_excel(excel_target_path, index=False) def condition_delete_rows(excel_source_path, excel_target_path, column_name, delete_list): """Delete/Add qualifying data""" df = pd.read_excel(excel_source_path) df = df[~df[column_name].isin(delete_list)] # If there is no ~, it means adding df.to_excel(excel_target_path, index=False) if __name__ == '__main__': human_list = [{"name": "jordan", "age": 30, "city": "Nanchang", "list": [3, 6, 0]}, {"name": "John", "age": 30, "city": "Guangzhou", "list": [3, 6, 1]}, {"name": "JiWang", "age": 30, "city": "Shenzhen", "list": [2, 3, 1]}, {"name": "kobe", "age": 38, "city": "Nanchang", "list": [1, 2, 3]}] index_list = [1, 2, 3, 4] save_jsonlist_excel(human_list, "human.xlsx") city_list = load_one_column("human.xlsx", "city") print(city_list) print(type(city_list)) add_list_column("human.xlsx", index_list, "index") condition_delete_rows("human.xlsx", "human_delete.xlsx", "city", ["Shenzhen", "Guangzhou"])
4. The difference between json.load, loads, dump, dumps
json.load
, json.loads
, json.dump
, and json.dumps
are used for processing in Python Functions for JSON data, their functions and usage are different:
4.1 json.load():
Reads JSON data from a file and parses it into Python objects.
Mostly used to load files in json format and return a json object or json list
import json with open('data.json', 'r') as file: data = json.load(file)
4.2 json.loads(): str -> dict/json
Parses a JSON-formatted string into a Python object.
import json json_string = '{"name": "John", "age": 30, "city": "New York"}' data = json.loads(json_string) # str ---> dict/json print(type(data)) print(data)
Output results
4.3 json.dump():
Serialize Python objects to JSON format and write to a file.
import json data = {<!-- -->"name": "John", "age": 30, "city": "New York"} with open('output.json', 'w') as file: json.dump(data, file)
4.4 json.dumps(): json/dict –> str
Serializes a Python object into a JSON-formatted string.
Note the role of ensure_ascii=False
import json data = {<!-- -->"name": "China", "age": 30, "city": "New York"} json_string_one = json.dumps(data) json_string_two = json.dumps(data, ensure_ascii=False) print(type(json_string_one)) print(json_string_one) print(json_string_two)
Output result:
4.5 Summary:
json.load
is used to load JSON data from a file.
json.loads
is used to load JSON data from a JSON string.
json.dump
is used to write JSON data to a file.
json.dumps
is used to convert Python objects into JSON-formatted strings.