Data processing files mutual conversion

Data processing

The main ones organized here are .txt, .json, .csv, .excel, .pkl Data storage and conversion between files

1. txt part

1.1 List[dict] conversion to txt

specific code

def save_jsonlist_txt(json_data, target_path):
    """ Save json list to txt file
    """
    with open(target_path, "w", encoding="utf-8") as file:
        for item in json_data:
            item = str(item).replace("\
", "\\
")
            file.write(item)
            file.write("\
")


def load_jsonlist_txt(source_path):
    """ Load json list from txt file
    """
    json_list = []
    try:
        with open(source_path, "r", encoding="utf-8") as file:
            for line in file:
                try:
                    json_data = eval(line.strip())
                    json_list.append(json_data)
                except json.JSONDecodeError:
                    print(f"Unable to parse Json data: {<!-- -->line.strip()}")
            return json_list
    except FileNotFoundError:
        print(f"File '{<!-- -->source_path} not found'")

1.2 List[str] mutual conversion txt

def save_str_list_txt(str_list, target_path):
    """ Save str list to txt file
    """
    with open(target_path, "w", encoding="utf-8") as file:
        for item in str_list:
            item = item.replace("\
", "\\
")
            file.write(item)
            file.write("\
")


def load_strlist_txt(file_path):
    """ Load str list from txt file
    """
    txt_list = []
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file.readlines():
                txt_list.append(line.strip())
            return txt_list
    except FileNotFoundError:
        print(f"File '{<!-- -->file_path} not found'")

1.3 Overall code for txt related operations

import json


def save_json_list_txt(json_data, target_path):
    """ Save json list to txt file
    """
    with open(target_path, "w", encoding="utf-8") as file:
        for item in json_data:
            item = str(item).replace("\
", "\\
")
            file.write(item)
            # file.write(str(item))
            file.write("\
")


def load_json_list_txt(source_path):
    """ Load json list from txt file
    """
    json_list = []
    try:
        with open(source_path, "r", encoding="utf-8") as file:
            for line in file:
                try:
                    json_data = eval(line.strip())
                    json_list.append(json_data)
                except json.JSONDecodeError:
                    print(f"Unable to parse Json data: {<!-- -->line.strip()}")
            return json_list
    except FileNotFoundError:
        print(f"File '{<!-- -->source_path} not found'")


def save_str_list_txt(str_list, target_path):
    """ Save str list to txt file
    """
    with open(target_path, "w", encoding="utf-8") as file:
        for item in str_list:
            item = item.replace("\
", "\\
")
            file.write(item)
            file.write("\
")


def load_str_list_txt(file_path):
    """ Load str list from txt file
    """
    txt_list = []
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file.readlines():
                txt_list.append(line.strip())
            return txt_list
    except FileNotFoundError:
        print(f"File '{<!-- -->file_path} not found'")




if __name__ == '__main__':

    human_list = [
        {<!-- -->
            "name": "jordan\
",
            "age": 30,
            "city": "Nanchang",
            "list": [3, 6, 0]
        },
        {<!-- -->
            "name": "John",
            "age": 30,
            "city": "Guangzhou",
            "list": [3, 6, 1]
        },
        {<!-- -->
            "name": "JiWang",
            "age": 30,
            "city": "Shenzhen",
            "list": [2, 3, 1]
        },
    ]
    day_list = ["Monday\
\
", "Tuesday", "Wednesday", "Thursday", "Week⑤", "Saturday", "Sunday"]

    save_json_list_txt(human_list, "human_list.txt")
    new_human_list = load_json_list_txt("human_list.txt")
    print(new_human_list)
    # print(type(new_human_list))
    # print(type(new_human_list[0]))
    save_str_list_txt(day_list, "day.txt")
    new_day_list = load_str_list_txt("day.txt")
    print(new_day_list)

2. json format part

It is recommended here to understand the differences between json.load, json.loads, json.dump and json.dumps

2.1 List[dict] conversion to json


Specific method code

def save_jsonline_json(json_list, target_path):
    """
    json_list ---> jsonline
    :param json_list:
    :param target_path:
    :return:
    """
    with open(target_path, 'w', encoding="utf-8") as json_file:
        for item in json_list:
            json.dump(item, json_file, ensure_ascii=False)
            json_file.write("\
")
    print(f"The Json list has been saved to the {<!-- -->target_path} file. Each line is a json object")


def load_jsonline_json(source_file):
    """
    jsonline --> json_list
    :param source_file:
    :return: json_list
    """
    json_list = []
    with open(source_file, "r", encoding="utf-8") as json_file:
        for line in json_file:
            try:
                json_data = json.loads(line.strip())
                json_list.append(json_data)
            except json.JSONDecodeError as e:
                print(f"JSON parsing error: {<!-- -->e}")
        return json_list

2.2 ditc, List[dict] convert json files to each other

scene 1:

Scene 2:

def save_json(json_list, target_path):
    """save_json"""
    with open(target_path, "w", encoding="utf-8") as json_file:
        json.dump(json_list, json_file, indent=4, ensure_ascii=False)
    print(f"The Json list has been saved to the {<!-- -->target_path} file.")

def load_json(source_file):
    """load_json"""
    json_list = []
    with open(source_file, "r", encoding="utf-8") as json_file:
        try:
            json_list = json.load(json_file)
        except json.JSONDecodeError as e:
            print(f"JSON parsing error: {<!-- -->e}")
        return json_list

2.3 List[dict] conversion to excel

Scenes:

Here you need to pay attention to the json converted from excel. It is of str type like list, and the list type will not be retained. For further operations, eval and other operations are required.

def save_jsonlist_excel(json_list, excel_target_file):
    """jsonlist_trans_excel"""
    df = pd.DataFrame(json_list)
    df.to_excel(excel_target_file, index=False)


def load_jsonlist_excel(excel_source_path):
    """excel_trans_jsonlist"""
    df = pd.read_excel(excel_source_path)
    json_list = df.to_dict(orient="records")
    return json_list

2.4 Convert jsonlist to csv

def save_jsonlist_csv(data_list, csv_target_path):
"""save_jsonlist_csv"""
    with open(csv_target_path, 'w', newline='', encoding="utf-8") as csv_file:
        fieldnames = data_list[0].keys() # Use the key of the first dictionary as the column name
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        csv_writer.writeheader() #Write CSV header (column name)
        for data in data_list:
            csv_writer.writerow(data)

2.5 Overall code for json related operations

import json
import pandas as pd
import csv


def save_json(json_list, target_path):
    """save_json"""
    with open(target_path, "w", encoding="utf-8") as json_file:
        json.dump(json_list, json_file, indent=4, ensure_ascii=False)
    print(f"The Json list has been saved to the {target_path} file.")


def load_json(source_file):
    """load_json"""
    json_list = []
    with open(source_file, "r", encoding="utf-8") as json_file:
        try:
            json_list = json.load(json_file)
        except json.JSONDecodeError as e:
            print(f"JSON parsing error: {e}")
        return json_list


def save_jsonline_json(json_list, target_path):
    """
    json_list ---> jsonline
    :param json_list:
    :param target_path:
    :return:
    """
    with open(target_path, 'w', encoding="utf-8") as json_file:
        for item in json_list:
            json.dump(item, json_file, ensure_ascii=False)
            json_file.write("\
")
    print(f"The Json list has been saved to the {<!-- -->target_path} file. Each line is a json object")


def load_jsonline_json(source_file):
    """
    jsonline --> json_list
    :param source_file:
    :return: json_list
    """
    json_list = []
    with open(source_file, "r", encoding="utf-8") as json_file:
        for line in json_file:
            try:
                json_data = json.loads(line.strip())
                json_list.append(json_data)
            except json.JSONDecodeError as e:
                print(f"JSON parsing error: {<!-- -->e}")
        return json_list


def save_jsonlist_excel(json_list, excel_target_file):
    """jsonlist_trans_excel"""
    df = pd.DataFrame(json_list)
    df.to_excel(excel_target_file, index=False)


def load_jsonlist_excel(excel_source_path):
    """excel_trans_jsonlist"""
    df = pd.read_excel(excel_source_path)
    json_list = df.to_dict(orient="records")
    return json_list


def save_jsonlist_csv(data_list, csv_target_path):
    """save_jsonlist_csv"""
    with open(csv_target_path, 'w', newline='', encoding="utf-8") as csv_file:
        fieldnames = data_list[0].keys() # Use the key of the first dictionary as the column name
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        csv_writer.writeheader() #Write CSV header (column name)
        for data in data_list:
            csv_writer.writerow(data)



if __name__ == '__main__':

    human_list = [{"name": "jordan", "age": 30, "city": "Nanchang", "list": [3, 6, 0]},
                  {"name": "John", "age": 30, "city": "Guangzhou", "list": [3, 6, 1]},
                  {"name": "JiWang", "age": 30, "city": "Shenzhen", "list": [2, 3, 1]}]

    human = {
            "name": "jordan",
            "age": 30,
            "city": "Nanchang",
            "list": [3, 6, 0]
        }

    # json_path = "json/humanline_list.json"
    # save_jsonline_json(human_list, json_path)
    # new_human_list = load_jsonline_json(json_path)
    # print(new_human_list)
    # save_json(human_list, "json/human_list.json")
    # human_json = load_json("json/human_list.json")
    # print(human_json)

    # save_jsonlist_excel(human_list, "excel/human.xlsx")
    #dic_list = load_jsonlist_excel("excel/human.xlsx")
    # print(dic_list)
    # print(dic_list[0]["list"])
    # print(type(dic_list[0]["list"]))
    save_jsonlist_csv(human_list, "csv/human_list.csv")
    #
    # save_json(human, "json/human.json")
    # human_json = load_json("json/human.json")
    # print(human_json)
    # new_json_list = load_json_list(json_path)
    # print(new_json_list)

3. Excel related code operations

3.1 excel <----> List[dict]


function code

def save_jsonlist_excel(json_list, excel_target_file):
    """jsonlist_trans_excel"""
    df = pd.DataFrame(json_list)
    df.to_excel(excel_target_file, index=False)


def load_jsonlist_excel(excel_source_path):
    """excel_trans_jsonlist"""
    df = pd.read_excel(excel_source_path)
    json_list = df.to_dict(orient="records")
    return json_list

3.2 Load data of a certain column


function code

def load_one_column(excel_source_path, column_name):
    df = pd.read_excel(excel_source_path)
    column = df[column_name]
    return list(column)

3.3 Add a new column

def add_list_column(excel_source_path, data_list, column_name):
    df = pd.read_excel(excel_source_path)
    df[column_name] = data_list
    df.to_excel(excel_source_path, index=False)

3.4 Extract key information columns from the excel table to generate a new excel table

def excel_key_info(excel_source_path, excel_target_path, column_name_list):
    """
    Extract key information columns from the excel table to generate a new excel table
    :param excel_source_path: source file
    :param excel_target_path: target file
    :param column_name_list: required column name
    :return: no return value
    """
    main_df = pd.read_excel(excel_source_path, usecols=column_name_list) #
    main_df.to_excel(excel_target_path, index=False)

3.5 Convert excel table content into jsonline file

Transfer all information or key information

def excel_trans_jsonline(excel_source_path, json_target_path, column_name_list=None):
    """
    Convert excel table content to jsonline file
    If there are values in column_name_list, only the key information will be converted, otherwise all will be converted.
    """
    if column_name_list:
        main_df = pd.read_excel(excel_source_path, usecols=column_name_list)
    else:
        main_df = pd.read_excel(excel_source_path)
    json_list = main_df.to_dict(orient="records")
    with open(json_target_path, 'w', encoding="utf-8") as json_file:
        for item in json_list:
            json.dump(item, json_file, ensure_ascii=False)
            json_file.write("\
")
    print(f"The Json list has been saved to the {<!-- -->excel_source_path} file. Each line is a json object")

3.6 Read the first n rows of data

def load_top_n_excel(excel_source_path, rows):
    """Read the first n rows of data"""
    df = pd.read_excel(excel_source_path).head(rows)

3.7 Merge multiple excel tables

def merge_excel(excel_list_path, excel_target_path):
    """Merge multiple excel tables"""
    df_list = []
    for current_excel_path in excel_list_path:
        current_df = pd.read_excel(current_excel_path)
        df_list.append(current_df)

    merge_df = pd.concat(df_list)
    merge_df.to_excel(excel_target_path, index=False)

3.8 Delete/Add qualified rows

def condition_delete_rows(excel_source_path, excel_target_path, column_name, delete_list):
    """Delete/Add qualifying data"""
    df = pd.read_excel(excel_source_path)
    df = df[~df[column_name].isin(delete_list)] # If there is no ~, it means adding
    df.to_excel(excel_target_path, index=False)

3.9 Overall code

import pandas as pd
import json


def save_jsonlist_excel(json_list, excel_target_file):
    """jsonlist_trans_excel"""
    df = pd.DataFrame(json_list)
    df.to_excel(excel_target_file, index=False)


def load_jsonlist_excel(excel_source_path):
    """excel_trans_jsonlist"""
    df = pd.read_excel(excel_source_path)
    json_list = df.to_dict(orient="records")
    return json_list


def load_one_column(excel_source_path, column_name):
    df = pd.read_excel(excel_source_path)
    column = df[column_name]
    return list(column)


def add_list_column(excel_source_path, data_list, column_name):
    df = pd.read_excel(excel_source_path)
    df[column_name] = data_list
    df.to_excel(excel_source_path, index=False)


def excel_key_info(excel_source_path, excel_target_path, column_name_list):
    """
    Extract key information columns from the excel table to generate a new excel table
    :param excel_source_path: source file
    :param excel_target_path: target file
    :param column_name_list: required column name
    :return: no return value
    """
    main_df = pd.read_excel(excel_source_path, usecols=column_name_list) #
    main_df.to_excel(excel_target_path, index=False)


def excel_trans_jsonline(excel_source_path, json_target_path, column_name_list=None):
    """
    Convert excel table content to jsonline file
    If there are values in column_name_list, only the key information will be converted, otherwise all will be converted.
    """
    if column_name_list:
        main_df = pd.read_excel(excel_source_path, usecols=column_name_list)
    else:
        main_df = pd.read_excel(excel_source_path)
    json_list = main_df.to_dict(orient="records")
    with open(json_target_path, 'w', encoding="utf-8") as json_file:
        for item in json_list:
            json.dump(item, json_file, ensure_ascii=False)
            json_file.write("\
")
    print(f"The Json list has been saved to the {<!-- -->excel_source_path} file. Each line is a json object")


def load_top_n_excel(excel_source_path, rows):
    """Read the first n rows of data"""
    df = pd.read_excel(excel_source_path).head(rows)
    print(df)


def merge_excel(excel_list_path, excel_target_path):
    """Merge multiple excel tables"""
    df_list = []
    for current_excel_path in excel_list_path:
        current_df = pd.read_excel(current_excel_path)
        df_list.append(current_df)

    merge_df = pd.concat(df_list)
    merge_df.to_excel(excel_target_path, index=False)


def condition_delete_rows(excel_source_path, excel_target_path, column_name, delete_list):
    """Delete/Add qualifying data"""
    df = pd.read_excel(excel_source_path)
    df = df[~df[column_name].isin(delete_list)] # If there is no ~, it means adding
    df.to_excel(excel_target_path, index=False)


if __name__ == '__main__':
    human_list = [{"name": "jordan", "age": 30, "city": "Nanchang", "list": [3, 6, 0]},
                  {"name": "John", "age": 30, "city": "Guangzhou", "list": [3, 6, 1]},
                  {"name": "JiWang", "age": 30, "city": "Shenzhen", "list": [2, 3, 1]},
                  {"name": "kobe", "age": 38, "city": "Nanchang", "list": [1, 2, 3]}]
    index_list = [1, 2, 3, 4]
    save_jsonlist_excel(human_list, "human.xlsx")
    city_list = load_one_column("human.xlsx", "city")
    print(city_list)
    print(type(city_list))
    add_list_column("human.xlsx", index_list, "index")

    condition_delete_rows("human.xlsx", "human_delete.xlsx", "city", ["Shenzhen", "Guangzhou"])

4. The difference between json.load, loads, dump, dumps

json.load, json.loads, json.dump, and json.dumps are used for processing in Python Functions for JSON data, their functions and usage are different:

4.1 json.load():

Reads JSON data from a file and parses it into Python objects.
Mostly used to load files in json format and return a json object or json list

import json

with open('data.json', 'r') as file:
    data = json.load(file)

4.2 json.loads(): str -> dict/json

Parses a JSON-formatted string into a Python object.

import json

json_string = '{"name": "John", "age": 30, "city": "New York"}'
data = json.loads(json_string) # str ---> dict/json
print(type(data))
print(data)

Output results

4.3 json.dump():

Serialize Python objects to JSON format and write to a file.

import json

data = {<!-- -->"name": "John", "age": 30, "city": "New York"}
with open('output.json', 'w') as file:
    json.dump(data, file)

4.4 json.dumps(): json/dict –> str

Serializes a Python object into a JSON-formatted string.
Note the role of ensure_ascii=False

import json
data = {<!-- -->"name": "China", "age": 30, "city": "New York"}
json_string_one = json.dumps(data)
json_string_two = json.dumps(data, ensure_ascii=False)
print(type(json_string_one))
print(json_string_one)
print(json_string_two)

Output result:

4.5 Summary:

json.load is used to load JSON data from a file.
json.loads is used to load JSON data from a JSON string.
json.dump is used to write JSON data to a file.
json.dumps is used to convert Python objects into JSON-formatted strings.