Python office automation: say goodbye to tedious manual operations. A "955" schedule (9 to 5, five days a week) is not a dream. Bookmarking this article is recommended!

Report Consolidation

Merge all Excel files in a folder into one file. The code below only merges the first sheet of each Excel file. If the files to be merged have multiple sheets, modify the all_to_one() function in the code yourself. The merge steps are as follows:

  1. Get all files under the path.

  2. Create a new excel file to store all the data.

  3. Open the Excel files that need to be merged one by one, read each file row by row, store every row as a list, and collect all rows in one overall list.

  4. Write line by line to the excel file.

# coding:utf-8

import xlrd
import os
import xlwt
from xlutils.copy import copy

def get_allfile_msg(file_dir):
    """
    List the excel files located directly inside a directory.
    Only the top level of file_dir is inspected; subdirectories are reported but not searched.
    :param file_dir: path of the directory to inspect
    :return: tuple (root, dirs, files) - root is the directory path, dirs the names of its
             immediate subdirectories, files the names of its '.xls' / '.xlsx' files
    """
    # os.walk yields nothing when the directory is missing or unreadable; the
    # original loop then fell through and returned None, which broke callers
    # that unpack three values. Return an empty listing instead.
    top_level = next(os.walk(file_dir), None)
    if top_level is None:
        return file_dir, [], []
    root, dirs, files = top_level
    return root, dirs, [name for name in files if name.endswith(('.xls', '.xlsx'))]


def get_allfile_url(root, files):
    """
    Build the path of every file by joining the directory path and the file name with '/'.
    :param root: directory path
    :param files: collection of file names
    :return: list of file paths, in the same order as files
    """
    return [root + '/' + name for name in files]


def all_to_one(root, allFile_url, file_name='allExcel.xls', title=None, have_title=True, top=None):
    """
    Merge the first sheet of several excel files into one new excel file.
    :param root: directory in which the output file is created
    :param allFile_url: collection of the paths of all excel files to merge
    :param file_name: The file name of the output file
    :param title: the header of the excel table, passed on to create_excel()
    :param have_title: Whether the files start with header rows (bool type), the default is true;
                       when true, the header rows of every file except the first one are not read
    :param top: number of header rows at the top of each file. When None, a module-level
                variable named `top` is used if it exists (kept for compatibility with the
                script entry point), otherwise 1
    :return: none
    """
    if top is None:
        # The header-row count used to be read from a global `top` that only exists
        # when the file runs as a script; calling this function after an import
        # raised NameError. Fall back to that global only when it is present.
        top = globals().get('top', 1)

    # First create an excel file in this directory to store the data of all excel files
    file_name = root + '/' + file_name
    create_excel(file_name, title)

    list_row_data = []
    for f, file_url in enumerate(allFile_url):
        # open the excel file
        print('Open %s file' % file_url)
        excel = xlrd.open_workbook(file_url)
        # Only the first sheet of each file is merged
        table = excel.sheet_by_index(0)
        print('The number of rows in this file is: %d, and the number of columns is: %d' % (table.nrows, table.ncols))

        for i in range(table.nrows):
            # Keep the header rows of the first file, skip them in all later files
            if have_title and f != 0 and i < top:
                continue
            # row_values returns the values of the entire row as a list
            list_row_data.append(table.row_values(i))

    print('total data volume is %d' % len(list_row_data))
    # write all rows to the output file
    add_row(list_row_data, file_name)

def create_excel(file_name, title):
    """
    Create an excel file that contains one empty sheet named 'sheet1'.
    :param file_name: path of the file to create
    :param title: header of the table; currently unused (the header-writing code is disabled)
    :return: none
    """
    print('Create file %s' % file_name)
    workbook = xlwt.Workbook()
    # Create a new sheet; nothing is written into it here
    workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    workbook.save(file_name)

def add_row(list_row_data, file_name):
    """
    Append rows of data below the existing rows of the first sheet of an excel file.
    :param list_row_data: list of rows, each row being a list of cell values
    :param file_name: path of the existing excel file; it is overwritten with the result
    :return: none
    """
    # Open the existing file to find out how many rows it already holds
    source_book = xlrd.open_workbook(file_name)
    first_free_row = source_book.sheet_by_index(0).nrows
    # Work on a copy of the workbook (xlutils.copy) and add the content to it
    writable_book = copy(source_book)
    target_sheet = writable_book.get_sheet(0)

    # data input (the original counter update `i + = 1` was a syntax error)
    for offset, row_data in enumerate(list_row_data):
        for column, value in enumerate(row_data):
            target_sheet.write(first_free_row + offset, column, value)
    # Save the file and overwrite the original file
    writable_book.save(file_name)
    print('merge completed')

if __name__ == '__main__':
    # Set folder path.
    # A backslash starts an escape sequence inside a normal string literal
    # ('\01' would turn into the control character chr(1)), so the Windows
    # path is written as a raw string (r'...') to keep every backslash literal.
    file_dir = r'.\01 report merge\word'
    # The number of header rows at the top of each file; all_to_one() reads this value
    top = 2
    # Set the file name to save the data
    file_name = 'save_demo.xls'

    # Get the path of the folder, its subfolders, and the excel files it contains
    root, dirs, files = get_allfile_msg(file_dir)
    # Combine the directory path and each file name into a file path, stored in a list
    allFile_url = get_allfile_url(root, files)
    # have_title=True: the header rows of every file except the first one are not read
    all_to_one(root, allFile_url, file_name=file_name, title=None, have_title=True)

Batch Word to PDF conversion

import win32com.client
import pythoncom
import os

class Word_2_PDF(object):
    """
    Open a Word document through COM automation and export it as a PDF file.

    The object can also be used as a context manager, in which case close()
    is called automatically when the `with` block ends.
    """

    def __init__(self, filepath, Debug=False):
        """
        :param filepath: path of the Word document to open
        :param Debug: Whether the control process is visualized (sets the Word window visibility)
        """
        self.wordApp = win32com.client.Dispatch('word.Application')
        try:
            self.wordApp.Visible = Debug
            self.myDoc = self.wordApp.Documents.Open(filepath)
        except Exception:
            # When opening fails the caller never receives this object and so
            # cannot call close(); quit Word here so it is not left open.
            self.wordApp.Quit()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Returning False lets any exception from the `with` body propagate
        return False

    def export_pdf(self, output_file_path):
        """
        Convert Word documents to PDF files
        :param output_file_path: path of the PDF file to write
        :return: none
        """
        # Word enumeration values: 17 = wdExportFormatPDF,
        # Item=7 = wdExportDocumentWithMarkup, CreateBookmarks=0 = wdExportCreateNoBookmarks
        self.myDoc.ExportAsFixedFormat(output_file_path, 17, Item=7, CreateBookmarks=0)

    def close(self):
        """Quit the Word application obtained in __init__."""
        self.wordApp.Quit()

if __name__ == '__main__':

    rootpath = os.getcwd()  # folder that is searched, including its subfolders
    save_path = os.getcwd()  # PDF storage location
    pythoncom.CoInitialize()

    for parent, dirnames, filenames in os.walk(rootpath):
        for filename in filenames:
            # splitext only strips the last extension, so 'a.b.docx' keeps the title 'a.b'
            title, extension = os.path.splitext(filename)
            # Only Word documents; names containing '~$' (Word's temporary files) are skipped
            if not extension.lower().startswith('.doc') or '~$' in filename:
                continue
            # Join with `parent`, not `rootpath`, so files inside subfolders are found.
            # os.path.join also avoids the invalid '\' string literal.
            a = Word_2_PDF(os.path.join(parent, filename), True)
            try:
                # Save directly as a PDF file
                a.export_pdf(os.path.join(save_path, title + '.pdf'))
            finally:
                # Always quit Word, otherwise one Word application per document stays open
                a.close()
    print('conversion completed')

Contract generation

from openpyxl import load_workbook
from docx import Document
from os import listdir
def replace_text(old_text, new_text, doc=None):
    """
    Replace old_text with new_text in the paragraphs and tables of a Word document.
    Paragraph text is replaced run by run, so a match that is split across two runs is not found.
    :param old_text: text to search for
    :param new_text: replacement text
    :param doc: document to modify; when None, the module-level variable `document` is used
    :return: none
    """
    target = document if doc is None else doc

    # read all paragraphs
    for paragraph in target.paragraphs:
        for run in paragraph.runs:
            # Assign only when something matches: setting run.text replaces the
            # whole content of the run, so untouched runs are left as they are.
            if old_text in run.text:
                run.text = run.text.replace(old_text, new_text)

    # read all tables
    for table in target.tables:
        for row in table.rows:
            for cell in row.cells:
                # Same reason as above: setting cell.text rewrites the whole cell.
                if old_text in cell.text:
                    cell.text = cell.text.replace(old_text, new_text)
# Get the file names of the Word template and the Excel data file in the current directory
docx_name = None
xlsx_name = None
for file in listdir():
    print(file, 'listdir')
    if 'template.docx' in file:
        docx_name = file
    if 'information.xlsx' in file:
        xlsx_name = file
if docx_name is None or xlsx_name is None:
    # Fail with a clear message instead of a NameError further down
    raise FileNotFoundError(
        "The current directory must contain a 'template.docx' file and an 'information.xlsx' file")

# Read data in Excel: the first worksheet is used
wb = load_workbook(xlsx_name)
sheetx0 = wb.sheetnames
sheetx = wb[sheetx0[0]]

# The new file is named after the first column of data
filename_pos = 1

# Loop over the data rows of the contract element Excel.
# Row 1 holds the texts to be replaced; data is read from row 3 onwards.
for row in range(3, sheetx.max_row + 1):
    # openpyxl may report empty rows through sheetx.max_row, skip them here
    if sheetx.cell(row=row, column=1).value is None:
        continue
    # Start from a fresh copy of the template; replace_text() works on this `document`
    document = Document(docx_name)
    for column in range(1, sheetx.max_column + 1):
        # Text to be replaced: first row of the current column
        old_text = sheetx.cell(row=1, column=column).value
        if old_text is None:
            # Empty header cell: there is nothing to search for in the template
            continue
        # New text: current row of the current column
        new_text = sheetx.cell(row=row, column=column).value
        # An empty data cell becomes empty text instead of the literal string 'None'
        replace_text(str(old_text), '' if new_text is None else str(new_text))
    # Define the file name as the content of the filename_pos column of the current row
    filename = str(sheetx.cell(row=row, column=filename_pos).value)
    # Save according to the defined file name
    document.save("%s.docx" % (filename))
print('The contract is generated!')

Python experience sharing

Learning Python is worthwhile whether you want a job in data analysis or want to earn money with side jobs, but you still need a study plan. Finally, here is a full set of Python learning materials to help everyone who wants to learn Python!

Python learning route

Here we sort out the commonly used technical points of Python, and summarize the knowledge points in various fields. You can find the corresponding learning resources according to the above knowledge points.

Learning software

Python commonly used development software will save you a lot of time.

Learning video

To learn programming, you must watch a lot of videos. Only by combining books and videos can you get twice the result with half the effort.

100 practice questions

Actual case

Theory alone is useless. Programming cannot be learned on paper only; you have to practice hands-on and apply the knowledge you have learned to real problems.

Finally, I wish everyone progress every day! !

The complete set of Python learning materials described above has been uploaded to the official CSDN site. If you need it, scan the CSDN officially certified QR code below with WeChat to get it for free [guaranteed 100% free].