Report Consolidation
Merge all Excel files in a folder into a single file. Note that only the first sheet (sheet1) of each Excel file is merged; if your Excel files contain multiple sheets, modify the all_to_one() function in the code yourself. The merge steps are as follows:
-
Get all files under the path.
-
Create a new excel file to store all the data.
-
Open the Excel files to be merged one by one, read each file row by row, store each row as a list, and collect all of the rows in one overall list.
-
Write line by line to the excel file.
# coding:utf-8
"""Merge the first sheet of every Excel file in a folder into one .xls file."""
import os

import xlrd
import xlwt
from xlutils.copy import copy


def get_allfile_msg(file_dir):
    """Return the top-level listing of ``file_dir``.

    Only the first triple produced by ``os.walk`` is used, so sub-folders are
    reported in ``dirs`` but are never descended into.

    :param file_dir: folder to scan
    :return: ``(root, dirs, files)`` where ``files`` contains only the names
        ending in ``.xls`` or ``.xlsx``
    :raises FileNotFoundError: if ``file_dir`` is not an existing directory
        (``os.walk`` yields nothing in that case)
    """
    top_level = next(os.walk(file_dir), None)
    if top_level is None:
        raise FileNotFoundError('%s is not an existing directory' % file_dir)
    root, dirs, files = top_level
    # NOTE(review): recent xlrd releases (2.x) read only .xls; .xlsx inputs
    # need an older xlrd or a different reader -- confirm the installed version.
    excel_files = [name for name in files if name.endswith(('.xls', '.xlsx'))]
    return root, dirs, excel_files


def get_allfile_url(root, files):
    """Join the folder path with every file name.

    :param root: folder path
    :param files: collection of file names
    :return: list with one path per entry of ``files``, in the same order
    """
    return [os.path.join(root, file_name) for file_name in files]


def _path_key(path):
    """Return a normalised form of ``path`` suitable for equality checks."""
    return os.path.normcase(os.path.abspath(path))


def all_to_one(root, allFile_url, file_name='allExcel.xls', title=None,
               have_title=True, top=1):
    """Merge the first sheet of every input workbook into one output workbook.

    :param root: folder in which the output file is created
    :param allFile_url: paths of all Excel files to merge
    :param file_name: name of the output file
    :param title: optional header cells; when given they are written as the
        first row of the output, before any merged rows
    :param have_title: when true, the first ``top`` rows of every file except
        the first one are skipped, so the header is kept only once
    :param top: number of header rows at the top of each input file
    :return: none
    """
    # Create the (empty) output workbook first; rows are appended afterwards.
    file_name = os.path.join(root, file_name)
    create_excel(file_name, title)

    # An output file left over from an earlier run lives in the same folder
    # and would otherwise be merged into itself; as the "first" file it would
    # also cause the header of every real input to be dropped.
    output_key = _path_key(file_name)
    sources = [url for url in allFile_url if _path_key(url) != output_key]

    list_row_data = []
    for file_index, url in enumerate(sources):
        print('Open %s file' % url)
        excel = xlrd.open_workbook(url)
        # Only the first sheet of each workbook is merged.
        table = excel.sheet_by_index(0)
        print('The number of rows in this file is: %d, and the number of columns is: %d' % (table.nrows, table.ncols))
        # Keep the header rows of the first file only.
        first_row = top if have_title and file_index != 0 else 0
        for i in range(first_row, table.nrows):
            list_row_data.append(table.row_values(i))
    print('total data volume is %d' % len(list_row_data))
    add_row(list_row_data, file_name)


def create_excel(file_name, title):
    """Create a workbook named ``file_name`` containing a single 'sheet1'.

    :param file_name: path of the workbook to create (overwritten if present)
    :param title: optional sequence of header cells written to row 0;
        ``None`` or an empty sequence leaves the sheet empty
    :return: none
    """
    print('Create file %s' % file_name)
    a = xlwt.Workbook()
    table = a.add_sheet('sheet1', cell_overwrite_ok=True)
    if title:
        for column, cell in enumerate(title):
            table.write(0, column, cell)
    a.save(file_name)


def add_row(list_row_data, file_name):
    """Append rows to the first sheet of an existing .xls workbook.

    :param list_row_data: list of rows, each row being a list of cell values
    :param file_name: path of the workbook to extend; it is overwritten
    :return: none
    """
    allExcel1 = xlrd.open_workbook(file_name)
    # New rows start right below whatever the sheet already contains.
    first_free_row = allExcel1.sheet_by_index(0).nrows
    # xlrd workbooks are read-only; xlutils.copy produces a writable copy.
    allExcel2 = copy(allExcel1)
    sheet2 = allExcel2.get_sheet(0)
    for offset, row_data in enumerate(list_row_data):
        for column, value in enumerate(row_data):
            sheet2.write(first_free_row + offset, column, value)
    # Save the file and overwrite the original file.
    allExcel2.save(file_name)
    print('merge completed')


if __name__ == '__main__':
    # Folder that holds the Excel files. The r prefix makes this a raw string
    # so the backslashes are kept literally instead of being treated as
    # escape sequences (without it, '\01' would become the character chr(1)).
    file_dir = r'.\01 report merge\word'
    # Number of header rows at the top of the template.
    top = 2
    # File name under which the merged data is saved.
    file_name = 'save_demo.xls'
    # Folder path, its sub-folders, and the Excel files directly inside it.
    root, dirs, files = get_allfile_msg(file_dir)
    # Full path of every Excel file.
    allFile_url = get_allfile_url(root, files)
    # With have_title=True the header rows of all but the first file are skipped.
    all_to_one(root, allFile_url, file_name=file_name, title=None,
               have_title=True, top=top)
Batch Word to PDF
import os

import pythoncom
import win32com.client


class Word_2_PDF(object):
    """Open one Word document through COM automation and export it as PDF."""

    def __init__(self, filepath, Debug=False):
        """Start Word and open ``filepath``.

        :param filepath: path of the Word document to open
        :param Debug: whether the Word window is shown while it is automated
        """
        self.wordApp = win32com.client.Dispatch('word.Application')
        self.wordApp.Visible = Debug
        try:
            self.myDoc = self.wordApp.Documents.Open(filepath)
        except Exception:
            # Do not leave a Word process behind when the document cannot
            # be opened; the original error is re-raised unchanged.
            self.wordApp.Quit()
            raise

    def export_pdf(self, output_file_path):
        """Convert the opened Word document to a PDF file.

        :param output_file_path: path of the PDF file to write
        :return: none
        """
        # 17 = wdExportFormatPDF, Item=7 = wdExportDocumentWithMarkup,
        # CreateBookmarks=0 = wdExportCreateNoBookmarks.
        self.myDoc.ExportAsFixedFormat(output_file_path, 17, Item=7, CreateBookmarks=0)

    def close(self):
        """Quit the Word application started by this object."""
        self.wordApp.Quit()


if __name__ == '__main__':
    rootpath = os.getcwd()   # folder that is searched for Word documents
    save_path = os.getcwd()  # PDF storage location
    pythoncom.CoInitialize()
    try:
        for parent, dirnames, filenames in os.walk(rootpath):
            for filename in filenames:
                title, extension = os.path.splitext(filename)
                # Accept .doc/.docx/... by extension only, and skip the
                # '~$' lock files Word creates next to an open document.
                if not extension.lower().startswith('.doc') or '~$' in filename:
                    continue
                # Build the path from the folder being walked so documents
                # in sub-folders are found as well.
                a = Word_2_PDF(os.path.join(parent, filename), True)
                try:
                    a.export_pdf(os.path.join(save_path, title + '.pdf'))
                finally:
                    # Always quit Word, otherwise one process per document
                    # stays running.
                    a.close()
    finally:
        pythoncom.CoUninitialize()
    print('conversion completed')
Contract generation
from os import listdir

from docx import Document
from openpyxl import load_workbook


def replace_text(old_text, new_text, doc=None):
    """Replace ``old_text`` with ``new_text`` in a Word document.

    Paragraph text is replaced run by run, so a placeholder that is split
    across several runs is not matched. Table cells are replaced through
    ``cell.text``.

    :param old_text: text to search for; an empty string is ignored
    :param new_text: replacement text
    :param doc: document to modify; defaults to the module-level ``document``
    :return: none
    """
    if doc is None:
        doc = document
    if not old_text:
        # str.replace('', x) would insert x between every character.
        return
    for paragraph in doc.paragraphs:
        for run in paragraph.runs:
            if old_text in run.text:
                run.text = run.text.replace(old_text, new_text)
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                # Assign only on a match: setting cell.text rewrites the
                # cell content, so untouched cells keep their formatting.
                if old_text in cell.text:
                    cell.text = cell.text.replace(old_text, new_text)


# Find the Word template and the Excel data file in the current directory.
docx_name = None
xlsx_name = None
for file in listdir():
    print(file, 'listdir')
    if file.startswith('~$'):
        # Lock file created by Office while the real file is open.
        continue
    if 'template.docx' in file:
        docx_name = file
    if 'information.xlsx' in file:
        xlsx_name = file
if docx_name is None or xlsx_name is None:
    raise FileNotFoundError(
        "expected a '*template.docx' and a '*information.xlsx' file in the current directory")

# Read the data from the first worksheet of the Excel file.
wb = load_workbook(xlsx_name)
sheetx0 = wb.sheetnames
sheetx = wb[sheetx0[0]]
# The generated file is named after the value in this column.
filename_pos = 1

# Row 1 holds the placeholder texts; columns whose header cell is empty are
# ignored (max_column may extend over empty cells).
placeholders = []
for l in range(1, sheetx.max_column + 1):
    header = sheetx.cell(row=1, column=l).value
    if header is not None:
        placeholders.append((l, str(header)))

# Rows from 3 onward are data rows: one contract is generated per row.
for row in range(3, sheetx.max_row + 1):
    # max_row may extend over empty rows; skip rows without a first cell.
    if sheetx.cell(row=row, column=1).value is None:
        continue
    # Start from a fresh copy of the template for every contract.
    document = Document(docx_name)
    for l, old_text in placeholders:
        new_text = sheetx.cell(row=row, column=l).value
        # An empty data cell clears the placeholder instead of inserting
        # the literal text "None".
        replace_text(old_text, '' if new_text is None else str(new_text), document)
    filename = str(sheetx.cell(row=row, column=filename_pos).value)
    document.save("%s.docx" % (filename))
print('The contract is generated!')
Python experience sharing
Python is worth learning, whether you want a job in data analysis or side work to earn extra money, but you still need a learning plan. To finish, here is a full set of Python learning materials to help anyone who wants to learn Python!
Python learning route
Here we sort out the commonly used technical points of Python, and summarize the knowledge points in various fields. You can find the corresponding learning resources according to the above knowledge points.
Learning software
Python commonly used development software will save you a lot of time.
Learning video
To learn programming, you must watch a lot of videos. Only by combining books and videos can you get twice the result with half the effort.
100 practice questions
Actual case
Theory alone is useless. Programming cannot be learned on paper; you have to practice hands-on and apply the knowledge you have learned.
Finally, I wish everyone progress every day! !
The above full version of the full set of Python learning materials has been uploaded to the official CSDN. If you need it, you can directly scan the QR code of the CSDN official certification below on WeChat to get it for free [guaranteed 100% free].