1. Data source
-
Data source: UCI
-
Data format
The last column of each row is the class label (which kind of object the sample is); all other columns are numeric feature values.
2. Method used
Perceptron (perception)
The perceptron is a linear classifier. It is the simplest neural network, consisting of a single neuron, and it performs binary classification according to the sign of its output: an activation greater than or equal to zero gives class 1, otherwise class 0.
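The coefficients are optimized iteratively with stochastic gradient descent. For each training row with features $x_i$, true label $y$ and prediction $\hat{y}$, the bias $b$ and weights $w_i$ are updated with learning rate $\eta$:
$$b \leftarrow b - \eta\,(\hat{y} - y), \qquad w_i \leftarrow w_i - \eta\,(\hat{y} - y)\,x_i$$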
3. Code implementation
Everything, starting from reading the data, is implemented by hand: no third-party library is used, only the Python standard library.
-
import base library
from csv import reader
from random import randrange, seed
-
Read csv file and convert string to float
def csv_loader(file):
    """Read a CSV file and return its non-empty rows.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list of rows; each row is a list of strings as produced by
        ``csv.reader``. Blank lines (empty rows) are skipped.
    """
    # newline='' is what the csv module asks for, so that it can handle
    # line endings (including newlines inside quoted fields) by itself.
    with open(file, 'r', newline='') as f:
        return [row for row in reader(f) if row]


def str_to_float_converter(dataset):
    """Convert every column except the last from string to float, in place.

    The number of columns is taken from the first row. The last column is
    left untouched. An empty dataset is left as is.

    Args:
        dataset: List of rows (lists of strings), modified in place.
    """
    if not dataset:
        return
    n_features = len(dataset[0]) - 1
    for row in dataset:
        for i in range(n_features):
            row[i] = float(row[i].strip())
-
The last column of the original data set is converted to 0 or 1
def last_column_to_int(dataset):
    """Replace the label in the last column of every row with an integer code.

    The distinct labels are sorted before being numbered 0, 1, 2, ... so the
    label-to-code mapping is the same on every run. Enumerating a plain set
    of strings would make the mapping depend on hash randomization.

    Args:
        dataset: List of rows, modified in place. The labels in the last
            column must be hashable and mutually orderable.
    """
    labels = sorted({row[-1] for row in dataset})
    lookup = {label: code for code, label in enumerate(labels)}
    for row in dataset:
        row[-1] = lookup[row[-1]]
-
Split data using k-fold cross validation
def k_folds_cross_validation(dataset, n_folds):
    """Randomly split a dataset into ``n_folds`` folds of equal size.

    Rows are drawn without replacement using ``randrange``, so the split is
    reproducible after ``seed(...)``. Each fold holds
    ``len(dataset) // n_folds`` rows; leftover rows are not assigned to any
    fold. The input list itself is not modified.

    Args:
        dataset: List of rows.
        n_folds: Number of folds; must be at least 1.

    Returns:
        A list of ``n_folds`` folds, each a list of rows from ``dataset``.

    Raises:
        ValueError: If ``n_folds`` is smaller than 1.
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1')
    remaining = list(dataset)  # shallow copy so the caller's list stays intact
    fold_size = len(dataset) // n_folds
    folds = []
    for _ in range(n_folds):
        fold = []
        while len(fold) < fold_size:
            fold.append(remaining.pop(randrange(len(remaining))))
        folds.append(fold)
    return folds
-
Calculate accuracy
def calculate_accuracy(actual, predicted):
    """Return the percentage of predictions that equal the actual labels.

    Args:
        actual: Sequence of true labels.
        predicted: Sequence of predicted labels, at least as long as
            ``actual``; entries are compared position by position.

    Returns:
        Accuracy as a float between 0.0 and 100.0. An empty ``actual``
        yields 0.0 instead of a division by zero.

    Raises:
        ValueError: If ``predicted`` has fewer entries than ``actual``.
    """
    if len(predicted) < len(actual):
        raise ValueError('predicted has fewer entries than actual')
    if not actual:
        return 0.0
    correct = sum(1 for a, p in zip(actual, predicted) if a == p)
    return correct / float(len(actual)) * 100.0
-
model testing
Using K-fold cross-validation, evaluate the model accuracy for each fold.
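Here, algo is a placeholder for the algorithm under test: any function that is called as algo(train, test, *args) and returns a list of predictions can be passed in, so different algorithms can be evaluated with the same harness.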
def model_test(dataset, algo, n_folds, *args):
    """Evaluate an algorithm with k-fold cross validation.

    Each fold is used once as the test set while the rows of all other folds
    form the training set. The test rows passed to ``algo`` are copies whose
    last column (the label) is set to ``None``.

    Args:
        dataset: List of rows; the last column is the label.
        algo: Callable invoked as ``algo(train, test, *args)`` that returns
            one prediction per test row.
        n_folds: Number of folds.
        *args: Extra positional arguments forwarded to ``algo``.

    Returns:
        A list with one accuracy percentage per fold.
    """
    folds = k_folds_cross_validation(dataset, n_folds)
    scores = []
    for held_out, fold in enumerate(folds):
        # Select the training folds by position and flatten them in one
        # pass; this avoids the quadratic cost of sum(list_of_lists, []).
        train = [
            row
            for index, other in enumerate(folds)
            if index != held_out
            for row in other
        ]
        test = []
        for row in fold:
            masked = list(row)
            masked[-1] = None  # hide the label from the algorithm
            test.append(masked)
        predicted = algo(train, test, *args)
        actual = [row[-1] for row in fold]
        scores.append(calculate_accuracy(actual, predicted))
    return scores
-
Base Model for Predicting Data
The perceptron is essentially a linear model: a row is assigned to one of two classes depending on whether the weighted sum of its features plus the bias is non-negative or negative.
def predict(row, weights):
    """Classify one row with the perceptron's linear model.

    The activation is ``weights[0]`` (the bias) plus the sum of
    ``weights[i + 1] * row[i]`` over every column except the last, which is
    the label position and is ignored.

    Args:
        row: Feature values followed by one label slot.
        weights: Bias followed by one weight per feature.

    Returns:
        1.0 if the activation is greater than or equal to 0.0, else 0.0.
    """
    activation = weights[0]
    for i in range(len(row) - 1):
        activation += weights[i + 1] * row[i]
    return 1.0 if activation >= 0.0 else 0.0
-
Estimating Coefficients Using Stochastic Gradient Descent
def estimats_weights(train, learning_rate, n_epochs):
    """Estimate perceptron weights with stochastic gradient descent.

    Weights start at zero and are updated after every training row. After
    each epoch, the sum of squared prediction errors over that epoch is
    printed.

    Args:
        train: Non-empty list of rows; the last column is the label.
        learning_rate: Step size applied to every update.
        n_epochs: Number of passes over the training data.

    Returns:
        A list with the bias first, followed by one weight per feature.
    """
    weights = [0.0 for _ in range(len(train[0]))]
    for epoch in range(n_epochs):
        sse = 0.0
        for row in train:
            error = predict(row, weights) - row[-1]
            # Accumulate over the whole epoch; plain assignment here would
            # report only the last row's error.
            sse += error ** 2
            weights[0] = weights[0] - learning_rate * error
            for i in range(len(row) - 1):
                weights[i + 1] = weights[i + 1] - learning_rate * error * row[i]
        print('This is epoch < %s >, sum_error is < %.4f >' %(epoch,sse))
    return weights
-
Perceptron function, used to predict the test data
def perception(train, test, learning_rate, n_epochs):
    """Train a perceptron on ``train`` and predict a label for each test row.

    Args:
        train: Training rows; the last column is the label.
        test: Rows to classify.
        learning_rate: Step size passed to ``estimats_weights``.
        n_epochs: Number of training epochs passed to ``estimats_weights``.

    Returns:
        A list with one prediction per row of ``test``.
    """
    fitted = estimats_weights(train, learning_rate, n_epochs)
    return [predict(sample, fitted) for sample in test]
-
Operation and parameter adjustment
# Run the experiment only when executed as a script, so that importing this
# module does not read the data file or start training.
if __name__ == '__main__':
    seed(1)  # fixed seed so the fold split is reproducible

    # Load the data and convert features to float and labels to int codes.
    file = './download_datas/sonar.all-data.csv'
    dataset = csv_loader(file)
    str_to_float_converter(dataset)
    last_column_to_int(dataset)

    # Parameters to adjust.
    n_folds = 5
    learning_rate = 0.0001
    n_epochs = 12
    algo = perception

    scores = model_test(dataset, algo, n_folds, learning_rate, n_epochs)
    print('The score of my model are : %s ' %scores)
    print('The average accuracy is : %.3f%% ' %(sum(scores)/float(len(scores))))
operation result:
The score of my model are : [80.48780487804879, 82.92682926829268, 73.17073170731707, 82.92682926829268, 70.73170731707317] The average accuracy is : 78.049%
4. Complete code
# 1. Import the basic library
from csv import reader
from random import randrange, seed


# 2. Read csv file and convert string to float
def csv_loader(file):
    """Read a CSV file and return its non-empty rows as lists of strings."""
    # newline='' is what the csv module asks for, so that it can handle
    # line endings (including newlines inside quoted fields) by itself.
    with open(file, 'r', newline='') as f:
        return [row for row in reader(f) if row]


def str_to_float_converter(dataset):
    """Convert every column except the last from string to float, in place.

    The number of columns is taken from the first row; an empty dataset is
    left as is.
    """
    if not dataset:
        return
    n_features = len(dataset[0]) - 1
    for row in dataset:
        for i in range(n_features):
            row[i] = float(row[i].strip())


# 3. The last column of the original data set is converted to 0 or 1
def last_column_to_int(dataset):
    """Replace the label in the last column with an integer code, in place.

    Labels are sorted before being numbered so the mapping is the same on
    every run (enumerating a set of strings depends on hash randomization).
    """
    labels = sorted({row[-1] for row in dataset})
    lookup = {label: code for code, label in enumerate(labels)}
    for row in dataset:
        row[-1] = lookup[row[-1]]


# 4. Use k_folds cross validation (cross validation)
def k_folds_cross_validation(dataset, n_folds):
    """Randomly split ``dataset`` into ``n_folds`` folds of equal size.

    Each fold holds ``len(dataset) // n_folds`` rows drawn without
    replacement; leftover rows are not assigned to any fold. The input list
    is not modified.

    Raises:
        ValueError: If ``n_folds`` is smaller than 1.
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1')
    remaining = list(dataset)  # shallow copy so the caller's list stays intact
    fold_size = len(dataset) // n_folds
    folds = []
    for _ in range(n_folds):
        fold = []
        while len(fold) < fold_size:
            fold.append(remaining.pop(randrange(len(remaining))))
        folds.append(fold)
    return folds


# 5. Computational Accuracy
def calculate_accuracy(actual, predicted):
    """Return the percentage of predictions that equal the actual labels.

    An empty ``actual`` yields 0.0 instead of a division by zero.

    Raises:
        ValueError: If ``predicted`` has fewer entries than ``actual``.
    """
    if len(predicted) < len(actual):
        raise ValueError('predicted has fewer entries than actual')
    if not actual:
        return 0.0
    correct = sum(1 for a, p in zip(actual, predicted) if a == p)
    return correct / float(len(actual)) * 100.0


# 6. whether the algo is good or not?
def model_test(dataset, algo, n_folds, *args):
    """Evaluate ``algo`` with k-fold cross validation.

    Each fold is used once as the test set while the rows of all other folds
    form the training set. ``algo`` is called as ``algo(train, test, *args)``
    with test rows whose label column is set to ``None``.

    Returns:
        A list with one accuracy percentage per fold.
    """
    folds = k_folds_cross_validation(dataset, n_folds)
    scores = []
    for held_out, fold in enumerate(folds):
        # Select the training folds by position and flatten them in one
        # pass; this avoids the quadratic cost of sum(list_of_lists, []).
        train = [
            row
            for index, other in enumerate(folds)
            if index != held_out
            for row in other
        ]
        test = []
        for row in fold:
            masked = list(row)
            masked[-1] = None  # hide the label from the algorithm
            test.append(masked)
        predicted = algo(train, test, *args)
        actual = [row[-1] for row in fold]
        scores.append(calculate_accuracy(actual, predicted))
    return scores


# 7. Predictive models
def predict(row, weights):
    """Classify one row: 1.0 if bias + weighted feature sum >= 0.0, else 0.0.

    ``weights[0]`` is the bias; the last column of ``row`` (the label slot)
    is ignored.
    """
    activation = weights[0]
    for i in range(len(row) - 1):
        activation += weights[i + 1] * row[i]
    return 1.0 if activation >= 0.0 else 0.0


# 8. Estimate coefficients using stochastic gradient descent
def estimats_weights(train, learning_rate, n_epochs):
    """Estimate perceptron weights with stochastic gradient descent.

    Weights start at zero and are updated after every training row. After
    each epoch, the sum of squared errors over that epoch is printed.

    Returns:
        A list with the bias first, followed by one weight per feature.
    """
    weights = [0.0 for _ in range(len(train[0]))]
    for epoch in range(n_epochs):
        sse = 0.0
        for row in train:
            error = predict(row, weights) - row[-1]
            # Accumulate over the whole epoch; plain assignment here would
            # report only the last row's error.
            sse += error ** 2
            weights[0] = weights[0] - learning_rate * error
            for i in range(len(row) - 1):
                weights[i + 1] = weights[i + 1] - learning_rate * error * row[i]
        print('This is epoch < %s >, sum_error is < %.4f >' %(epoch,sse))
    return weights


# 9. Perceptron function, used to predict test data
def perception(train, test, learning_rate, n_epochs):
    """Train a perceptron on ``train`` and predict a label for each test row."""
    weights = estimats_weights(train, learning_rate, n_epochs)
    return [predict(row, weights) for row in test]


# 10. Operation and parameter adjustment
# Guarded so that importing this module does not read the data file or train.
if __name__ == '__main__':
    seed(1)  # fixed seed so the fold split is reproducible
    file = './download_datas/sonar.all-data.csv'
    dataset = csv_loader(file)
    str_to_float_converter(dataset)
    last_column_to_int(dataset)
    n_folds = 5
    learning_rate = 0.0001
    n_epochs = 12
    algo = perception
    scores = model_test(dataset, algo, n_folds, learning_rate, n_epochs)
    print('The score of my model are : %s ' %scores)
    print('The average accuracy is : %.3f%% ' %(sum(scores)/float(len(scores))))
The knowledge points of the article match the official knowledge files, and you can further learn relevant knowledge algorithm skill treeHome pageOverview 41884 people are studying systematically