Using a perceptron on the UCI sonar data to classify objects

1. Data source

Data source: UCI

Data format

The last column of each row is the class label that says which kind of object was detected; all other columns are numeric feature values.

2. Method

Perceptron (perception)

The perceptron is a linear classifier. It is the simplest neural network, consisting of a single neuron, and it performs binary classification according to the sign of its output.

$$\hat{y} = \begin{cases} 1, & w_0 + \sum_{i=1}^{n} w_i x_i \ge 0 \\ 0, & \text{otherwise} \end{cases}$$

Through gradient descent, the coefficients are continuously optimized to find the optimal coefficients:

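$$w_0 \leftarrow w_0 - \eta\,(\hat{y} - y), \qquad w_i \leftarrow w_i - \eta\,(\hat{y} - y)\,x_i \quad (i = 1, \dots, n)$$

Here $\eta$ is the learning rate, $\hat{y}$ is the prediction and $y$ is the true label.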

3. Code implementation

Everything, starting from reading the data, is implemented by hand; no third-party library is used.

import base library

from csv import reader
from random import randrange, seed

Read csv file and convert string to float

def csv_loader(file):
    """Load a CSV file into a list of rows, skipping empty lines.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list of rows; each row is the list of raw string fields
        produced by ``csv.reader``.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(file, 'r', newline='') as f:
        return [row for row in reader(f) if row]

def str_to_float_converter(dataset):
    """Convert every column except the last from string to float, in place.

    The number of columns to convert is taken from the first row; the
    last column is left untouched.

    Args:
        dataset: List of rows (lists of strings). Modified in place.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    if not dataset:
        # Nothing to convert; indexing dataset[0] below would fail.
        return
    n_features = len(dataset[0]) - 1
    for row in dataset:
        for i in range(n_features):
            row[i] = float(row[i].strip())

The last column of the original data set is converted to 0 or 1

def last_column_to_int(dataset):
    """Replace the label in the last column of every row by an integer code.

    Codes are assigned in order of first appearance (0, 1, ...), so the
    mapping is the same on every run. Enumerating a ``set`` of string
    labels, by contrast, depends on hash randomisation and can swap the
    codes between runs.

    Args:
        dataset: List of rows whose last element is a hashable label.
            Modified in place.
    """
    # dict.fromkeys keeps insertion order and drops duplicates.
    distinct_labels = dict.fromkeys(row[-1] for row in dataset)
    lookup = {label: code for code, label in enumerate(distinct_labels)}
    for row in dataset:
        row[-1] = lookup[row[-1]]

Split data using k-fold cross validation

def k_folds_cross_validation(dataset,n_folds):
    """Randomly partition the dataset into ``n_folds`` equally sized folds.

    Rows are drawn without replacement from a shallow copy, so the input
    list itself is not modified. Each fold holds
    ``int(len(dataset) / n_folds)`` rows; leftover rows are not assigned
    to any fold.

    Args:
        dataset: List of rows.
        n_folds: Number of folds to build.

    Returns:
        A list of ``n_folds`` lists of rows.
    """
    pool = list(dataset)
    rows_per_fold = int(len(dataset) / n_folds)
    folds = []
    for _ in range(n_folds):
        # Pop a random remaining row until the fold is full.
        folds.append(
            [pool.pop(randrange(len(pool))) for _ in range(rows_per_fold)]
        )
    return folds

Calculation accuracy

def calculate_accuracy(actual,predicted):
    """Return the percentage of predictions that equal the actual labels.

    Args:
        actual: Sequence of true labels.
        predicted: Sequence of predicted labels, indexed like ``actual``.

    Returns:
        Accuracy as a float between 0.0 and 100.0; 0.0 when ``actual``
        is empty, which avoids a division by zero.

    Raises:
        IndexError: If ``predicted`` is shorter than ``actual``.
    """
    total = len(actual)
    if total == 0:
        return 0.0
    correct = sum(
        1 for i, label in enumerate(actual) if label == predicted[i]
    )
    return correct / float(total) * 100.0

model testing

Using K-fold cross-validation, evaluate the model accuracy for each fold.

Here, algo is a placeholder for the algorithm: once a concrete algorithm has been written, it is passed in at run time.

def model_test(dataset, algo, n_folds, *args):
    """Score ``algo`` on every fold of a k-fold split of ``dataset``.

    Each fold in turn is held out as the test set and the algorithm is
    trained on the rows of all the other folds.

    Args:
        dataset: List of rows whose last element is the label.
        algo: Callable invoked as ``algo(train, test, *args)`` that
            returns one prediction per test row.
        n_folds: Number of folds to split the data into.
        *args: Extra positional arguments forwarded to ``algo``.

    Returns:
        A list with one accuracy percentage per fold.
    """
    folds = k_folds_cross_validation(dataset, n_folds)
    scores = []
    for held_out in folds:
        # Training data: the rows of every fold except the held-out one.
        others = list(folds)
        others.remove(held_out)
        train = [row for part in others for row in part]

        # Test rows are copies with the label blanked out, so the
        # algorithm cannot read the answer.
        test = []
        for row in held_out:
            masked = list(row)
            test.append(masked)
            masked[-1] = None

        predicted = algo(train, test, *args)
        actual = [row[-1] for row in held_out]
        scores.append(calculate_accuracy(actual, predicted))
    return scores

Base Model for Predicting Data

The perceptron is essentially a linear model: a sample is assigned to one of two classes according to whether the model's output is non-negative or negative.

def predict(row,weights):
    """Classify one row with a linear threshold unit.

    The activation is ``weights[0]`` (the bias) plus the weighted sum of
    every element of ``row`` except the last, which holds the label and
    is ignored.

    Args:
        row: Feature values followed by one trailing label slot.
        weights: Bias followed by one weight per feature.

    Returns:
        1.0 if the activation is greater than or equal to zero, else 0.0.
    """
    activation = weights[0]
    # weights[0] is the bias, so feature i pairs with weights[i + 1].
    for i, feature in enumerate(row[:-1], start=1):
        activation += weights[i] * feature
    return 1.0 if activation >= 0.0 else 0.0

Estimating Coefficients Using Stochastic Gradient Descent

def estimats_weights(train,learning_rate,n_epochs):
    """Fit the perceptron weights by per-row (stochastic) updates.

    Every epoch visits each training row once, predicts its label with
    the current weights and moves the weights against the prediction
    error. The summed squared error of the epoch is printed.

    Args:
        train: Non-empty list of rows; the last element of each row is
            the numeric label.
        learning_rate: Step size applied to every update.
        n_epochs: Number of passes over the training data.

    Returns:
        A list of weights: the bias followed by one weight per feature.
    """
    # One slot for the bias plus one per feature (row length minus label).
    weights = [0.0] * len(train[0])
    for epoch in range(n_epochs):
        sse = 0.0
        for row in train:
            error = predict(row, weights) - row[-1]
            # Accumulate over the whole epoch; plain assignment would
            # report only the last row's error.
            sse += error ** 2
            weights[0] -= learning_rate * error
            for i in range(len(row) - 1):
                weights[i + 1] -= learning_rate * error * row[i]
        print('This is epoch < %s >, sum_error is < %.4f >' %(epoch,sse))
    return weights

Perceptron function, used to predict the test data

def perception(train,test,learning_rate,n_epochs):
    """Train on ``train`` and return a prediction for every row of ``test``.

    Args:
        train: Training rows, label in the last position.
        test: Rows to classify.
        learning_rate: Step size forwarded to the weight estimation.
        n_epochs: Number of training epochs.

    Returns:
        A list with one prediction per test row, in order.
    """
    learned = estimats_weights(train, learning_rate, n_epochs)
    return [predict(sample, learned) for sample in test]

Operation and parameter adjustment

# Hyperparameters of the experiment.
n_folds = 5
learning_rate = 0.0001
n_epochs = 12
algo = perception

# Fix the random seed so that the fold split is reproducible.
seed(1)

# Load the data, make the features numeric and encode the labels.
file = './download_datas/sonar.all-data.csv'
dataset = csv_loader(file)
str_to_float_converter(dataset)
last_column_to_int(dataset)

# Cross-validate and report per-fold and mean accuracy.
scores = model_test(dataset, algo, n_folds, learning_rate, n_epochs)
average = sum(scores) / float(len(scores))
print('The score of my model are : %s ' % scores)
print('The average accuracy is : %.3f%% ' % average)

Output:

The score of my model are : [80.48780487804879, 82.92682926829268, 73.17073170731707, 82.92682926829268, 70.73170731707317]
The average accuracy is : 78.049%

4. Complete code

#1. Import the basic library
from random import seed, randrange
from csv import reader

#2. Read csv file and convert string to float
def csv_loader(file):
    """Load a CSV file into a list of rows, skipping empty lines.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list of rows; each row is the list of raw string fields
        produced by ``csv.reader``.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(file, 'r', newline='') as f:
        return [row for row in reader(f) if row]

def str_to_float_converter(dataset):
    """Convert every column except the last from string to float, in place.

    The number of columns to convert is taken from the first row; the
    last column is left untouched.

    Args:
        dataset: List of rows (lists of strings). Modified in place.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    if not dataset:
        # Nothing to convert; indexing dataset[0] below would fail.
        return
    n_features = len(dataset[0]) - 1
    for row in dataset:
        for i in range(n_features):
            row[i] = float(row[i].strip())

#3. The last column of the original data set is converted to 0 or 1
def last_column_to_int(dataset):
    """Replace the label in the last column of every row by an integer code.

    Codes are assigned in order of first appearance (0, 1, ...), so the
    mapping is the same on every run. Enumerating a ``set`` of string
    labels, by contrast, depends on hash randomisation and can swap the
    codes between runs.

    Args:
        dataset: List of rows whose last element is a hashable label.
            Modified in place.
    """
    # dict.fromkeys keeps insertion order and drops duplicates.
    distinct_labels = dict.fromkeys(row[-1] for row in dataset)
    lookup = {label: code for code, label in enumerate(distinct_labels)}
    for row in dataset:
        row[-1] = lookup[row[-1]]

#4. Use k_folds cross validation (cross validation)
def k_folds_cross_validation(dataset, n_folds):
    """Randomly partition the dataset into ``n_folds`` equally sized folds.

    Rows are drawn without replacement from a shallow copy, so the input
    list itself is not modified. Each fold holds
    ``int(len(dataset) / n_folds)`` rows; leftover rows are not assigned
    to any fold.

    Args:
        dataset: List of rows.
        n_folds: Number of folds to build.

    Returns:
        A list of ``n_folds`` lists of rows.
    """
    pool = list(dataset)
    rows_per_fold = int(len(dataset) / n_folds)
    folds = []
    for _ in range(n_folds):
        # Pop a random remaining row until the fold is full.
        folds.append(
            [pool.pop(randrange(len(pool))) for _ in range(rows_per_fold)]
        )
    return folds

#5. Computational Accuracy
def calculate_accuracy(actual, predicted):
    """Return the percentage of predictions that equal the actual labels.

    Args:
        actual: Sequence of true labels.
        predicted: Sequence of predicted labels, indexed like ``actual``.

    Returns:
        Accuracy as a float between 0.0 and 100.0; 0.0 when ``actual``
        is empty, which avoids a division by zero.

    Raises:
        IndexError: If ``predicted`` is shorter than ``actual``.
    """
    total = len(actual)
    if total == 0:
        return 0.0
    correct = sum(
        1 for i, label in enumerate(actual) if label == predicted[i]
    )
    return correct / float(total) * 100.0

#6. Evaluate the algorithm: accuracy on each fold of a k-fold split
def model_test(dataset, algo, n_folds, *args):
    """Score ``algo`` on every fold of a k-fold split of ``dataset``.

    Each fold in turn is held out as the test set and the algorithm is
    trained on the rows of all the other folds.

    Args:
        dataset: List of rows whose last element is the label.
        algo: Callable invoked as ``algo(train, test, *args)`` that
            returns one prediction per test row.
        n_folds: Number of folds to split the data into.
        *args: Extra positional arguments forwarded to ``algo``.

    Returns:
        A list with one accuracy percentage per fold.
    """
    folds = k_folds_cross_validation(dataset, n_folds)
    scores = []
    for held_out in folds:
        # Training data: the rows of every fold except the held-out one.
        others = list(folds)
        others.remove(held_out)
        train = [row for part in others for row in part]

        # Test rows are copies with the label blanked out, so the
        # algorithm cannot read the answer.
        test = []
        for row in held_out:
            masked = list(row)
            test.append(masked)
            masked[-1] = None

        predicted = algo(train, test, *args)
        actual = [row[-1] for row in held_out]
        scores.append(calculate_accuracy(actual, predicted))
    return scores

#7. Predictive models
def predict(row,weights):
    """Classify one row with a linear threshold unit.

    The activation is ``weights[0]`` (the bias) plus the weighted sum of
    every element of ``row`` except the last, which holds the label and
    is ignored.

    Args:
        row: Feature values followed by one trailing label slot.
        weights: Bias followed by one weight per feature.

    Returns:
        1.0 if the activation is greater than or equal to zero, else 0.0.
    """
    activation = weights[0]
    # weights[0] is the bias, so feature i pairs with weights[i + 1].
    for i, feature in enumerate(row[:-1], start=1):
        activation += weights[i] * feature
    return 1.0 if activation >= 0.0 else 0.0

#8. Estimate coefficients using stochastic gradient descent
def estimats_weights(train, learning_rate, n_epochs):
    """Fit the perceptron weights by per-row (stochastic) updates.

    Every epoch visits each training row once, predicts its label with
    the current weights and moves the weights against the prediction
    error. The summed squared error of the epoch is printed.

    Args:
        train: Non-empty list of rows; the last element of each row is
            the numeric label.
        learning_rate: Step size applied to every update.
        n_epochs: Number of passes over the training data.

    Returns:
        A list of weights: the bias followed by one weight per feature.
    """
    # One slot for the bias plus one per feature (row length minus label).
    weights = [0.0] * len(train[0])
    for epoch in range(n_epochs):
        sse = 0.0
        for row in train:
            error = predict(row, weights) - row[-1]
            # Accumulate over the whole epoch; plain assignment would
            # report only the last row's error.
            sse += error ** 2
            weights[0] -= learning_rate * error
            for i in range(len(row) - 1):
                weights[i + 1] -= learning_rate * error * row[i]
        print('This is epoch < %s >, sum_error is < %.4f >' %(epoch,sse))
    return weights

#9. Perceptron function, used to predict test data
def perception(train, test, learning_rate, n_epochs):
    """Train on ``train`` and return a prediction for every row of ``test``.

    Args:
        train: Training rows, label in the last position.
        test: Rows to classify.
        learning_rate: Step size forwarded to the weight estimation.
        n_epochs: Number of training epochs.

    Returns:
        A list with one prediction per test row, in order.
    """
    learned = estimats_weights(train, learning_rate, n_epochs)
    return [predict(sample, learned) for sample in test]

#10. Operation and parameter adjustment
# Hyperparameters of the experiment.
n_folds = 5
learning_rate = 0.0001
n_epochs = 12
algo = perception

# Fix the random seed so that the fold split is reproducible.
seed(1)

# Load the data, make the features numeric and encode the labels.
file = './download_datas/sonar.all-data.csv'
dataset = csv_loader(file)
str_to_float_converter(dataset)
last_column_to_int(dataset)

# Cross-validate and report per-fold and mean accuracy.
scores = model_test(dataset, algo, n_folds, learning_rate, n_epochs)
average = sum(scores) / float(len(scores))
print('The score of my model are : %s ' % scores)
print('The average accuracy is : %.3f%% ' % average)

The knowledge points of the article match the official knowledge files, and you can further learn relevant knowledge algorithm skill treeHome pageOverview 41884 people are studying systematically