import numpy as np
import matplotlib.pyplot as plt
import time
def sigmoid(inX):
    # logistic function: squashes any real input into (0, 1)
    return 1.0 / (1 + np.exp(-inX))
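
# The sigmoid above can overflow in np.exp for large negative inputs (NumPy
# emits a RuntimeWarning and the result saturates anyway). A minimal
# overflow-safe sketch, kept separate so the original function stays as-is:
def stableSigmoid(inX):
    # evaluate np.exp only on non-positive values by splitting on sign
    inX = np.asarray(inX, dtype=float)
    out = np.empty_like(inX)
    pos = inX >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-inX[pos]))
    expx = np.exp(inX[~pos])
    out[~pos] = expx / (1.0 + expx)
    return out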
def trainLogRegres(train_x, train_y, opts):
    # Train a logistic regression classifier; opts selects the optimizer:
    # 'gradDescent', 'stocGradDescent', or 'smoothStocGradDescent'.
    startTime = time.time()
    numSamples, numFeatures = train_x.shape
    alpha = opts['alpha']; maxIter = opts['maxIter']
    theta = np.mat(np.ones((numFeatures, 1)))  # initialize all weights to 1

    for k in range(maxIter):
        if opts['optimizeType'] == 'gradDescent':  # full-batch gradient ascent on the log-likelihood
            output = sigmoid(train_x * theta)      # predicted probabilities for all samples
            error = train_y - output               # label minus prediction
            theta = theta + alpha * train_x.T * error
        elif opts['optimizeType'] == 'stocGradDescent':  # one update per sample, in fixed order
            for i in range(numSamples):
                output = sigmoid(train_x[i, :] * theta)
                error = train_y[i, 0] - output
                theta = theta + alpha * train_x[i, :].T * error
        elif opts['optimizeType'] == 'smoothStocGradDescent':  # randomized SGD with a decaying step size
            dataIndex = list(range(numSamples))  # pool of samples not yet visited this pass
            for i in range(numSamples):
                alpha = 4.0 / (1.0 + k + i) + 0.01  # step size decays but never reaches 0
                randIndex = int(np.random.uniform(0, len(dataIndex)))
                sampleIndex = dataIndex[randIndex]  # draw a random remaining sample
                output = sigmoid(train_x[sampleIndex, :] * theta)
                error = train_y[sampleIndex, 0] - output
                theta = theta + alpha * train_x[sampleIndex, :].T * error
                del dataIndex[randIndex]  # sample without replacement within this pass
        else:
            raise ValueError('Unsupported optimize method type: %s' % opts['optimizeType'])
    print('Congratulations, training complete! Took %fs!' % (time.time() - startTime))
    return theta
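
# A minimal usage sketch on synthetic data (the toy dataset and the helper
# name are illustrative assumptions, not part of the original script): it
# exercises the plain batch 'gradDescent' path end to end.
def _demoTrain():
    rng = np.random.RandomState(0)
    X = rng.randn(100, 2)                        # 100 samples, 2 features
    y = (X[:, 0] + X[:, 1] > 0).astype(float).reshape(-1, 1)
    Xb = np.mat(np.insert(X, 0, 1, axis=1))      # add the bias column, as loadData does
    demoOpts = {'alpha': 0.01, 'maxIter': 200, 'optimizeType': 'gradDescent'}
    return trainLogRegres(Xb, np.mat(y), demoOpts)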
def testLogRegres(theta, test_x, test_y):
    # Classify each sample at a 0.5 threshold and report the match rate.
    numSamples, numFeatures = np.shape(test_x)
    matchCount = 0
    for i in range(numSamples):
        predict = sigmoid(test_x[i, :] * theta)[0, 0] > 0.5
        if predict == bool(test_y[i, 0]):
            matchCount += 1
    accuracy = float(matchCount) / numSamples
    return accuracy
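
# The loop above makes one Python-level call per sample; an equivalent
# vectorized sketch (same 0.5 threshold, assuming np.matrix inputs as used
# throughout this file):
def testLogRegresVectorized(theta, test_x, test_y):
    probs = sigmoid(test_x * theta)    # n x 1 predicted probabilities
    predictions = probs > 0.5          # n x 1 boolean matrix
    return float(np.mean(predictions == (test_y != 0)))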
def showLogRegres(theta, train_x, train_y):
    # Scatter-plot the training data and draw the learned decision boundary.
    numSamples, numFeatures = np.shape(train_x)
    if numFeatures != 3:
        print("Sorry! I cannot draw because the data is not 2-dimensional (plus the bias column)!")
        return 1
    # plot each sample, colored by its class label
    for i in range(numSamples):
        if int(train_y[i, 0]) == 0:
            plt.plot(train_x[i, 1], train_x[i, 2], 'ro')
        elif int(train_y[i, 0]) == 1:
            plt.plot(train_x[i, 1], train_x[i, 2], 'bo')
    # draw the decision boundary: theta0 + theta1*x1 + theta2*x2 = 0
    min_x = train_x[:, 1].min()
    max_x = train_x[:, 1].max()
    theta = theta.getA()  # convert np.matrix to a plain ndarray for scalar indexing
    y_min_x = float(-theta[0] - theta[1] * min_x) / theta[2]
    y_max_x = float(-theta[0] - theta[1] * max_x) / theta[2]
    plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()
def loadData():
    # Load whitespace-delimited samples: two feature columns then a 0/1 label.
    data = np.loadtxt('lr_nonlinear_data.txt')
    train_x = data[:, 0:2]
    train_y = data[:, 2:]
    train_x = np.insert(train_x, 0, 1, axis=1)  # prepend a bias column of ones
    return np.mat(train_x), np.mat(train_y)
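
# If lr_nonlinear_data.txt is not available, a file with the expected layout
# (x1 x2 label per line) can be generated with a sketch like this; the linear
# labeling rule and the helper name are assumptions purely for illustration:
def makeToyDataFile(path='lr_nonlinear_data.txt', n=200, seed=42):
    rng = np.random.RandomState(seed)
    X = rng.randn(n, 2)
    y = (X[:, 0] + 2.0 * X[:, 1] > 0).astype(int)
    np.savetxt(path, np.column_stack([X, y]), fmt='%.6f %.6f %d')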
print "step 1: load data..."
train_x, train_y = loadData()
test_x = train_x; test_y = train_y
print "step 2: training..."
opts = {'alpha': 0.01, 'maxIter': 500, 'optimizeType': 'smoothStocGradDescent'}
optimalTheta = trainLogRegres(train_x, train_y, opts)
print "step 3: testing..."
accuracy = testLogRegres(optimalTheta, test_x, test_y)
print "step 4: show the result..."
print 'The classify accuracy is: %.3f%%' % (accuracy * 100)
showLogRegres(optimalTheta, train_x, train_y)
print sigmoid(train_x*optimalTheta)