import numpy as np
import pandas as pd
import seaborn as sns
from scipy.integrate import odeint
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.api as sm
from sklearn import metrics
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.svm import SVC
# load the first two columns of the CSV, skipping the first (header) row
column1, column2 = np.loadtxt('data.csv', usecols=(0, 1), unpack=True, skiprows=1, delimiter=',')
# Find the best-fit parameters for a degree-2 polynomial
parameters = np.polyfit(x, y, 2)
# Create a function from the parameters
fit_function = np.poly1d(parameters)
# Use that function to calculate the values of our fit line
expected_y = fit_function(x)
# Plot the original data and our fit line
plt.scatter(x, y, label='data')
plt.plot(x, expected_y, color='orange', label='fit')
plt.legend()
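As a quick goodness-of-fit check, you can compute the coefficient of determination from the residuals of the fit above; this is a small sketch using only NumPy (the R² calculation is an addition, not part of the original snippet):

# coefficient of determination: R^2 = 1 - SS_res / SS_tot
ss_res = np.sum((y - expected_y) ** 2)   # residual sum of squares
ss_tot = np.sum((y - np.mean(y)) ** 2)   # total sum of squares
print("R^2 =", 1 - ss_res / ss_tot)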
# specify the encoding when the file is not plain UTF-8
data_frame = pd.read_csv('data.csv', encoding='ISO-8859-1')
# create a DataFrame
data_frame = pd.DataFrame({"name": ["james", "jim", "joan", "jill"],
                           "age": [10, 20, 30, 40],
                           "weight": [150, 140, 130, 120],
                           "complaint": ["sprain", "cut", "headache", "break"]})
# displays the top rows of the dataframe
data_frame.head()
# displays the bottom rows of the dataframe
data_frame.tail()
# set the row index to the age column
data_frame.set_index("age", inplace=True)
# reset the row index to 0-based integers
data_frame.reset_index(inplace=True)
# returns the row with the given index label
data_frame.loc["rowname"]
# returns the row at the given 0-based position
data_frame.iloc[2]
# adds/replaces a column (col_content is a placeholder for your values)
data_frame = data_frame.assign(col_name=col_content)
# removes columns from the dataframe
data_frame = data_frame.drop(columns=["year", "month", "day"])
# displays various stats about the dataframe
data_frame.describe()
# a list of columns in the dataframe
data_frame.columns
# converts a dataframe with year, month, and day columns to a series of datetimes
pd.to_datetime(data_frame)
# produces a histogram of a column
data_frame['col1'].plot.hist()
# produces a scatter plot of two columns
data_frame.plot.scatter(x='col2', y='col1')
# produces a boxplot of two columns
data_frame.boxplot(column=['col1', 'col2'])
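To make the loc/iloc distinction concrete, here is a short illustrative example using the sample frame above (assuming set_index("age", ...) has just run, so the ages are the index labels):

data_frame.set_index("age", inplace=True)
data_frame.loc[30]   # label-based: the row whose index label (age) is 30 -> joan
data_frame.iloc[2]   # position-based: the third row, which is also joan here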
# create a Series
series = pd.Series([1, 2, 3, 4], index=['one', 'two', 'three', 'four'])
# mean of the series
series.mean()
# list of indexes in the series
series.index.values
# list of values in the series
series.values
sns.set()          # sets styling to seaborn defaults
sns.reset_orig()   # returns styling to matplotlib defaults
sns.despine()      # removes the top and right borders
# List of the available styles:
sns.set_style("ticks")
sns.set_style("darkgrid")
sns.set_style("whitegrid")
sns.set_style("dark")
sns.set_style("white")
# List of the available contexts:
sns.set_context("paper")
sns.set_context("talk")
sns.set_context("poster")
sns.set_context("notebook")
# Jointplot scatter with a regression line
sns.jointplot(x=data_frame['col1'], y=data_frame['col2'], kind="reg")
sns.set(font_scale=.7)
# heatmap of the correlation matrix of the numeric columns
hm = sns.heatmap(data=data.drop(columns=["year", "Country name"]).corr(), annot=True)
hm.set_ylim(12.0, 0.0)  # works around a matplotlib version that clips the top and bottom rows
# function that we will fit the data to
def curve_function(x, a, b, c):
    return a * np.cos(b * x + c)

# use curve_fit to get the parameters for the function
parameters, covariance = curve_fit(curve_function, x, y)
a, b, c = parameters  # unpack the fitted a, b, c parameters

# use our function to calculate the expected values of y
y_expected = curve_function(x, a, b, c)

# plot our data and fit line
plt.scatter(x, y, label="data")
plt.plot(x, y_expected, color="orange", label="fit")
plt.legend()
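For oscillatory models like this, curve_fit can settle into a poor local minimum if it starts far from the true frequency; passing an initial guess via the p0 argument usually helps. A minimal sketch (the guess values below are placeholders, not from the original):

initial_guess = [1.0, 2.0, 0.0]  # rough guesses for a, b, c; tune these to your data
parameters, covariance = curve_fit(curve_function, x, y, p0=initial_guess)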
For example, if we have a system of variables X, Y, and Z with time derivatives

dX/dt = Y + a*t
dY/dt = Z
dZ/dt = b
where a and b are parameters for the derivatives.
We can solve these equations numerically with odeint using the following code:
def derivatives(state, time, a, b):  # put the parameters here
    X = state[0]
    Y = state[1]
    Z = state[2]
    dXdt = Y + a * time  # set the derivative equations here
    dYdt = Z
    dZdt = b
    return [dXdt, dYdt, dZdt]

a = 0.5  # set the values of your parameters here
b = 4 / 3
init = [1, .1, 2]  # set the initial conditions here
time = np.linspace(0, 50, 500)  # set the times to integrate across
# for this example we will run from 0 to 50 with 500 points
solution = odeint(derivatives, init, time, args=(a, b))  # call odeint with the initial conditions and parameters
X = solution[:, 0]
Y = solution[:, 1]
Z = solution[:, 2]
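SciPy also provides solve_ivp, a newer interface for the same task; note that its right-hand-side function takes (t, state) in the opposite argument order from odeint, and the args keyword needs SciPy 1.4 or later. A hedged sketch of the equivalent call:

from scipy.integrate import solve_ivp

def rhs(t, state, a, b):  # note the (t, state) argument order
    X, Y, Z = state
    return [Y + a * t, Z, b]

result = solve_ivp(rhs, (0, 50), init, t_eval=time, args=(a, b))
X, Y, Z = result.y  # result.y has shape (3, len(time))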
plt.figure(1, figsize=(12, 7))  # creates and sets the current figure with figure number 1
# change this '1' to a different number if you need more than one figure
plt.subplot(224)  # selects the fourth plot in a 2 by 2 grid of subplots
# this line is unnecessary if you only need to display one plot
plt.plot(time, X, label='x-values')
plt.plot(time, Y, label='y-values')
plt.plot(time, Z, label='z-values')
plt.grid()    # enables the grid
plt.legend()  # enables the legend
plt.xlabel('Time')               # creates a label for the x axis
plt.ylabel('Calculated Values')  # creates a label for the y axis
plt.title('Example Title')       # adds a title for the whole plot
plt.tight_layout()               # prevents the titles from overlapping
plt.savefig("Example.png")       # saves the figure as an image
Diagram of plt.subplot(): in subplot(224), the digits mean a 2-row by 2-column grid, selecting panel 4, with panels counted left to right, top to bottom.
# build a 100x100 multiplication table and display it as a heatmap
heatmap = []
for i in range(100):
    heatmap.append([])
    for j in range(100):
        heatmap[i].append(i * j)
plt.matshow(heatmap)
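The same grid can be built in one line with NumPy's outer product, which is both faster and clearer:

heatmap = np.outer(np.arange(100), np.arange(100))  # heatmap[i, j] == i * j
plt.matshow(heatmap)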
x_with_cnst = sm.add_constant(x_values)       # creates an array of x values with a column of 1s
linear_model = sm.OLS(y_values, x_with_cnst)  # creates the model
results = linear_model.fit()                  # calculates the best-fit parameters
results.params     # returns the intercept and slope
results.summary()  # returns a summary of the model
results.predict()  # returns an array of predicted y values
# OLS takes the dependent variable first, then the matrix of regressors
model = sm.OLS(data["x_values"], data.drop(columns=["x_values"]))
result = model.fit()
result.summary()
fig = plt.figure(figsize=(15, 8))
fig = sm.graphics.plot_regress_exog(results, "x1", fig=fig)
print("Confusion matrix:\n",metrics.confusion_matrix(test_labels, test_predict)) print("Classification report:\n",metrics.classification_report(test_labels, test_predict)) print("Accuracy:",metrics.accuracy_score(test_labels, test_predict)) print("Precision:",metrics.precision_score(test_labels, test_predict)) print("Recall:",metrics.recall_score(test_labels, test_predict)) print("F_1 Score:",metrics.f1_score(test_labels, test_predict))
fpr, tpr, thresholds = metrics.roc_curve(test_labels, test_predict)
plt.plot(fpr, tpr, label="ROC curve")
plt.plot([0, 1], [0, 1], label="chance line")
plt.legend()
print(metrics.auc(fpr, tpr))
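One caveat: roc_curve expects continuous scores (such as predicted probabilities), while the accuracy and confusion-matrix functions above expect hard class labels. A hedged sketch of moving between the two, where test_probs stands in for your model's predicted probabilities of the positive class:

# test_probs: predicted probabilities for the positive class (model-dependent placeholder)
fpr, tpr, thresholds = metrics.roc_curve(test_labels, test_probs)
test_predict = (test_probs > 0.5).astype(int)  # threshold to hard 0/1 labels for accuracy etc.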
# two Gaussian blobs in 2D, with the true centers returned as well
vectors, labels, centers = datasets.make_blobs(n_samples=100, n_features=2, centers=2,
                                               random_state=2614, return_centers=True)
# two concentric circles
vectors, labels = datasets.make_circles(n_samples=100, random_state=655)
# handwritten digits
sk_data = datasets.load_digits()
vectors, labels, categories = (sk_data.data, sk_data.target, sk_data.target_names)
# Labeled Faces in the Wild
sk_data = datasets.fetch_lfw_people(min_faces_per_person=50, resize=0.4)
vectors, labels, categories = (sk_data.data, sk_data.target, sk_data.target_names)
train_vectors, test_vectors, train_labels, test_labels = train_test_split(vectors, labels)
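By default this holds out 25% of the rows with a fresh random shuffle on every run; the split can be made reproducible and stratified by class if needed, for example:

train_vectors, test_vectors, train_labels, test_labels = train_test_split(
    vectors, labels, test_size=0.25, random_state=0, stratify=labels)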
logit_model = sm.Logit(train_labels, sm.add_constant(train_vectors))
result = logit_model.fit()
test_predict = result.predict(sm.add_constant(test_vectors))  # returns predicted probabilities
scaler = StandardScaler()
vectors = scaler.fit_transform(vectors)
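If the data has already been split, standard practice is to fit the scaler on the training set only and reuse it on the test set, so no information leaks from the test data:

scaler = StandardScaler()
train_vectors = scaler.fit_transform(train_vectors)  # learn mean/std from training data only
test_vectors = scaler.transform(test_vectors)        # apply the same scaling to the test data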
knn = KNeighborsClassifier(n_neighbors=5)
result = knn.fit(train_vectors, train_labels)
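The fitted classifier can then produce the test_predict array used by the metrics snippets above, for instance:

test_predict = knn.predict(test_vectors)     # predicted class labels
print(knn.score(test_vectors, test_labels))  # mean accuracy on the test set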
n_components = 17  # this is much less than the original n_features
print("Extracting the top %d eigenfaces from %d faces" % (n_components, train_vectors.shape[0]))
# set up the PCA object with the number of components we want to find
pca = PCA(n_components=n_components, whiten=True)
# fit the PCA model to the training data
result = pca.fit(train_vectors)
# transform the original data
pca_train_vectors = pca.transform(train_vectors)
pca_test_vectors = pca.transform(test_vectors)
pca.explained_variance_ratio_  # returns the fraction of variance explained by each component
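Rather than hard-coding 17 components, PCA also accepts a variance fraction for n_components and will choose however many components that requires; a small sketch:

pca = PCA(n_components=0.95, whiten=True)  # keep enough components to explain 95% of the variance
pca_train_vectors = pca.fit_transform(train_vectors)
print(pca.n_components_)  # how many components were actually kept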
print("Fitting the classifier to the training set") param_grid = {'C': [.001, .005, .0001, .0005, .00001], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } clf = GridSearchCV(SVC(kernel='linear', class_weight='balanced'), param_grid, n_jobs=-1) result = clf.fit(train_vectors, train_labels)