# Beta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the peak-4 measurements; the first CSV column becomes the row index.
df = pd.read_csv("peak_4.csv", index_col=0)

# Quick sanity check: show the column labels, then the dtype of each column.
for overview in (df.columns, df.dtypes):
    print(overview)
from sklearn.linear_model import Lasso
# Baseline: fit a Lasso (alpha=1) on the raw, unscaled features and plot the
# resulting coefficient of every feature.
# NOTE: the target column label 'uRNA ' carries a trailing space.
feature_frame = df.drop('uRNA ', axis=1)
X = feature_frame.values      # feature matrix
y = df['uRNA '].values        # target vector
names = feature_frame.columns  # feature labels for the x-axis

lasso = Lasso(alpha=1)
lasso.fit(X, y)
lasso_coef = lasso.coef_

# One bar per feature; labels are rotated so they do not overlap.
plt.bar(names, lasso_coef)
plt.xticks(rotation=45)
plt.show()
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
# Hold-out evaluation of Lasso for several regularisation strengths.

# Prepare data: every column except the target is a feature.
# NOTE: the target column label 'uRNA ' carries a trailing space.
X = df.drop('uRNA ', axis=1).values
y = df['uRNA '].values
names = df.drop('uRNA ', axis=1).columns

# Split data: 30 % is held out for testing; the seed is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit one model per alpha and record its score on the held-out set
# (for regressors, scikit-learn's score() is the R^2 coefficient).
scores = []
for alpha in [0.01, 1.0, 10.0, 20.0, 50.0]:
    lasso = Lasso(alpha=alpha)
    lasso.fit(X_train, y_train)
    # Predictions are kept for ad-hoc inspection; score() does not need them.
    lasso_pred = lasso.predict(X_test)
    scores.append(lasso.score(X_test, y_test))

# Scores are listed in the same order as the alphas above.
print(scores)
#trying cross validation
from sklearn.model_selection import cross_val_score, KFold
from sklearn.linear_model import LinearRegression
# Cross-validate an ordinary least-squares baseline on the current X and y.
reg = LinearRegression()

# Six folds, shuffled with a fixed seed so the partition is repeatable.
kf = KFold(n_splits=6, shuffle=True, random_state=51)

cv_results = cross_val_score(estimator=reg, X=X, y=y, cv=kf)
print(cv_results)  # one score per fold
# Example of a standardization: a toy matrix whose two columns live on very
# different scales is run through StandardScaler.
from numpy import asarray
from sklearn.preprocessing import StandardScaler

# define data
data = asarray([[100, 0.001], [8, 0.05], [50, 0.005], [88, 0.07], [4, 0.1]])
print(data)

# define standard scaler
scaler = StandardScaler()

# transform data (fit the per-column statistics and apply them in one step)
scaled = scaler.fit_transform(data)
print(scaled)
from sklearn.preprocessing import StandardScaler
# Repeat the alpha=1 Lasso fit, this time on standardised features, so the
# coefficients of different features can be compared with one another.
scaler = StandardScaler()
scaled = scaler.fit_transform(X)

lasso = Lasso(alpha=1)
lasso.fit(scaled, y)
lasso_coef = lasso.coef_

# One bar per feature; labels are rotated so they do not overlap.
plt.bar(names, lasso_coef)
plt.xticks(rotation=45)
plt.show()
# Final code for factorial optimization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
# --- Prepare the data ------------------------------------------------------
# Load the peak-4 measurements; the first CSV column becomes the row index.
df = pd.read_csv("peak_4.csv", index_col=0)

# Derived ratio features.
# NOTE(review): the factor 4 presumably accounts for four NTP species - confirm.
df['NTP_to_mg'] = df['NTP'] * 4 / df['Magnesium']
df['T7_to_template'] = df['T7'] / df['Template']

# Everything except the target column is a feature.
# NOTE: the target column label 'uRNA ' carries a trailing space.
predictors = df.drop('uRNA ', axis=1)
X = predictors.values        # feature matrix
y = df['uRNA '].values       # dependent variable
names = predictors.columns   # feature labels

# Standardise the features before fitting.
scaler = StandardScaler()
scaled = scaler.fit_transform(X)

# --- Fit the model ---------------------------------------------------------
lasso = Lasso(alpha=0.1)
lasso.fit(scaled, y)
lasso_coef = lasso.coef_  # one coefficient per feature

# --- Plot the coefficient of every feature ---------------------------------
plt.bar(names, lasso_coef)
plt.xticks(rotation=45)
plt.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
# --- Prepare the data ------------------------------------------------------
# Load the peak-5 measurements; the first CSV column becomes the row index.
df = pd.read_csv("peak_5_data.csv", index_col=0)

# Derived ratio features.
# NOTE(review): the factor 4 presumably accounts for four NTP species - confirm.
df['NTP_to_mg'] = df['NTP'] * 4 / df['Magnesium']
df['T7_to_template'] = df['T7'] / df['Template']

# Everything except the target column 'uRNA' is a feature.
predictors = df.drop('uRNA', axis=1)
X = predictors.values        # feature matrix
y = df['uRNA'].values        # dependent variable
names = predictors.columns   # feature labels

# Standardise the features before fitting.
scaler = StandardScaler()
scaled = scaler.fit_transform(X)

# --- Fit the model ---------------------------------------------------------
lasso = Lasso(alpha=0.1)
lasso.fit(scaled, y)
lasso_coef = lasso.coef_  # one coefficient per feature

# --- Plot the coefficient of every feature ---------------------------------
plt.bar(names, lasso_coef)
plt.xticks(rotation=45)
plt.show()