Gimy: 6月 2020

2020年6月20日星期六

python df heatmap

# Count the rows for every (city, class) pair and pivot the counts into a
# city x class table that can be drawn as a heatmap.
# .assign() builds a new frame, so the helper column is never written into a
# slice of df (a plain `heatmap["cnt"] = 1` raises SettingWithCopyWarning).
heatmap = df[["city", "class"]].assign(cnt=1)

# Sum the 1s per pair, move "class" into the columns, and show pairs that
# never occur as 0 instead of NaN.
heatmap = heatmap.groupby(["city", "class"]).sum().unstack().fillna(0)

2020年6月19日星期五

sigmoid

def sigmoid(X):  # define activation: sigmoid
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-X))``.

    Args:
        X: A real scalar or array-like (lists are accepted).

    Returns:
        A NumPy scalar or array shaped like ``X`` with values in [0, 1].
    """
    X = np.asarray(X)
    # exp(-log(1 + exp(-X))) equals 1 / (1 + exp(-X)) algebraically, but
    # logaddexp does not overflow when X is a large negative number.
    output = np.exp(-np.logaddexp(0.0, -X))
    return output

def sigmoid_gradient(X):
    """Return the element-wise derivative of the sigmoid evaluated at ``X``.

    Uses the identity ``sigmoid'(X) = sigmoid(X) * (1 - sigmoid(X))``.

    Args:
        X: Any input accepted by ``sigmoid``.

    Returns:
        A value shaped like ``sigmoid(X)``.
    """
    s = sigmoid(X)  # evaluate the activation once and reuse it
    output = s * (1 - s)
    return output

def softmax(X, axis=1):  # define activation: softmax
    """Return the softmax of ``X`` along ``axis``.

    Args:
        X: Array-like of scores; with the default ``axis=1`` it must have at
            least two dimensions (one row of scores per sample).
        axis: Axis along which the outputs sum to 1. Defaults to 1.

    Returns:
        A NumPy array shaped like ``X`` whose entries along ``axis`` sum to 1.
    """
    X = np.asarray(X)
    # Subtracting the per-slice maximum leaves the result unchanged
    # mathematically but keeps exp() from overflowing on large scores
    # (otherwise inf / inf produces nan).
    shifted = X - np.max(X, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

def cross_entropy(p, q):
    """Return the cross-entropy of ``q`` relative to ``p``, averaged over rows.

    Computes ``-sum(p * log(q + epsilon)) / p.shape[0]``.

    Args:
        p: Array-like of target values (for example one-hot rows).
        q: Array-like of predicted values with the same shape as ``p``.

    Returns:
        The summed cross-entropy divided by the length of the first axis of
        ``p``, as a NumPy float.

    Raises:
        ValueError: If ``p`` has no rows, so there is nothing to average.
    """
    p = np.asarray(p)
    q = np.asarray(q)
    if p.shape[0] == 0:
        raise ValueError("cross_entropy requires at least one row in p")

    epsilon = 1e-15  # keeps log() finite when a predicted value is exactly 0
    # One vectorised sum replaces the former Python loop over rows.
    H = -np.sum(p * np.log(q + epsilon)) / p.shape[0]
    return H

from tensorflow import keras

# One-hot encode the labels held in `array`

y = keras.utils.to_categorical(array)

2020年6月18日星期四

SQL (MySQL)

查詢table更新時間

select UPDATE_TIME FROM information_schema.TABLES where TABLE_NAME='{}'

SHOW VARIABLES LIKE '%group_concat%';

SET GLOBAL group_concat_max_len=102400;

SET SESSION group_concat_max_len=102400;

ON DUPLICATE KEY UPDATE

ALTER TABLE Persons ADD UNIQUE (Id_P)

GROUP_CONCAT()

update update_test A inner join (select PAY_DATE,sum(EXPENSE) as U1,max(EXPENSE) as U2 from expense where PAY_DATE >= '2020/4/1' group by PAY_DATE) B on A.PAY_DATE=B.PAY_DATE set A.SUM_PAY=B.U1,A.MAX_PAY=B.U2;

數字轉字串
CONVERT(1,CHAR)

修改表格名
ALTER TABLE `原表格名` RENAME TO `新表格名`

改欄位名稱
ALTER TABLE `表格名` CHANGE COLUMN `舊欄位名` `新欄位名` 欄位類型
ALTER TABLE mfg_skill CHANGE COLUMN STAGE DEPT varchar(6)

2020年6月11日星期四

GitLab

Git global setup

git config --global user.name "智銘 吳"
git config --global user.email "wugimy@gmail.com"

Create a new repository

git clone https://gitlab.aiacademy.tw/at091013/my-first-project.git
cd my-first-project
touch README.md
git add README.md
git commit -m "add README"
git push -u origin master

Existing folder

cd existing_folder
git init
git remote add origin https://gitlab.aiacademy.tw/at091013/my-first-project.git
git add .
git commit -m "Initial commit"
git push -u origin master

Existing Git repository

cd existing_repo
git remote rename origin old-origin
git remote add origin https://gitlab.aiacademy.tw/at091013/my-first-project.git
git push -u origin --all
git push -u origin --tags

2020年6月6日星期六

python 中央極限定理 CLT

import numpy as np

import matplotlib.pyplot as plt

# Number of dice rolled (and averaged) in each trial
n = 20

# Number of trials
t = 10000

# One sample mean per trial: roll n dice (values 1..6, the upper bound 7 is
# exclusive) and keep their average.
data = [np.random.randint(1, 7, n).mean() for _ in range(t)]

# Histogram of the sample means
plt.hist(data, bins=20)
plt.show()

print("平均:", np.mean(data))

python xgboost

import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn.metrics import accuracy_score

# Fit a gradient-boosted classifier with default settings on the training split
xgbc = XGBClassifier()
xgbc.fit(X_train, y_train)

# Predict the labels of the held-out split
pred = xgbc.predict(X_test)

# accuracy_score is declared as (y_true, y_pred): pass the ground truth first
accuracy_score(y_test, pred)

2020年6月5日星期五

python RandomForest

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
#from sklearn.datasets import make_classification

# Load the iris data set
iris = datasets.load_iris()
x = iris.data  # feature
y = iris.target  # Label

# Hold out 30% of the samples for testing; the seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=88)

# Train a random forest of depth-2 trees and predict the held-out labels
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(X_train, y_train)
pred = clf.predict(X_test)

# accuracy_score is declared as (y_true, y_pred): pass the ground truth first
accuracy_score(y_test, pred)

my_function.py

def df_to_mysql(df,table_name):
    """Replace the contents of a MySQL table with the rows of a DataFrame.

    The table is truncated, then every row of ``df.values`` is inserted with
    its values in column order, so the DataFrame columns must line up with
    the table's columns.

    Args:
        df: Object exposing ``.values`` as an iterable of rows (for example a
            pandas DataFrame).
        table_name: Name of the target table. It is concatenated into the SQL
            text because identifiers cannot be bound as parameters, so it
            must come from trusted code, never from user input.

    Returns:
        The status string ``"df_to_mysql1 execute completed"``.
    """
    import pyodbc

    cn = pyodbc.connect('DRIVER={MySQL ODBC 8.0 ANSI Driver};SERVER=localhost;DATABASE=my_db;USER=;PASSWORD=;OPTION=3;')
    try:
        cn.execute("truncate table " + table_name)

        # Values are still converted with str(), as before, but are now bound
        # as parameters instead of being spliced between quotes, so a value
        # containing ' can neither break nor alter the statement.
        rows = [[str(value) for value in row] for row in df.values]
        if rows and rows[0]:
            placeholders = ",".join("?" * len(rows[0]))
            sql = "insert into " + table_name + " values (" + placeholders + ")"
            cursor = cn.cursor()
            cursor.executemany(sql, rows)

        cn.commit()
    finally:
        # Release the connection even when a statement fails.
        cn.close()

    return "df_to_mysql1 execute completed"

# Before running, make sure the DataFrame matches the DB schema

from my_function import df_to_mysql

print(df_to_mysql(df,"iris"))

python training 資料準備 (以DataFrame匯入) KNN

# Start with the data already prepared (as a DataFrame named df)

# Choose the feature columns

cols = ["x1","x2","x3","x4"]

x = np.array(df[cols])

y = np.array(df["y"])

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.3, random_state=88)

# Use the KNN algorithm

from sklearn.neighbors import KNeighborsClassifier

# Start testing from k=1

knn = KNeighborsClassifier(n_neighbors=1)

knn.fit(X_train,y_train)

pred = knn.predict(X_test)

# Use a decision tree

from sklearn import tree

clf = tree.DecisionTreeClassifier()

clf = clf.fit(X_train,y_train)

tree.plot_tree(clf)

#pred = clf.predict(X_test)

# Compare the test labels with the predictions
# (pred still holds the KNN predictions because the line above is commented out)

from sklearn.metrics import classification_report,confusion_matrix

print(confusion_matrix(y_test,pred))

print(classification_report(y_test,pred))

python dict 字典

countries = ['China', 'Japan', 'North Korea', 'South Korea', 'Taiwan', 'Thailand']
capitals = ['Beijing', 'Tokyo', 'Pyongyang', 'Seoul', 'Taipei', 'Bangkok']

# Pair the two lists and turn them into a dict (country -> capital)
d = dict(zip(countries, capitals))

# Add an entry
d["United States"] = "Washington D.C."

# Delete an entry
del d["Thailand"]

# Build a new dict with the keys and values of the old dict swapped
new_dict = {value: key for key, value in d.items()}

# Modify an entry
new_dict["Washington D.C."] = "US"

python list

list to dict 字典

# Parallel lists: capitals[i] is the capital of countries[i]
countries = [
    'China',
    'Japan',
    'North Korea',
    'South Korea',
    'Taiwan',
    'Thailand',
]
capitals = [
    'Beijing',
    'Tokyo',
    'Pyongyang',
    'Seoul',
    'Taipei',
    'Bangkok',
]

# Walk both lists in lockstep to build the country -> capital mapping
d = {country: capital for country, capital in zip(countries, capitals)}

2020年6月4日星期四

MySQL DATE_FORMAT() 函數日期時間

-- %d is the day of the month and %T the 24-hour time (hh:mm:ss); the former
-- '%Y-%m-%T' left the day out and attached the time directly to the month.
DATE_FORMAT(NOW(),'%Y-%m-%d %T')

格式	描述
%a	缩写星期名
%b	缩写月名
%c	月，数值
%D	带有英文后缀的月中的天（0th, 1st, 2nd, 3rd, …）
%d	月的天，数值(00-31)
%e	月的天，数值(0-31)
%f	微秒
%H	小时 (00-23)
%h	小时 (01-12)
%I	小时 (01-12)
%i	分钟，数值(00-59)
%j	年的天 (001-366)
%k	小时 (0-23)
%l	小时 (1-12)
%M	月名
%m	月，数值(00-12)
%p	AM 或 PM
%r	时间，12-小时（hh:mm:ss AM 或 PM）
%S	秒(00-59)
%s	秒(00-59)
%T	时间, 24-小时 (hh:mm:ss)
%U	周 (00-53) 星期日是一周的第一天
%u	周 (00-53) 星期一是一周的第一天
%V	周 (01-53) 星期日是一周的第一天，与 %X 使用
%v	周 (01-53) 星期一是一周的第一天，与 %x 使用
%W	星期名
%w	周的天（0=星期日, 6=星期六）
%X	年，其中的星期日是周的第一天，4 位，与 %V 使用
%x	年，其中的星期一是周的第一天，4 位，与 %v 使用
%Y	年，4 位
%y	年，2 位

python 引用 py

a.py

name = "weiwei"
age = 18


def introduction():
    """Print a one-line self-introduction built from the module-level name and age."""
    # print() is called as a function, like the other snippets in these notes;
    # the Python 2 print statement is a SyntaxError on Python 3.
    print("hi everyone, I'm", name, ",", age, "years ago!")

b.py

# A module is imported by its file name without ".py"; the function lives in
# a.py, so the module to import from is `a`.
from a import introduction

introduction()

2020年6月2日星期二

python 迴歸

import numpy as np

import matplotlib.pyplot as plt

# Hand-made x, y sample data
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 5, 7, 8, 10])

# ==========
# Scatter plot + regression line
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(x, y)

# np.polyfit with degree 1 fits a straight line and returns its coefficients
# highest power first: slope m, then intercept c.
m, c = np.polyfit(x, y, 1)
ax.plot(x, m * x + c)  # draw the fitted line y = m * x + c
fig.suptitle('Scatter with regression')
plt.show()


# ==========
def predict(x):
    """Return the fitted line's value ``m * x + c`` at ``x``."""
    return m * x + c


print(predict(5))

2020年6月20日 星期六

2020年6月19日 星期五

2020年6月18日 星期四

數字轉字串：CONVERT(1,CHAR)

修改表格名：ALTER TABLE `原表格名` RENAME TO `新表格名`

改欄位名稱：ALTER TABLE `表格名` CHANGE COLUMN `舊欄位名` `新欄位名` 欄位類型；例：ALTER TABLE mfg_skill CHANGE COLUMN STAGE DEPT varchar(6)

2020年6月11日 星期四