2020年5月30日 星期六

python 物件 class

class Polygon:
    """Closed polygon described by an ordered sequence of vertices.

    Each vertex is an indexable pair whose first two items are the x and y
    coordinates, e.g. ``Polygon((0, 0), (3, 0), (0, 4))``.
    """

    def __init__(self,*p):
        # The vertices are stored as the tuple of positional arguments.
        self.p = p

    def __repr__(self):
        # __repr__ has to return a string; show the vertices in call syntax
        # so the output identifies the object unambiguously.
        return "Polygon({})".format(", ".join(repr(v) for v in self.p))

    def __str__(self):
        return "perimeter is " + "{:>3.2f}".format(self.perimeter())

    def perimeter(self):
        """Return the total edge length as a float (0.0 with no vertices)."""
        p = self.p
        total = 0.0
        # Pair every vertex with its predecessor; rotating the tuple by one
        # makes the first vertex pair with the last, which closes the shape.
        for prev, cur in zip(p[-1:] + p[:-1], p):
            # Euclidean distance: sqrt((x1 - x2)^2 + (y1 - y2)^2)
            total += ((cur[0] - prev[0]) ** 2 + (cur[1] - prev[1]) ** 2) ** 0.5
        return total
        
        
# Example shapes: a 3-4-5 right triangle and a diamond with vertices on the axes.
triangle = Polygon((0,0), (3,0), (0,4))
diamond = Polygon((-1,0), (0,-1), (1,0), (0,1))

# Raw perimeters first, then the formatted str() output of each shape.
for shape in (triangle, diamond):
    print(shape.perimeter())
for shape in (triangle, diamond):
    print(shape)

2020年5月29日 星期五

python 信賴區間

import numpy as np
import scipy.stats as stats

# 估計 95% 信心水準下,老年人每星期看電視平均時間的信賴區間?
# alpha = 0.05
# 假設樣本平均時間的抽樣分配服從常態分佈: xbar ~ N(mu_xbar, sigma_xbar)
# 抽樣人數 n = 100 ; 樣本平均時間 xbar = 21.2 小時
# 母體看電視時間的標準差 sigma = 8 小時


def Interval_estimation(n, xbar, sigma, alpha):
    """Two-sided confidence interval for a population mean, known sigma.

    Args:
        n: sample size.
        xbar: sample mean.
        sigma: population standard deviation.
        alpha: significance level; the interval has confidence 1 - alpha.

    Returns:
        A two-element list ``[lower, upper]``.
    """
    # Critical value of the standard normal for a two-tailed test.
    z_critical = stats.norm.ppf(1 - alpha/2, loc=0, scale=1)

    # Standard error of the mean.
    standard_error = sigma / np.sqrt(n)

    # Half-width of the interval.
    margin = z_critical * standard_error

    lower = xbar - margin
    upper = xbar + margin
    return [lower, upper]


# Worked example from the problem statement in the comments above:
# n=100, xbar=21.2, sigma=8, alpha=0.05 (95% confidence).
# The returned interval is neither assigned nor printed here.
Interval_estimation(n=100, xbar=21.2, sigma=8, alpha=0.05)

python pi 圓周率 機率 驗證

# Monte Carlo estimate of pi: draw `points` uniform samples in the square
# [-1, 1) x [-1, 1) and measure the fraction that lands inside the unit circle.
points = 10000
X = np.random.uniform(-1, 1, points)
Y = np.random.uniform(-1, 1, points)

# Boolean mask of the points inside (or on) the unit circle
inner_index = (X**2 + Y**2) <= 1

# Boolean mask of the points outside the circle
outer_index = ~inner_index

indices = (inner_index, outer_index)

# Split the coordinates into the inside and outside groups.
X_inner, Y_inner = X[indices[0]], Y[indices[0]]
X_outer, Y_outer = X[indices[1]], Y[indices[1]]

# Bare expression: has no effect when run as a script
# (presumably left for notebook display).
[(X_inner, Y_inner), (X_outer, Y_outer)], indices

# NOTE(review): `plt` is not imported anywhere in the visible file —
# presumably matplotlib.pyplot; confirm.
# NOTE(review): both scatters use label='C1'; presumably the second was meant
# to differ. No legend is drawn here, so the label is not shown.
plt.scatter(X_inner, Y_inner,s=1,c='red',marker='o',alpha=0.8,label='C1')
plt.scatter(X_outer, Y_outer,s=1,c='blue',marker='o',alpha=0.8,label='C1')
plt.show()
# Fraction of samples inside the circle; multiplied by the square's area
# (2 * 2) it approximates the circle's area, i.e. pi for radius 1.
percent = len(X_inner) / points
print("圈內機率:",percent)
print("圈內面積:",2 * 2 * percent)
print("實際pi:",np.pi)
print("gap:",np.pi - 4 * percent)

2020年5月28日 星期四

asp rs 操作

'=======================
'var categories = ['c1','c2','c3','c4','c5','c6','c7','c8','c9','c10','c11'];
'var data_plan = [10,20,30,40,40,30,30,30,30,30,30];
'var data_act = [11,32,33,34,39,43,33,33,35,33,33];
'=======================

' Build three JavaScript array literals from the recordset, in the shape
' shown in the sample above:
'   categories -> ['c1','c2',...]   (column 0, quoted)
'   data_plan  -> [10,20,...]       (column 1)
'   data_act   -> [11,32,...]       (column 2)
categories = ""
data_plan = ""
data_act = ""
While Not rs.EOF
    ' Every item is prefixed with a comma; the leading one is removed below.
    categories = categories & ",'" & rs(0) & "'"
    data_plan = data_plan & "," & rs(1)
    data_act = data_act & "," & rs(2)
    rs.MoveNext
Wend
' Wrap once, after the loop. Mid(s, 2) drops the leading comma; with no rows
' it returns "" and the result is "[]".
categories = "[" & Mid(categories, 2) & "]"
data_plan = "[" & Mid(data_plan, 2) & "]"
data_act = "[" & Mid(data_act, 2) & "]"

python dataframe df

# Count how many times each distinct value occurs in a column
# (`df` is assumed to be an existing DataFrame; it is not defined in this snippet)
df["ITEM_NAME"].value_counts()

# Plot the value counts of a column as a bar chart
df["EXPENSE"].value_counts().plot(kind="bar")

# Fill missing values by linear interpolation, working backward and filling
# at most 1 consecutive NaN; the result is returned, not assigned back to df
df.interpolate(method ='linear', limit_direction ='backward', limit = 1)


#df合併(欄位) (以下兩種方式)
# Combine two DataFrames side by side (column-wise), two ways:
# join on the index, or concatenate along axis=1.
df1.join(df2)
df = pd.concat([df1, df2], axis=1)


# Expand a column whose cells are lists into one column per list element
df["RecentDelays"].apply(pd.Series)

# Split a string column on "_" into separate columns, name them,
# and attach them to the original frame
new = df.From_To.str.split("_",expand=True)
new.columns = ["From","To"]
df = df.join(new)

# Drop a column (returns a new DataFrame; the result is not assigned here)
df.drop("From_To", axis = 1)

# Rename a column (returns a new DataFrame; the result is not assigned here)
df.rename(columns={'a':'A'})

python 統計 scipy.stats

import numpy as np
# Descriptive statistics of a small sample.
arr = np.array([1,3,2,5,4,4,5,453,43,45,43,5])
print("平均:",arr.mean())
# ndarray.std() / var() use ddof=0 by default (population formulas).
print("標準差:",arr.std())
print("變異數:",arr.var())

print(np.sort(arr))
print("中位數:",np.median(arr))
# First quartile (25th percentile) of the same sample.
print("四分之一位數:",np.quantile(arr,.25))


# Mode: bincount tallies each non-negative integer value and argmax picks the
# most frequent one (the smallest value wins a tie).
v = [1,2,3,4,5,3,3,3,3,3,3,3,3,4,4,4,4,4,4,5]
print(np.argmax(np.bincount(v)))


#共變異數 與 相關係數
def cov(X, Y):
    """Return the sample covariance of X and Y (divides by n - 1).

    Args:
        X, Y: one-dimensional numeric sequences of equal length
            (NumPy arrays or anything ``np.asarray`` accepts).

    Raises:
        ValueError: if X and Y have different lengths.
    """
    x = np.asarray(X)
    y = np.asarray(Y)
    if len(x) != len(y):
        raise ValueError("X and Y must have the same length")
    # Each mean is computed once instead of once per element.
    dx = x - x.mean()
    dy = y - y.mean()
    return np.dot(dx, dy) / (len(x) - 1)

def cor(X, Y):
    """Return the Pearson correlation coefficient of X and Y.

    Args:
        X, Y: one-dimensional numeric sequences of equal length
            (NumPy arrays or anything ``np.asarray`` accepts).

    Raises:
        ValueError: if X and Y have different lengths.
    """
    x = np.asarray(X)
    y = np.asarray(Y)
    if len(x) != len(y):
        raise ValueError("X and Y must have the same length")
    # Each mean is computed once instead of once per element.
    dx = x - x.mean()
    dy = y - y.mean()
    # Sum of cross-deviations over the product of the two deviation norms.
    return np.dot(dx, dy) / ((np.dot(dx, dx) ** 0.5) * (np.dot(dy, dy) ** 0.5))

# Sample data for comparing the hand-written functions with NumPy.
X = np.array([1,2,3,4,5,6])
Y = np.array([3,5,6,7,8,9])

# Using the hand-written functions
print(cov(X,Y))
print(cor(X,Y))

# Using NumPy: both calls return a 2x2 matrix whose off-diagonal entries are
# the covariance / correlation between X and Y.
print(np.cov(X, Y))
print(np.corrcoef(X, Y))








#==========================================================
import scipy.stats as stats
# Evaluate P(X <= -1) where X follows the standard normal distribution:
norm_cdf = stats.norm.cdf
print("P(X <= -1)=", norm_cdf(-1, loc=0, scale=1))

# Plot the cumulative distribution function (CDF) of the standard normal
# over the range -3 to 3:
x = np.linspace(-3, 3, 1000)
norm_cdf = stats.norm.cdf

# NOTE(review): `plt` is not imported anywhere in the visible file —
# presumably matplotlib.pyplot; confirm.
plt.plot(x, norm_cdf(x, loc=0, scale=1))
# Red arrow: from (-1, 0) straight up to the curve at (-1, CDF(-1)).
plt.arrow(-1, 0, 0, norm_cdf(-1, loc=0, scale=1), head_width=0.02,
          width=0.005, head_length=0.02, color='r')
# Green arrow: from that point on the curve horizontally to the left (dx = -3).
plt.arrow(-1, norm_cdf(-1, loc=0, scale=1), -3, 0, head_width=0.02,
          width=0.005, head_length=0.02, color='g')
plt.show()

# Print the 2.5%, 50% and 97.5% quantiles of the standard normal distribution,
# using the percent-point function (inverse CDF).
norm_ppf = stats.norm.ppf

for label, q in (("2.5% 分位數:", 0.025), ("50% 分位數:", 0.5), ("97.5% 分位數:", 0.975)):
    print(label, norm_ppf(q))

# Draw 10000 standard-normal random numbers and inspect them as a histogram:
norm_rvs = stats.norm.rvs(loc=0, scale=1, size=10000)
# 100 bins; density=True normalises the bars so they are comparable to a PDF.
n, bins, patches = plt.hist(norm_rvs, 100, density=True, facecolor='green', alpha=0.6)

# NOTE(review): mu, sigma and x are assigned but not used in the rest of this
# snippet — the curve below is evaluated at the histogram bin edges instead.
mu, sigma = 0, 1
x = np.linspace(stats.norm.ppf(0.0001), stats.norm.ppf(0.9999), 101)
# Standard normal PDF at each bin edge, drawn as a dashed red line.
y = stats.norm.pdf(bins)
l = plt.plot(bins, y, 'r--', linewidth=2)

plt.xlabel('norm_rvs')
plt.ylabel('Probability')
plt.title(r'$\mathrm{Histogram\ of\ Symmetric\ Distribution:}\ \mu=0,\ \sigma=1$')
plt.show()



#==========================================================
# Random sampling: draw all 5 values without replacement,
# i.e. a random ordering of [1, 2, 3, 4, 5]
s0 = np.random.choice([1, 2, 3, 4, 5], 5, replace=False)

# Randomly assign 20 samples to 2 groups:
# 20 draws with replacement from the labels 0 and 1
s1 = np.random.choice(2, 20, replace=True)

# From 20 samples (indices 0-19), randomly pick 10 distinct ones for group 1:
s2 = np.random.choice(20, 10, replace=False)

print(s0)
print(s1)
print(s2)

2020年5月27日 星期三

AIA 20200527 python 小考

#第1題
# Sample input; bound to `x` because that is the name the call below uses.
x = [1, 3, 5, 7, 2, 1, 8]
def less_than_5(array):
    """Return a list of the items of `array` that are smaller than 5, in order."""
    return [item for item in array if item < 5]
print(less_than_5(x))


#第2題
factory1 = {"條件1" :    30, "條件2" : 10, "溫度容許值" : 20}
factory2 = {"條件1" : 48763, "條件3" : 75, "溫度容許值" : 60}
def dict_merge_with_bigger(Dict1, Dict2):
    """Merge two dicts, keeping the larger value for keys present in both.

    Args:
        Dict1, Dict2: dicts whose values for shared keys are comparable.

    Returns:
        A new dict; neither input is modified.
    """
    # Work on a copy so the caller's Dict1 is left untouched.
    merged = dict(Dict1)
    for key, value in Dict2.items():
        # Test membership explicitly: a stored value of 0 (or any falsy
        # value) is still a value and must take part in the comparison.
        if key not in merged or value > merged[key]:
            merged[key] = value
    return merged
print(dict_merge_with_bigger(factory1, factory2))


#第3題
#對python物件不熟(待補)


#第4題
def min_max(array):
    """Min-max scale each column of `array` to the range [0, 1].

    Args:
        array: two-dimensional numeric data (NumPy array or nested lists).

    Returns:
        A float array of the same shape, where each column has been
        transformed as ``(value - column_min) / (column_max - column_min)``.
        A column whose values are all equal has zero range, so the division
        yields NaN for that column.
    """
    # Use the argument itself rather than any module-level array.
    array = np.asarray(array)
    col_min = array.min(axis=0)
    col_max = array.max(axis=0)
    return (array - col_min) / (col_max - col_min)

import numpy as np
# 3x3 example matrix passed to min_max; the result is neither assigned nor
# printed here.
arr = np.array(
[[1,3,5],
 [4,5,6],
 [7,8,9]]
)
min_max(arr)

#第5題
def grouped_mean(df, column_name, threshold):
    """Split `df` on a threshold and return the column means of each part.

    Args:
        df: DataFrame to split.
        column_name: column compared against `threshold`.
        threshold: rows with a value below it form the first group,
            rows with a value at or above it form the second.

    Returns:
        A tuple ``(below, at_or_above)`` of dicts mapping each column name
        to that group's mean.
    """
    column = df[column_name]
    below = df[column < threshold]
    at_or_above = df[column >= threshold]
    return (dict(below.mean()), dict(at_or_above.mean()))

import pandas as pd
# Five sample records with three numeric columns.
data = {"height":[150,170,167,158,160],
        "weight":[38,80,59,60,50],
        "salary":[41, 15, 30, 15, 25]}
df = pd.DataFrame(data)
# Split on weight at 60; the returned pair of dicts is neither assigned nor
# printed here.
grouped_mean(df, "weight", 60)