[파이썬 02] series 함수, 타입, 파라미터

넘파이의 데이터 타입 종류

- bool

- int(8,16,32,64)

- uint(8,16,32,64)

- float(16,32,64)

- complex(64,128)

- string

# NumPy must be imported before `np` is used; this file never imported it.
import numpy as np

# Build a small array stored as unsigned 8-bit integers and inspect its dtype.
arr = np.array([1, 2, 3], dtype=np.uint8)
print(arr.dtype)            # the dtype itself
print(arr.dtype.kind)       # one-character code for the general kind of data
print(arr.dtype.alignment)  # alignment, in bytes, reported for this dtype

# astype returns a converted copy, so rebind `arr` to keep the result.
arr = arr.astype('int8')
print(arr.dtype)

- 무한대(inf), 결측치(NaN)

# Print each special floating-point constant together with its Python type.
for special in (np.nan, np.inf, -np.inf):
    print(special, type(special))

함수와 메서드

# Values 0..7 laid out as 4 rows x 2 columns.
arr = np.arange(8).reshape((4, 2))

print(arr)
# axis=0 collapses the rows (one total per column);
# axis=1 collapses the columns (one total per row).
print(np.sum(arr, axis=0))
print(np.sum(arr, axis=1))

[[0 1]
[2 3]
[4 5]
[6 7]]

[12 16]

[ 1 5 9 13]

- random

# Five random integers drawn from 0 (inclusive) to 10 (exclusive).
arr = np.random.randint(0, 10, size=5)
print(arr)

# Same range, but shaped as 2 rows x 5 columns.
arr = np.random.randint(0, 10, size=(2, 5))
print(arr)

실행결과

[3 6 0 9 4]

[[6 7 0 8 1]
[1 2 0 2 7]]

# index 값 가져오기


import FinanceDataReader as fdr

# Index value 1: read the first label of a Series index by position.
# Series has to be imported explicitly; the file only imported FinanceDataReader.
from pandas import Series

s3 = Series([10, 20, 30], index=['a', 'b', 'c'])
print(s3.index[0])

# Columns: index, Open, High, Low, Close, Adj Close, Volume
# Iterate over the index labels themselves and use each label as the key
# for a label-based row lookup with .loc.

df_stock = fdr.DataReader('MNQ=f','2024-01-01').sort_index(ascending=False)
# enumerate supplies the positional counter. The original read `i` before it
# was ever assigned, which raised NameError on the first iteration.
for i, idx in enumerate(df_stock.index):
    date = str(idx)[:10]    # first 10 characters of the label's string form
    dt = df_stock.index[i]  # the same label, fetched by position
    row = df_stock.loc[idx]  # look the row up once instead of once per column
    Open = row['Open']
    High = row['High']
    Low = row['Low']
    Close = row['Close']
    Adj_Close = row['Adj Close']
    Volume = row['Volume']

    print(date)
    print(dt)

# Columns: index, Open, High, Low, Close, Adj Close, Volume
# Every row can also be reached through a range over the row count.
# This walks the data by position (total number of rows) instead of by key.

row_count = len(df_stock)
for idx in range(row_count):
    row = df_stock.iloc[idx]   # positional row lookup
    dt = df_stock.index[idx]   # index label at the same position
    Open1 = row['Open']

    print(dt)
    print(Open1)

# 최고값과 최저값의 인덱스를 구해온다.

idxmax() : 최고값의 인덱스

idxmin() : 최저값의 인덱스

cumprod() : 누적곱

cumprod().iloc[-1] : 누적곱의 마지막 값

date = ["6/1", "6/2", "6/3", "6/4", "6/5"]
high = Series([42800, 42700, 42050, 42950, 43000], index=date)
low = Series([42150, 42150, 41300, 42150, 42350], index=date)

# Daily range; idxmax/idxmin give the labels of the widest and narrowest day.
diff = high.sub(low)

print(diff.idxmax())
print(diff.idxmin())

# Per-day high/low ratio and its running product.
profit = high.div(low)
cumulative = profit.cumprod()

print(profit)
print(cumulative)
print(cumulative.iloc[-1])  # last element of the running product

# 유일값, group by

unique() : 유일값

value_counts() : 값의 카운트

# Mapping of company name -> sector label.
data = {
    "삼성전자": "전지,전자",
    "LG전자": "전지,전자",
    "현대차": "운수장비",
    "NAVER": "서비스업",
    "카카오": "서비스업",
}

# A dict becomes a Series whose index is the keys and whose values are the values.
s = Series(data)

# Distinct sector labels.
sectors = s.unique()
print(sectors)

# Number of companies per sector label.
sector_counts = s.value_counts()
print(sector_counts)

# 시리즈 사용자 함수사용

map : 사용자 함수 매핑

# User-defined function 1
def remove_comma(x):
    """Return the integer value of a comma-separated number string.

    Prints the incoming value first so each call made by ``Series.map``
    is visible on the console.
    """
    print(x, 'in function')
    digits = "".join(x.split(","))
    return int(digits)

# Apply remove_comma to every element; map returns a new Series of the results.
raw_numbers = ["1,234", "5,678", "9,876"]
s = Series(raw_numbers)
result = s.map(remove_comma)
print(result)


# User-defined function 2
def is_greater_than_5000(x):
    """Return '크다' when x is strictly above 5000, otherwise '작다'."""
    return '크다' if x > 5000 else '작다'

# Label every value through is_greater_than_5000; map returns a new Series.
amounts = [1234, 5678, 9876]
s = Series(amounts)
result = s.map(is_greater_than_5000)
print(result)

# 필터링

필터된 값에 필터링 추가

date = ["2019-05-31","2019-05-30","2019-05-29","2019-05-28","2019-05-27"]

close = Series([42500,42550,41800,42550,42650], index=date)
# Named open_ (trailing underscore) so the builtin open() is not shadowed
# for the rest of the script.
open_ = Series([42600,42200,41850,42550,42500], index=date)

# Boolean mask: True on the days whose close is strictly above the open.
cond = close > open_

print(cond)
print(close[cond])

# Equivalent ways to apply the mask and to get the matching index labels.
print(close[close > open_])
print(close.index[close > open_])
print(close[close > open_].index)

diff = close - open_

# A mask built from one pair of Series can filter another Series that
# shares the same index.
print(diff[close > open_])

# 정렬

sort_values(ascending=False) : 내림차순 정렬

rank(ascending=False) : 내림차순 순위

data = [3.1, 2.0, 10.1, 5.1]

index = ["000010", "000020", "000030", "000040"]

s = Series(data, index=index)

print(s)

# sort_values returns a new Series ordered by value; ascending is the default.
s1 = s.sort_values(ascending=True)
print(s1)

# Largest value first.
s2 = s.sort_values(ascending=False)
print(s2)


# rank keeps the original order and reports each value's position in the sort.
for descending in (False, True):
    print(s.rank(ascending=not descending))

저작자표시 비영리 변경금지 (새창열림)

'6. 프로그래밍 > 6.1 파이썬' 카테고리의 다른 글

[파이썬 판다스] fillna 결측치 (1)	2024.06.05
[파이썬 03] (0)	2024.06.05
[파이썬 01] 넘파이 기초 (1)	2024.05.29