Python统计时间段频次demo

admin
2022-10-19 / 0 评论 / 2 阅读 / 正在检测是否收录...
import datetime
import pandas as pd
# from pprint import pprint

from pytz import timezone,all_timezones

# strftime pattern: date, time, then zone abbreviation and UTC offset.
fmt = "%Y-%m-%d %H:%M:%S %Z%z"

# Capture a single timezone-aware "now" in UTC, then express that same
# instant in US/Pacific and in Asia/Shanghai.
now_utc = datetime.datetime.now(timezone('UTC'))
now_pacific = now_utc.astimezone(timezone('US/Pacific'))
now_pabeijing = now_utc.astimezone(timezone('Asia/Shanghai'))

# Emit one line per zone, in the order UTC, US/Pacific, Asia/Shanghai.
print(
    *(moment.strftime(fmt) for moment in (now_utc, now_pacific, now_pabeijing)),
    sep="\n",
)

# for zone in all_timezones:
#     if 'Asia' in zone:
#         print(zone)


def get_loacl(x):
    """Convert one row's US/Pacific order time to Asia/Shanghai time.

    Args:
        x: A row (mapping-like, e.g. the pandas Series passed by
            ``DataFrame.apply(axis=1)``) with a '日期' date string such as
            '2022/10/19' and a '时间' value. A string '时间' must look like
            '13:05 PDT' or '13:05 PST'.

    Returns:
        A timezone-aware ``datetime`` in Asia/Shanghai when '时间' is a
        string; otherwise the '时间' value is returned unchanged.

    Raises:
        ValueError: If the string is not '<HH:MM> <PDT|PST>' or the
            date/time part does not match '%Y/%m/%d %H:%M'.
    """
    raw_time = x['时间']
    if not isinstance(raw_time, str):
        return raw_time

    time_str = x['日期'] + ' ' + raw_time

    # Split the clock from its zone label instead of baking "PDT" into the
    # strptime format, so winter (PST) rows parse as well.
    parts = raw_time.split()
    if len(parts) != 2 or parts[1] not in ('PDT', 'PST'):
        raise ValueError(
            "unsupported time value {0!r}; expected 'HH:MM PDT' or "
            "'HH:MM PST'".format(raw_time)
        )
    clock, zone = parts

    dt = datetime.datetime.strptime(x['日期'] + ' ' + clock, '%Y/%m/%d %H:%M')
    # The label only matters for the ambiguous/non-existent hour around a DST
    # switch; pytz picks the right offset by itself everywhere else.
    dt = timezone('US/Pacific').localize(dt, is_dst=(zone == 'PDT'))
    dt_beijing = dt.astimezone(timezone('Asia/Shanghai'))
    # Diagnostic trace: source text, Pacific-aware value, Beijing value.
    print(time_str, dt, dt_beijing)
    return dt_beijing


def calc_hours(df):
    """Count rows per hour of day, using the converted local time.

    Side effects: adds two columns to ``df`` in place and prints it.
      * 'hours'  - naive parse of '日期' + '时间' (zone label dropped);
                   informational only, not used for counting.
      * 'locals' - result of ``get_loacl`` for each row; its ``.hour``
                   decides which bucket the row falls into.

    Args:
        df: DataFrame with a '日期' column (date strings like '2022/10/19')
            and a '时间' column (strings like '13:05 PDT', or already-parsed
            time values).

    Returns:
        A 24-row DataFrame with columns '时间段' ('0时' .. '23时') and '计数'
        (number of rows whose local hour falls in that bucket). Rows whose
        'locals' value has no usable hour (e.g. NaN / NaT) are not counted.
    """
    df['hours'] = df.apply(_parse_wall_clock, axis=1)
    df['locals'] = df.apply(get_loacl, axis=1)

    print(df)

    # One counter per hour; index directly instead of scanning 24 ranges.
    counts = [0] * 24
    for value in df['locals']:
        hour = getattr(value, 'hour', None)
        # Missing values either lack `.hour` (float NaN) or report NaN (NaT).
        if isinstance(hour, int) and 0 <= hour < 24:
            counts[hour] += 1

    return pd.DataFrame({
        '时间段': ['{0}时'.format(i) for i in range(24)],
        '计数': counts,
    })


def _parse_wall_clock(row):
    """Parse a row's date and clock as a naive datetime, ignoring the zone label."""
    raw_time = row['时间']
    if not isinstance(raw_time, str):
        return raw_time
    tokens = raw_time.split()
    clock = tokens[0] if tokens else raw_time
    # Join with an explicit space: without a separator, '2022/10/1' + '12:05'
    # can be read as day 11, hour 2.
    return datetime.datetime.strptime(
        row['日期'] + ' ' + clock, '%Y/%m/%d %H:%M'
    )




# Load only the '日期' and '时间' columns from the '订单详情' sheet of the
# order workbook in the current directory.
df = pd.read_excel('./亚马逊订单2022-10-19.xlsx',usecols=['日期','时间'],sheet_name='订单详情')

# Build the per-hour frequency table and print it.
d = calc_hours(df)
print(d)
import datetime
import random
 
# Number of random test times to generate (see SAMPLE_COUNT below)
from pprint import pprint
 
# Default number of random sample times generated by my_time().
SAMPLE_COUNT = 10
# Dict key holding a bucket's (start_hour, end_hour) half-open range.
SECTION = 'section'
# Dict key holding the number of samples that fell into the bucket.
SUM = 'sum'


def my_time(sample_count=None):
    """Generate random times of day and tally them into 24 hourly buckets.

    Args:
        sample_count: How many random times to draw. Defaults to the
            module-level ``SAMPLE_COUNT`` (read at call time). Values of
            zero or less draw nothing.

    Returns:
        A list of 24 dicts, one per hour, each shaped like
        ``{SECTION: (h, h + 1), SUM: <count>}``. The counts add up to
        exactly ``sample_count``.

    Side effects:
        Pretty-prints one line per sample showing the time and its bucket.
    """
    if sample_count is None:
        sample_count = SAMPLE_COUNT

    times = [{SECTION: (i, i + 1), SUM: 0} for i in range(24)]

    # range() yields exactly sample_count draws; the previous
    # "increment, then test cnt > SAMPLE_COUNT" loop produced one extra.
    for _ in range(sample_count):
        h = random.randint(0, 23)
        m = random.randint(0, 59)
        t = datetime.time(hour=h, minute=m)
        # Bucket i covers [i, i + 1), so the hour is the bucket index.
        bucket = times[t.hour]
        bucket[SUM] += 1
        pprint(f'{t.strftime("%H:%M")} @ {bucket[SECTION]}')

    return times
 
 
if __name__ == '__main__':
    # Run the random-sample demo only when executed as a script, then
    # pretty-print the bucket list that my_time() returns.
    timex = my_time()
    pprint(timex)
0

评论 (0)

取消