select id, name from T where id = 1
select area,sum(amount) from T group by area
SELECT CODE, MAX(con_rise) AS longest_up_days FROM ( SELECT CODE, COUNT(*) AS con_rise FROM ( SELECT CODE, DT, SUM(updown_flag) OVER (PARTITION BY CODE ORDER BY CODE, DT) AS no_up_days FROM ( SELECT CODE, DT, CASE WHEN CL > LAG(CL) OVER (PARTITION BY CODE ORDER BY CODE, DT) THEN 0 ELSE 1 END AS updown_flag FROM stock ) ) GROUP BY CODE, no_up_days ) GROUP BY CODE
SQL没有设置断点、单步执行这些很常见的调试方法,嵌套多层时就要逐层拆分执行。
SPL显得更简单,不再需要循环语句
SELECT CODE, MAX(con_rise) AS longest_up_days FROM ( SELECT CODE, COUNT(*) AS con_rise FROM ( SELECT CODE, DT, SUM(updown_flag) OVER (PARTITION BY CODE ORDER BY CODE, DT) AS no_up_days FROM ( SELECT CODE, DT, CASE WHEN CL > LAG(CL) OVER (PARTITION BY CODE ORDER BY CODE, DT) THEN 0 ELSE 1 END AS updown_flag FROM stock ) ) GROUP BY CODE, no_up_days ) GROUP BY CODE
A | |
1 | =stock.sort(StockRecords.txt) |
2 | =T(A1).sort(DT) |
3 | =A2.group(CODE;~.group@i(CL< CL[-1]).max(~.len()):max_increase_days) |
import pandas as pd stock_file = "StockRecords.txt" stock_info = pd.read_csv(stock_file,sep="\t") stock_info.sort_values(by=['CODE','DT'],inplace=True) stock_group = stock_info.groupby(by='CODE') stock_info['label'] = stock_info.groupby('CODE')['CL'].diff().fillna(0).le(0).astype(int).cumsum() max_increase_days = {} for code, group in stock_info.groupby('CODE'): max_increase_days[code] = group.groupby('label').size().max() – 1 max_rise_df = pd.DataFrame(list(max_increase_days.items()), columns=['CODE', 'max_increase_days'])
A | |
1 | smallData.txt |
2 | =file(A1).import@t() |
3 | =A2.groups(state;sum(amount):amount) |
A | |
1 | bigData.txt |
2 | =file(A1).cursor@t() |
3 | =A2.groups(state;sum(amount):amount) |
A | |
1 | bigData.txt |
2 | =file(A1).cursor@t() |
3 | =A2.groups(state;sum(amount):amount) |
A | |
1 | bigData.txt |
2 | =file(A1).cursor@tm() |
3 | =A2.groups(state;sum(amount):amount) |
普通笔记本即可流畅运行,无须服务器集群
高压缩率文件存储数据,易于携带
import pandas as pd stock_file = "StockRecords.txt" stock_info = pd.read_csv(stock_file,sep="\t") stock_info.sort_values(by=['CODE','DT'],inplace=True) stock_group = stock_info.groupby(by='CODE') stock_info['label'] = stock_info.groupby('CODE')['CL'].diff().fillna(0).le(0).astype(int).cumsum() max_increase_days = {} for code, group in stock_info.groupby('CODE'): max_increase_days[code] = group.groupby('label').size().max() – 1 max_rise_df = pd.DataFrame(list(max_increase_days.items()), columns=['CODE', 'max_increase_days'])