1.1. Pandas分析步骤
- 载入数据
 - 将 外链点击数 进行 COUNT。类似如下SQL:
 
| 
 1 
2 
3 
4 
5 
6 
 | 
 -- Count requests per referring URL and list the most-clicked referrers first.
 -- DESC is required: the default ascending order combined with LIMIT would
 -- return the 100 least-clicked referrers instead.
SELECT reference_url,
    count(*)
FROM log
GROUP BY reference_url
ORDER BY count(*) DESC
LIMIT 0, 100;
 | 
1.2. 代码
| 
 1 
2 
3 
4 
5 
6 
7 
8 
9 
10 
11 
12 
13 
14 
15 
16 
17 
18 
19 
20 
21 
22 
23 
24 
25 
26 
27 
28 
29 
30 
31 
32 
33 
34 
35 
36 
37 
38 
39 
40 
41 
42 
43 
44 
45 
46 
47 
48 
 | 
 cat pd_ng_log_stat.py 
#!/usr/bin/env python 
#-*- coding: utf-8 -*- 
from ng_line_parser import NgLineParser 
import pandas as pd 
import socket 
import struct 
class PDNgLogStat(object):
    """Load nginx access-log lines into a pandas DataFrame and compute statistics.

    Each log line is parsed with ``NgLineParser``; the parsed records are
    collected into ``self.df`` by ``load_data``.
    """

    def __init__(self):
        self.ng_line_parser = NgLineParser()

    def _log_line_iter(self, pathes):
        """Parse every line of every file in *pathes* and yield the records.

        Yields whatever ``NgLineParser.to_dict()`` returns for each line
        (presumably a dict keyed by column name -- confirm against the parser).
        """
        for path in pathes:
            with open(path, 'r') as f:
                for line in f:
                    self.ng_line_parser.parse(line)
                    yield self.ng_line_parser.to_dict()

    def load_data(self, path):
        """Build ``self.df`` from the log file(s) at *path*.

        *path* may be an iterable of file paths or a single path string.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(path, str):
            path = [path]
        self.df = pd.DataFrame(self._log_line_iter(path))

    def url_ref_stat(self):
        """Count how many log records carry each ``reference_url``.

        Returns a DataFrame indexed by ``reference_url`` with a single
        ``count`` column, sorted from most to least frequent.
        """
        # Group on the column name and count the rows in each group.
        url_ref_counts = self.df.groupby('reference_url')['reference_url'].agg(['count'])
        return url_ref_counts.sort_values(by='count', ascending=False)
def main():
    """Load the sample access log and print the per-referrer click counts."""
    file_pathes = ['www.trustauth.cn.access.log']
    pd_ng_log_stat = PDNgLogStat()
    pd_ng_log_stat.load_data(file_pathes)
    # Report external-link clicks; the single-argument print call behaves the
    # same on Python 2 and Python 3.
    print(pd_ng_log_stat.url_ref_stat())
# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
 | 
运行统计和输出结果
| 
 1 
2 
3 
4 
5 
6 
7 
8 
9 
10 
11 
12 
13 
 | 
 python pd_ng_log_stat.py 
                                          count 
reference_url                                   
-                                        574546 
                           136 
[231 rows x 1 columns] 
 | 
文章转载来自:trustauth.cn
                                            
                    领取优惠
                
提交成功!