省市对应题目数

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import sys
sys.path.append(r'e:\Desktop\新环境\DataMachine_for_timer\src')
import pandas as pd
import numpy as np
import pymysql
from ML import data_suit,plot_suit
from utils import mysqlhelper,elapse,prestohelper
from config import config
from datetime import datetime , time

## 脱敏库
mysqlinstance_tm_new_hfjy = mysqlhelper(**config.tm_new_hfjy)
mysqlinstance_tm_new_bidata = mysqlhelper(**config.tm_new_bidata)

## dmart库
mysqlinstance_dmart_dmart = mysqlhelper(**config.dmart_dmart)
mysqlinstance_dmart_bidata = mysqlhelper(**config.dmart_bidata)

## hrm库
mysqlinstance_hrm_hrm_sale = mysqlhelper(**config.hrm_hrm_sale)
mysqlinstance_hrm_new_hrm = mysqlhelper(**config.hrm_new_hrm)
mysqlinstance_hrm_jobhunt = mysqlhelper(**config.hrm_jobhunt)

## hive库
prestoinstance = prestohelper(**config.hive_prosto)


# In[2]:


sql1 = '''
SELECT
    q.province,q.city,s.subject_name,q.quiz_source,
    count(q.quiz_id) quizCount
FROM
    quiz q
LEFT JOIN
    subject s ON q.subject_id = s.subject_id
GROUP BY
    q.province,q.city,s.subject_name,q.quiz_source
'''

df1 = mysqlinstance_tm_new_hfjy.get_df(sql1)

df1.head()


# In[3]:


#df1.to_excel(r'e:\Desktop\省市对应题数111.xlsx',index=False,encoding='GB18030')


# In[4]:


sql2 = '''
SELECT
    SUBSTRING(dist_id,1,2) province,
    prov_name
FROM
    nyd_district_detail
GROUP BY
    province
'''

df2 = mysqlinstance_tm_new_bidata.get_df(sql2)
df2.head()


# In[5]:


df_sub = pd.merge(df1,df2,how='left',on=['province'])
df_sub.head()


# In[6]:


sql3 = '''
SELECT
    SUBSTRING(dist_id,1,4) city,
    city_name
FROM
    nyd_district_detail
GROUP BY
    city
'''

df3 = mysqlinstance_tm_new_bidata.get_df(sql3)
df3.head()


# In[7]:


df_sub = pd.merge(df_sub,df3,how='left',on=['city'])
df_sub.head()


# In[8]:


df_sub.to_excel(r'e:\Desktop\省市对应题数v5.xlsx',index=False,encoding='GB18030')

文档更新时间: 2019-08-19 11:37   作者:admin