In [1]:
import pandas as pd
In [3]:
Insurance=pd.read_excel('PresCorp-Insurance-1.xlsx')
In [4]:
Insurance
Out[4]:
id | Gender | Age | Driving_License | Region_Code | Previously_Insured | Vehicle_Age | Vehicle_Damage | Annual_Premium | Policy_Sales_Channel | Vintage | Date of Lead | Response | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 166 | Male | 25 | 1 | 30 | 1 | < 1 Year | No | 29396 | 152 | 22 | 2017-01-01 | 0 |
1 | 442 | Female | 67 | 1 | 29 | 0 | 1-2 Year | Yes | 43753 | 124 | 30 | 2017-01-01 | 1 |
2 | 2702 | Male | 21 | 1 | 8 | 0 | < 1 Year | Yes | 32604 | 152 | 161 | 2017-01-01 | 0 |
3 | 5961 | Male | 21 | 1 | 50 | 1 | < 1 Year | No | 34607 | 160 | 176 | 2017-01-01 | 0 |
4 | 7184 | Male | 49 | 1 | 17 | 0 | 1-2 Year | Yes | 2630 | 156 | 76 | 2017-01-01 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
149995 | 143373 | Male | 71 | 1 | 33 | 1 | 1-2 Year | No | 34558 | 26 | 204 | 2020-04-14 | 0 |
149996 | 144405 | Male | 69 | 1 | 3 | 1 | 1-2 Year | No | 36979 | 26 | 252 | 2020-04-14 | 0 |
149997 | 146143 | Male | 24 | 1 | 35 | 1 | < 1 Year | No | 31775 | 152 | 118 | 2020-04-14 | 0 |
149998 | 148595 | Male | 63 | 1 | 41 | 0 | 1-2 Year | Yes | 43313 | 26 | 78 | 2020-04-14 | 0 |
149999 | 149122 | Male | 33 | 1 | 24 | 1 | 1-2 Year | No | 38221 | 157 | 293 | 2020-04-14 | 0 |
150000 rows × 13 columns
Problem statement-1¶
- Consider a date of 15th August 2024.Keeping this date as reference
find the number of days for converting a lead. You can use the vintage and date of lead column for solving this problem.
In [9]:
from datetime import datetime
Insurance['Date of Lead'] = pd.to_datetime(Insurance['Date of Lead'])
Insurance['Vintage'] = pd.to_datetime(Insurance['Vintage'])
In [11]:
Insurance['Date of Lead'].reset_index()
Out[11]:
index | Date of Lead | |
---|---|---|
0 | 0 | 2017-01-01 |
1 | 1 | 2017-01-01 |
2 | 2 | 2017-01-01 |
3 | 3 | 2017-01-01 |
4 | 4 | 2017-01-01 |
... | ... | ... |
149995 | 149995 | 2020-04-14 |
149996 | 149996 | 2020-04-14 |
149997 | 149997 | 2020-04-14 |
149998 | 149998 | 2020-04-14 |
149999 | 149999 | 2020-04-14 |
150000 rows × 2 columns
In [13]:
Insurance['Vintage'].reset_index()
Out[13]:
index | Vintage | |
---|---|---|
0 | 0 | 1970-01-01 00:00:00.000000022 |
1 | 1 | 1970-01-01 00:00:00.000000030 |
2 | 2 | 1970-01-01 00:00:00.000000161 |
3 | 3 | 1970-01-01 00:00:00.000000176 |
4 | 4 | 1970-01-01 00:00:00.000000076 |
... | ... | ... |
149995 | 149995 | 1970-01-01 00:00:00.000000204 |
149996 | 149996 | 1970-01-01 00:00:00.000000252 |
149997 | 149997 | 1970-01-01 00:00:00.000000118 |
149998 | 149998 | 1970-01-01 00:00:00.000000078 |
149999 | 149999 | 1970-01-01 00:00:00.000000293 |
150000 rows × 2 columns
In [17]:
Insurance['days_to_convert'] = (Insurance['Vintage'] - Insurance['Date of Lead']).dt.days
In [21]:
Insurance['days_to_convert'].reset_index()
Out[21]:
index | days_to_convert | |
---|---|---|
0 | 0 | -17167 |
1 | 1 | -17167 |
2 | 2 | -17167 |
3 | 3 | -17167 |
4 | 4 | -17167 |
... | ... | ... |
149995 | 149995 | -18366 |
149996 | 149996 | -18366 |
149997 | 149997 | -18366 |
149998 | 149998 | -18366 |
149999 | 149999 | -18366 |
150000 rows × 2 columns
In [23]:
reference_date = pd.to_datetime('2024-08-15')
Insurance['days_since_conversion'] = (reference_date - Insurance['Vintage']).dt.days
In [25]:
Insurance['days_since_conversion']
Out[25]:
0 19949 1 19949 2 19949 3 19949 4 19949 ... 149995 19949 149996 19949 149997 19949 149998 19949 149999 19949 Name: days_since_conversion, Length: 150000, dtype: int64
In [27]:
print(Insurance[['Date of Lead', 'Vintage', 'days_to_convert', 'days_since_conversion']])
Date of Lead Vintage days_to_convert \ 0 2017-01-01 1970-01-01 00:00:00.000000022 -17167 1 2017-01-01 1970-01-01 00:00:00.000000030 -17167 2 2017-01-01 1970-01-01 00:00:00.000000161 -17167 3 2017-01-01 1970-01-01 00:00:00.000000176 -17167 4 2017-01-01 1970-01-01 00:00:00.000000076 -17167 ... ... ... ... 149995 2020-04-14 1970-01-01 00:00:00.000000204 -18366 149996 2020-04-14 1970-01-01 00:00:00.000000252 -18366 149997 2020-04-14 1970-01-01 00:00:00.000000118 -18366 149998 2020-04-14 1970-01-01 00:00:00.000000078 -18366 149999 2020-04-14 1970-01-01 00:00:00.000000293 -18366 days_since_conversion 0 19949 1 19949 2 19949 3 19949 4 19949 ... ... 149995 19949 149996 19949 149997 19949 149998 19949 149999 19949 [150000 rows x 4 columns]
In [31]:
Insurance.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 150000 entries, 0 to 149999 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 150000 non-null int64 1 Gender 150000 non-null object 2 Age 150000 non-null int64 3 Driving_License 150000 non-null int64 4 Region_Code 150000 non-null int64 5 Previously_Insured 150000 non-null int64 6 Vehicle_Age 150000 non-null object 7 Vehicle_Damage 150000 non-null object 8 Annual_Premium 150000 non-null int64 9 Policy_Sales_Channel 150000 non-null int64 10 Vintage 150000 non-null datetime64[ns] 11 Date of Lead 150000 non-null datetime64[ns] 12 Response 150000 non-null int64 13 days_to_convert 150000 non-null int64 14 days_since_conversion 150000 non-null int64 dtypes: datetime64[ns](2), int64(10), object(3) memory usage: 17.2+ MB
In [ ]: