python matplotlib: xticks, tight_layout — pydata: Huiming's learning notes

Suppose in the data there is a column called yq, which is the year and quarter. we want to make this yq as the x tick in the plot. if you simply plt.plot(df.yq, df.actual), it will not work because of the scale of yq.

The solution is to set the size of the data as x in the plot, and then reset the tick by the yq value.

# carete the sample data
df = pd.DataFrame(columns = ['yq', 'actual', 'pred'])
df['yq'] = [200702, 200703, 200704, 200801, 200802, 200803, 200804, 200901, 200902, 200903]
np.random.seed(999)
df['actual'] = np.random.random(10)
df['pred'] = df['actual'] + np.random.normal(0, 0.1, 10)

plt.subplot()
# plot x from 1 to 10, like a time series
plt.plot(range(1, df.shape[0] + 1), df.actual, label = 'actual')
# also x from 1 to 10
plt.plot(range(1, df.shape[0] + 1), df.pred, label = 'pred')
# set yq as the x tick
plt.xticks(range(1, df.shape[0] + 1), [str(int(x)) for x in df.yq.tolist()], rotation = 'vertical')
plt.legend()

png

This is for the plot of the IFRS9 predicted data and actual data.

#1. make up code to generate the sample data
rating = ['A', 'B', 'C']
yq = [200701, 200702, 200703, 200704, 200801, 200802, 200803, 200804,
      200901, 200902, 200903, 200904, 201001, 201002, 201003, 201004]
# multiple index from two list combination
tindex =  pd.MultiIndex.from_tuples(list(itertools.product(rating, yq))*20, names = ['rating', 'yq'])
testdata = pd.DataFrame(columns = ['actual', 'pred'], index = tindex)
testdata['actual'] = np.random.random(len(tindex))
testdata['pred'] = testdata['actual'] + np.random.normal(0, 0.1, len(tindex))
# set index to columns and sort it
testdata.reset_index(inplace = True)
testdata = testdata.sort(['rating', 'yq'])
# add the cumulative qtr for the data, although I know it is 20 here. but in actual, it varies for different pdrr & yq
grouped = testdata.groupby(['rating', 'yq'])
qtrs = []
for name, group in grouped:
    qtrs = qtrs + range(1, group.shape[0] + 1)

testdata['qtrs'] = qtrs

# define the plot function
def vertical_compare(indata = testdata, qtr = 4):
    '''
    qtr=4 for 1 yr, 8 for 2 yrs, 12 for 3 yrs
    '''

    allpdrr = indata.rating.unique()
    allpdrr_len = len(allpdrr)

    for i in range(allpdrr_len):

        pdrr = allpdrr[i]

        plt.subplot(math.ceil(allpdrr_len / 2.0), 2, i + 1)

        df = indata.ix[(indata.rating == pdrr) & (indata.qtrs == qtr), :]

        # in the title with 'y=0.85' to adjust the position of the title; '>1' will be outside the plot
        plt.title(str(qtr/4) + " yr pd plot for rating --- " + pdrr, y = 0.85)
        plt.plot(range(1, df.shape[0] + 1), df.actual, label = 'actual')
        plt.plot(range(1, df.shape[0] + 1), df.pred, label = 'pred')
        # to make the tick on x plot as yq, which is one of the data column
        plt.xticks(range(1, df.shape[0] + 1), [str(int(x)) for x in df.yq.tolist()], rotation = 90)
        plt.legend()
        # with 'tight_layout()' the graph looks better
        plt.tight_layout()

    plt.show()

png

As you can see, it is a little crowd in the x ticks. Can we hide some ticks so that it is more clearly shown? The awnser is yes. We can adjust the xticks as below. if it is plot in fig, ax = plt.subplot(), then you can use set_visible(False) to adjust as shown in the reference.

# define the plot function
def vertical_compare(indata = testdata, qtr = 4):
    '''
    qtr=4 for 1 yr, 8 for 2 yrs, 12 for 3 yrs
    '''

    allpdrr = indata.rating.unique()
    allpdrr_len = len(allpdrr)

    for i in range(allpdrr_len):

        pdrr = allpdrr[i]

        plt.subplot(math.ceil(allpdrr_len / 2.0), 2, i + 1)

        df = indata.ix[(indata.rating == pdrr) & (indata.qtrs == qtr), :]

        # in the title with 'y=0.85' to adjust the position of the title; '>1' will be outside the plot
        plt.title(str(qtr/4) + " yr pd plot for rating --- " + pdrr, y = 0.85)
        plt.plot(range(1, df.shape[0] + 1), df.actual, label = 'actual')
        plt.plot(range(1, df.shape[0] + 1), df.pred, label = 'pred')
        # to make the tick on x plot as yq, which is one of the data column
        plt.xticks(range(1, df.shape[0] + 1)[::2], [str(int(x)) for x in df.yq.tolist()][::2], rotation = 90)
        plt.legend()
        # with 'tight_layout()' the graph looks better
        plt.tight_layout()

    plt.show()

png

Reference:

ticks_and_spines example code: ticklabels_demo_rotation.py

pylab_examples example code: subplots_demo.py

Changing the “tick frequency” on x or y axis in matplotlib?