Histograms in Excel with VBA macros
On this page I show how to make a histogram fast and easy in Excel with VBA macro code.
I also show how to make a bell shaped curve with normal distribution and the same standard deviation and mean value as the histogram.
A histogram is a graphical representation of data distribution. It could be the population's age distribution or quality data, and from the shape you get a pretty good idea if it is a normal distribution or not.
To be sure about the normal distribution-thing you need to test statistically, but I'll leave that to you - this is about VBA!
You can make histograms manually, if you have installed Analysis Toolpak, but it is tedious work (see a guide here) - it is much, much faster to use macros.
In the first example I show how to make a histogram with a bell shaped curve like in Microsoft's example (like the image at the top), and you don't need Analysis Toolpak.
The second example shows how to make a histogram with a user defined number of columns. The macro calculates interval size, counts frequencies and makes the histogram chart.
You can copy the code and paste it into a VBA module, but it is easier to download the zipped workbook with the code. It also contains a simple userform, which is only referred to on this page.
The VBA code on this page assumes that the source data for the histogram is in another workbook, and the histogram will be put on a new sheet in that workbook.
Histogram with bell shaped curve
In the first histogram the chart's number of columns (intervals) is fixed to 8 and the interval size is the data's standard deviation.
To make the bell shaped curve we use 2000 "random" numbers with the same standard deviation and mean value as the data for the histogram.
The trick in doing this (getting the same standard deviation etc.) is to use the worksheet function "NormInv" together with VBA's random numbers generator, "Rnd". You'll se how later, when we get to the procedure "RandomNumbers".
At module level you need to declare 2 public variables: Public bAbort as Boolean and Public bBell as Boolean
Sub BellShape()
bBell = True
OpenForm
If bAbort Then
bAbort = False
Exit Sub
End If
End Sub
Sub OpenForm()
On Error GoTo ErrorHandle
If Workbooks.Count = 1 Then
With Application.FileDialog(msoFileDialogOpen)
.Title = "Open workbook with values for histogram"
.AllowMultiSelect = False
.Filters.Add "Workbooks", "*.xl*", 1
.Show
If .SelectedItems.Count > 0 Then
Workbooks.Open (.SelectedItems(1))
Else
bAbort = True
Exit Sub
End If
End With
ElseIf Workbooks.Count = 2 Then
If Workbooks.Item(1).Name = ThisWorkbook.Name Then
Workbooks(2).Activate
Else
Workbooks(1).Activate
End If
ElseIf Workbooks.Count > 2 Then
Workbooks(Workbooks.Count).Activate
frmPickSheet.Show vbModeless
Exit Sub
ErrorHandle:
MsgBox Err.Description & " Procedure OpenForm"
End Sub
Private Sub CommandButton1_Click()
Unload Me
If bBell Then
Module1.Histogram2
Else
Module1.Histogram
End If
End Sub
Private Sub CommandButton2_Click()
Unload Me
End Sub
Sub Histogram2()
Dim bOK As Boolean
Dim dAvg As Double
Dim dStdev As Double
Dim dStep As Double
Dim dMax As Double
Dim dRangeMax As Double
Dim dMin As Double
Dim lCount As Long
Dim lLast As Long
Dim rCell As Range
Dim rInput As Range
Dim rBell As Range
Dim arData(1 To 8)
Dim arBackup()
Dim arIntervals(1 To 8, 1 To 2)
On Error Resume Next
Set rInput = Application.InputBox _
(prompt:="Select the first cell with a value in " & _
"the column with values for the histogram.", Type:=8)
If rInput Is Nothing Then Exit Sub
On Error GoTo ErrorHandle
With rInput
If .Count > 1 Then
MsgBox "Please select only 1 cell."
ThisWorkbook.Activate
GoTo BeforeExit
End If
If Len(.Value) = 0 Then
MsgBox "The cell is empty."
GoTo BeforeExit
End If
If IsNumeric(.Value) = False Then
If IsNumeric(.Offset(1, 0)) = False Then
MsgBox "It must be a number"
ThisWorkbook.Activate
GoTo BeforeExit
Else
Set rInput = rInput.Offset(1, 0)
End If
End If
If IsEmpty(.Offset(1, 0)) Then
MsgBox "You need more than 1 value for a histogram!"
GoTo BeforeExit
End If
End With
Set rInput = Range(rInput, rInput.End(xlDown))
For Each rCell In rInput
If IsNumeric(rCell.Value) = False Then
MsgBox "The value in cell " & rCell.Address & _
" is not numeric."
rCell.Select
GoTo BeforeExit
End If
Next
On Error GoTo ErrorHandle
Application.ScreenUpdating = False
arBackup = rInput.Value
dAvg = WorksheetFunction.Average(rInput)
dStdev = WorksheetFunction.StDev(rInput)
dMin = dAvg - 3 * dStdev
dRangeMax = WorksheetFunction.Max(rInput) + 1
dStep = dStdev
dMax = dMin + 6 * dStdev
arIntervals(1, 2) = dMin
For lCount = 2 To 8
arIntervals(lCount, 1) = arIntervals(lCount - 1, 2)
If lCount < 8 Then
arIntervals(lCount, 2) = arIntervals(lCount - 1, 2) + dStep
Else
arIntervals(8, 2) = dRangeMax
End If
Next
rInput.Sort Key1:=rInput.Item(1), _
Order1:=xlAscending, Header:=xlGuess, _
OrderCustom:=1, MatchCase:=False, _
Orientation:=xlTopToBottom, _
DataOption1:=xlSortNormal
lLast = 1
For Each rCell In rInput
With rCell
For lCount = lLast To 8
If lCount = 1 Then
If .Value < arIntervals(lCount, 2) Then
lLast = lCount
End If
Else
If .Value < arIntervals(lCount, 2) _
And .Value >= arIntervals(lCount, 1) Then
lLast = lCount
Exit For
End If
End If
Next
If lCount = 9 Then lCount = 8
arData(lCount) = arData(lCount) + 1
End With
Next
For lCount = 1 To 8
If Len(arData(lCount)) = 0 Then
arData(lCount) = 0
End If
Next
Sheets.Add , Worksheets.Item(Sheets.Count)
Set rCell = Range("B2")
For lCount = 0 To 7
With rCell
.Offset(lCount).Value = _
arData(lCount + 1) * 100 / rInput.Count
.Offset(lCount, 1).Value = arData(lCount + 1)
If lCount < 7 Then
.Offset(lCount, -1).Value = arIntervals(lCount + 1, 2)
Else
.Offset(lCount, -1).Value = "More"
End If
End With
Next
Range("A1").Value = "Intervals"
Range("B1").Value = "Percent"
Range("C1").Value = "Frequency"
Range("A1:B8").NumberFormat = "#0.00"
Columns("A:A").EntireColumn.AutoFit
RandomNumbers dAvg, dStdev
Set rBell = Range("M1")
Set rBell = Range(rBell, rBell.End(xlDown))
rBell.Sort Key1:=rBell.Item(1), _
Order1:=xlAscending, Header:=xlGuess, _
OrderCustom:=1, MatchCase:=False, _
Orientation:=xlTopToBottom, _
DataOption1:=xlSortNormal
For lCount = 1 To 8
arData(lCount) = 0
Next
lLast = 1
For Each rCell In rBell
With rCell
For lCount = lLast To 8
If lCount = 1 Then
If .Value < arIntervals(lCount, 2) Then
lLast = lCount
Exit For
End If
Else
If .Value < arIntervals(lCount, 2) _
And .Value >= arIntervals(lCount, 1) Then
lLast = lCount
Exit For
End If
End If
Next
If lCount = 9 Then lCount = 8
arData(lCount) = arData(lCount) + 1
End With
Next
Set rCell = Range("e2")
For lCount = 0 To 7
With rCell
.Offset(lCount, 0).Value = arData(lCount + 1)
If lCount < 7 Then
.Offset(lCount, -1).Value = arIntervals(lCount + 1, 2)
Else
.Offset(lCount, -1).Value = "More"
End If
End With
Next
MakeChart
BeforeExit:
On Error Resume Next
rInput.Value = arBackup
Set rBell = Nothing
Set rCell = Nothing
Set rInput = Nothing
Erase arData
Erase arBackup
Erase arIntervals
Application.ScreenUpdating = True
Exit Sub
ErrorHandle:
MsgBox Err.Description & " Procedure Histogram2"
Resume BeforeExit
End Sub
Sub RandomNumbers(ByVal dAvg As Double, ByVal dStdev As Double)
Dim lCount As Long
Dim dRandom As Double
Randomize
For lCount = 0 To 1999
dRandom = WorksheetFunction.NormInv(Rnd(), dAvg, dStdev)
Range("M1").Offset(lCount, 0).Value = dRandom
Next
End Sub
Sub MakeChart()
Dim sSheet As String
On Error GoTo ErrorHandle
sSheet = ActiveSheet.Name
Range("A1:A9,C1:C9,E1:E9").Select
Charts.Add
ActiveChart.ChartType = xlXYScatterSmoothNoMarkers
ActiveChart.SetSourceData Source:= _
Sheets(sSheet).Range("A1:A9,C1:C9,E1:E9")
ActiveChart.Location Where:=xlLocationAsObject, Name:=sSheet
ActiveChart.SeriesCollection(1).AxisGroup = 2
ActiveChart.SeriesCollection(1).ChartType = xlColumnClustered
ActiveChart.SeriesCollection(1).Select
With ActiveChart.ChartGroups(1)
.Overlap = 0
.GapWidth = 0
.HasSeriesLines = False
.VaryByCategories = False
End With
ActiveChart.PlotArea.Select
With Selection.Border
.ColorIndex = 16
.Weight = xlThin
.LineStyle = xlContinuous
End With
With Selection.Interior
.ColorIndex = 2
.PatternColorIndex = 1
.Pattern = xlSolid
End With
ActiveChart.SeriesCollection(2).Select
With Selection.Border
.ColorIndex = 57
.Weight = xlThick
.LineStyle = xlContinuous
End With
With Selection
.MarkerBackgroundColorIndex = xlNone
.MarkerForegroundColorIndex = xlNone
.MarkerStyle = xlNone
.Smooth = True
.MarkerSize = 3
.Shadow = False
End With
ActiveChart.Legend.Delete
Exit Sub
ErrorHandle:
MsgBox Err.Description & " Procedure MakeChart"
End Sub
That was the first histogram with a bell shaped curve and standard deviation as interval size, just like the example om Microsoft's page, but much easier and without using Analysis Toolpak.
Histogram with userdefined number of columns
In the next example we let the user decide the number of columns (intervals), and the interval size is calculated as:
(max value in data set - min value) / number of columns
We then count the frequency for each interval and make the histogram as a simple columns chart.
To identify the workbook with source data for the histogram we use the same OpenForm procedure as above, so I'll not repeat that.
Sub Simple()
bBell = False
OpenForm
If bAbort Then
bAbort = False
Exit Sub
End If
End Sub
Sub Histogram()
Dim bOK As Boolean
Dim rCell As Range
Dim rInput As Range
Dim dMax As Double
Dim dMin As Double
Dim dStep As Double
Dim lCount As Long
Dim lLast As Long
Dim sInterval As String
Dim arData()
Dim arBackup()
Dim vInput
Dim sSheet As String
On Error Resume Next
Set rInput = Application.InputBox(prompt:= _
"Select the first cell with a value in the " & _
"column with values for the histogram.", Type:=8)
If rInput Is Nothing Then Exit Sub
On Error GoTo ErrorHandle
With rInput
If .Count > 1 Then
MsgBox "Select only 1 cell."
ThisWorkbook.Activate
GoTo BeforeExit
End If
If Len(.Value) = 0 Then
MsgBox "The cell is empty."
GoTo BeforeExit
End If
If IsNumeric(.Value) = False Then
If IsNumeric(.Offset(1, 0)) = False Then
MsgBox "It must be a number"
ThisWorkbook.Activate
GoTo BeforeExit
Else
Set rInput = rInput.Offset(1, 0)
End If
End If
If IsEmpty(.Offset(1, 0)) Then
MsgBox "You need more than 1 value for a histogram."
GoTo BeforeExit
End If
End With
Set rInput = Range(rInput, rInput.End(xlDown))
For Each rCell In rInput
If IsNumeric(rCell.Value) = False Then
MsgBox "The value in cell " & rCell.Address & _
" isn't numeric."
rCell.Select
GoTo BeforeExit
End If
Next
Do Until bOK
vInput = InputBox("How many columns/intervals " & _
"do you want in the histogram?", "Number of columns")
If vInput = vbCancel Then
ThisWorkbook.Activate
GoTo BeforeExit
ElseIf Len(vInput) = 0 Or IsNumeric(vInput) = False Then
MsgBox "It must be a number"
ElseIf vInput < 3 Then
MsgBox vInput & " columns makes no sense for a histogram."
ElseIf vInput > rInput.Count Then
MsgBox "You can't have more columns than values"
Else
bOK = True
End If
Loop
On Error GoTo ErrorHandle
Application.ScreenUpdating = False
ReDim arData(1 To vInput)
arBackup = rInput.Value
dMax = WorksheetFunction.Max(rInput)
dMin = WorksheetFunction.Min(rInput)
dStep = (dMax - dMin) / vInput
rInput.Sort Key1:=rInput.Item(1), _
Order1:=xlAscending, Header:=xlGuess, _
OrderCustom:=1, MatchCase:=False, _
Orientation:=xlTopToBottom, _
DataOption1:=xlSortNormal
lLast = 2
For Each rCell In rInput
With rCell
If .Value < dMin + dStep Then
lCount = 1
Else
For lCount = lLast To vInput
If .Value < dMin + dStep * lCount Then
lLast = lCount
Exit For
End If
Next
End If
If lCount = vInput + 1 Then lCount = vInput
arData(lCount) = arData(lCount) + 1
End With
Next
Sheets.Add , Worksheets.Item(Sheets.Count)
Set rCell = Range("B2")
For lCount = 0 To vInput - 1
rCell.Offset(lCount).Value = _
Round(arData(lCount + 1) * 100 / rInput.Count, 2)
If lCount + 1 < UBound(arData) Then
sInterval = Str(Round(dMin + dStep * lCount, 2)) & _
"-" & Str(Round(dMin + dStep * (lCount + 1) - 0.01, 2))
rCell.Offset(lCount, 1).Value = arData(lCount + 1)
Else
sInterval = Str(Round(dMin + dStep * lCount, 2)) & _
"-" & Str(Round(dMax, 2))
rCell.Offset(lCount, 1).Value = arData(lCount + 1)
End If
rCell.Offset(lCount, -1).Value = sInterval
Next
Range("A1").Value = "Intervals"
Range("B1").Value = "Percent"
Range("C1").Value = "Frequency"
Range("E1").Value = "Average"
Range("E2").Value = "Standard dev."
Range("E3").Value = "Max"
Range("E4").Value = "Min"
Range("F1").Value = WorksheetFunction.Average(rInput)
Range("F2").Value = WorksheetFunction.StDev(rInput)
Range("F3").Value = dMax
Range("F4").Value = dMin
Range("F1:F4").NumberFormat = "#0.00"
Columns("A:A").EntireColumn.AutoFit
sSheet = ActiveSheet.Name
Set rCell = Range("A1:B" & vInput + 1)
Charts.Add
ActiveChart.ChartType = xlColumnClustered
ActiveChart.SetSourceData Source:=rCell, PlotBy:= _
xlColumns
ActiveChart.Location Where:=xlLocationAsObject, Name:=sSheet
With ActiveChart
.HasTitle = True
.ChartTitle.Characters.Text = "Procent"
.Axes(xlCategory, xlPrimary).HasTitle = False
.Axes(xlValue, xlPrimary).HasTitle = False
.HasLegend = False
End With
With ActiveChart.ChartGroups(1)
.Overlap = 0
.GapWidth = 0
.HasSeriesLines = False
.VaryByCategories = False
End With
BeforeExit:
On Error Resume Next
rInput.Value = arBackup
Set rCell = Nothing
Set rInput = Nothing
Erase arData
Erase arBackup
Application.ScreenUpdating = True
Exit Sub
ErrorHandle:
MsgBox Err.Description & " Procedure Histogram"
Resume BeforeExit
End Sub
That was it: Two types of histograms. Redo the chart's layout to your liking.
|