Jump to content

Visual Basic for Applications/Character Frequency Charts in Excel

From Wikibooks, open books for an open world

Summary

[edit | edit source]

VBA Code Listings

[edit | edit source]

At times it is useful to make an Excel chart from VBA. The code below makes a frequency bar chart based on a given string. It is shown in testing mode with a random string input. The user should replace that string with his own. There are various charting options.

Option Explicit

Sub Test()
    'run this to test the charting of this module
    
    Dim str As String, n As Long
    
    'make random mixed characters (for testing only)
    str = MakeLongMixedString(10000)
    
    'make a sorted frequency chart of the characters in str
    MakeCharaFreqChart str, 1, "n"
    
    MsgBox "Chart done"
    
End Sub

Function MakeLongMixedString(nNumChr As Long) As String
    'Makes a long capital letters string using rnd VBA function
    
    Dim n As Long, sChr As String, nAsc As Long
    Dim nSamp As Long, sAccum As String, c As Long
    
    '========================================================================
    ' Notes and Conclusions:
    ' The VBA function rnd is UNSUITED to generation of long random strings.
    ' Both length and number of repeats increases rapidly near 256 charas.
    ' Reasonable results can be obtained by keeping below 128 characters.
    ' For longer strings, consider hash-based methods of generation.
    '========================================================================
        
    Do Until c >= nNumChr
        DoEvents
        Randomize
        'A to Z corresponds to asci 65 to 90
        nSamp = Int((90 - 48 + 1) * Rnd + 48)
        If (nSamp >= 48 And nSamp <= 57) Or (nSamp >= 65 And nSamp <= 90) Then
           sChr = Chr(nSamp)
           sAccum = sAccum & sChr
           c = c + 1
        End If
    Loop
    
    'MsgBox sAccum
    
    MakeLongMixedString = sAccum

End Function

Sub MakeCharaFreqChart(str As String, bSort As Boolean, sYUnits As String)
    'For use in Excel
    'makes a character frequency chart using the parameter string str
    'bSort=True to sort the chart from highest (left) otherwise unsorted
    'sYUnits string sets measurement method, number charas, percentage total, or normalised to max value
    
    Dim vC As Variant, nRow As Long, vRet As Variant
    
    GetCharaCounts str, vC
    
    Select Case LCase(sYUnits)
    Case "n", "numbers", "number", "count", "#"
        nRow = 1
    Case "p", "percent", "percentage", "%"
        nRow = 2
    Case "r", "relative", "normalized", "normalised"
        nRow = 3
    End Select
    
    If bSort Then
        SortColumns vC, 1, 0, vRet
        ChartColumns vRet, 1, 0, nRow, 1, "Selective Distribution of a " & Len(str) & " Character String", _
        "Character Set of Interest", "Number of Each"
    Else
        ChartColumns vC, 1, 0, nRow, 1, "Selective Distribution of a " & Len(str) & " Character String", _
        "Character Set of Interest", "Number of Each"
    End If
    
End Sub

Sub GetCharaCounts(sIn As String, vR As Variant)
    'loads an array with character counts
    
    Dim vRef As Variant, LBC As Long, UBC As Long, LBR As Long, UBR As Long
    Dim vW() As Variant, X() As Variant, Y() As Variant, vRet As Variant
    Dim sUC As String, nC As Long, n As Long, sS As String, ValMax As Variant
    
    'Notes for vR and vW loads
    'Row 0: the ref chara set from vRef
    'Row 1: the number of hits found in str for each chara in ref set
    'Row 2: the percentage that hits rep of total charas in str
    'Row 3: the normalized values for each chara with max as unity
    
    If sIn = "" Then
        MsgBox "Empty input string - closing"
        Exit Sub
    End If
    
    'load the intended x-axis display set here...add to it or subtract as required
    vRef = Array("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", _
    "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", _
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9")       ' ,"(", ")", ":", ".", ",")
    
    LBC = LBound(vRef): UBC = UBound(vRef)
    ReDim vW(0 To 3, LBC To UBC)
    LBR = LBound(vW, 1): UBR = UBound(vW, 1)
    ReDim X(LBC To UBC)
    ReDim Y(LBC To UBC)
    
    sUC = UCase(sIn)
    nC = Len(sIn)
    For n = LBC To UBC
        vW(0, n) = vRef(n) 'all charas to first row
        sS = vW(0, n)
        'count hits in string for each chara in ref set
        vW(1, n) = UBound(Split(sUC, sS)) - LBound(Split(sUC, sS)) 'count hits
        'calculate hits as percentages of total chara count
        vW(2, n) = Round(((vW(1, n)) * 100 / nC), 1)
    Next n
    
    'find max value in array count
    SortColumns vW, 1, False, vRet
    ValMax = vRet(1, 0)
    
    'normalize to unity as max value
    For n = LBC To UBC
        vW(3, n) = Round(vW(1, n) / ValMax, 1)
    Next n
    
    vR = vW()
    
End Sub

Sub ChartColumns(ByVal VA As Variant, bColChart As Boolean, RowX As Long, RowY As Long, _
    Optional bXValueLabels As Boolean = 0, Optional sTitle As String = "", _
    Optional sXAxis As String, Optional sYAxis As String)
    'this is the actual chart procedure. It charts the array data in VA 
    'the array must contain two data rows for the chart; with x and y data
    'the chart can be column or scatter chart; RowX and RowY parameters identify the data rows for each axis.
    'optional parameters are included for value labels, chart title, x axis label, and y axis label
    
    Dim LBC As Long, UBC As Long, LBR As Long, UBR As Long, n As Long
    Dim X As Variant, Y As Variant, sX As String, sY As String, sT As String, oC As Chart
    
    LBR = LBound(VA, 1): UBR = UBound(VA, 1)
    LBC = LBound(VA, 2): UBC = UBound(VA, 2)
    ReDim X(LBC To UBC)
    ReDim Y(LBC To UBC)
    
    'labels for specific charts
    If sTitle = "" Then sT = "Title Goes Here" Else sT = sTitle
    If sXAxis = "" Then sX = "X Axis Label Goes Here" Else sX = sXAxis
    If sYAxis = "" Then sY = "Y Axis Label Goes Here" Else sY = sYAxis
    
    If RowX < LBR Or RowX > UBR Or RowY < LBC Or RowY > UBC Then
        MsgBox "Parameter data rows out of range in ChartColumns - closing"
        Exit Sub
    End If
    
    'transfer data to chart arrays
    For n = LBC To UBC
        X(n) = VA(RowX, n) 'x axis data
        Y(n) = VA(RowY, n) 'y axis data
    Next n
    
    'make chart
    Charts.Add
    
    'choose a column chart or a scatter chart
    If bColChart Then
        ActiveChart.ChartType = xlColumnClustered 'column chart
    Else
        ActiveChart.ChartType = xlXYScatterLinesNoMarkers 'line scatter chart
        'ActiveChart.ChartType = xlXYScatter 'point scatter chart
End If
    
    'assign the data and labels to a series
    With ActiveChart.SeriesCollection
        If .Count = 0 Then .NewSeries
            If bXValueLabels And bColChart Then
                .Item(1).ApplyDataLabels Type:=xlDataLabelsShowValue
                'item(1).DataLabels.Orientation = xlUpward
                .Item(1).DataLabels.Orientation = 60
            End If
            If Val(Application.Version) >= 12 Then
                .Item(1).Values = Y
                .Item(1).XValues = X
            Else
                .Item(1).Select
                Names.Add "_", X
                ExecuteExcel4Macro "series.x(!_)"
                Names.Add "_", Y
                ExecuteExcel4Macro "series.y(,!_)"
                Names("_").Delete
            End If
        End With
        
        'apply title string, x and y axis strings, and delete legend
        With ActiveChart
            .HasTitle = True
            .ChartTitle.Text = sT
            .SetElement (msoElementPrimaryCategoryAxisTitleAdjacentToAxis) 'X
            .Axes(xlCategory).AxisTitle.Text = sX
            .SetElement (msoElementPrimaryValueAxisTitleRotated) 'Y
            .Axes(xlValue).AxisTitle.Text = sY
            .Legend.Delete
        End With
        
        ActiveChart.ChartArea.Select
    
End Sub

Sub SortColumns(ByVal VA As Variant, nRow As Long, bAscend As Boolean, vRet As Variant)
    'bubblesorts the input array's columns using values in the specified row, ascending or descending, ret in vRet
    
    Dim i As Long, j As Long, bCond As Boolean, Y As Long, t As Variant
    Dim LBC As Long, UBC As Long, LBR As Long, UBR As Long
    
    LBR = LBound(VA, 1): UBR = UBound(VA, 1)
    LBC = LBound(VA, 2): UBC = UBound(VA, 2)
    
    For i = LBC To UBC - 1
        For j = LBC To UBC - 1
            If bAscend Then
                bCond = VA(nRow, j) > VA(nRow, j + 1)
            Else
                bCond = VA(nRow, j) < VA(nRow, j + 1)
            End If
            If bCond Then
                For Y = LBR To UBR
                    t = VA(Y, j)
                    VA(Y, j) = VA(Y, j + 1)
                    VA(Y, j + 1) = t
                Next Y
            End If
        Next j
    Next i
    
    vRet = VA
    
End Sub

Sub DeleteAllWorkbookCharts()
    'run this manually to delete all charts
    'not at this stage called in any procedure
        
    Dim oC
    
    Application.DisplayAlerts = False
    For Each oC In ThisWorkbook.Charts
        oC.Delete
    Next oC
    Application.DisplayAlerts = True
    
End Sub