VERSION 5.00
Begin VB.Form Form1 
   Caption         =   "Convert stresses and junctures to single symbols"
   ClientHeight    =   6405
   ClientLeft      =   60
   ClientTop       =   345
   ClientWidth     =   7455
   LinkTopic       =   "Form1"
   ScaleHeight     =   6405
   ScaleWidth      =   7455
   StartUpPosition =   3  'Windows Default
   Begin VB.CheckBox chkContinuousScansion 
      Caption         =   "Record juncture of end of line with special symbol at beginning of next"
      Height          =   615
      Left            =   1080
      TabIndex        =   7
      Top             =   4560
      Value           =   1  'Checked
      Width           =   5415
   End
   Begin VB.CheckBox chkTetrameters 
      Caption         =   "Don't bother to report too-short lines to user"
      Height          =   615
      Left            =   1080
      TabIndex        =   6
      Top             =   4080
      Value           =   1  'Checked
      Width           =   5175
   End
   Begin VB.CheckBox chkExtrametricals 
      Caption         =   "Include lines with extrametrical syllables"
      Height          =   615
      Left            =   1080
      TabIndex        =   5
      Top             =   3600
      Value           =   1  'Checked
      Width           =   5175
   End
   Begin VB.CheckBox chkReduceJuncture 
      Caption         =   "Reduce juncture to 3-level system"
      Height          =   495
      Left            =   1080
      TabIndex        =   4
      Top             =   3240
      Width           =   5415
   End
   Begin VB.CheckBox chkDoubleLineNumber 
      Caption         =   "This file has double line numbering (e.g., book + line)"
      Height          =   375
      Left            =   1080
      TabIndex        =   3
      Top             =   2880
      Width           =   5415
   End
   Begin VB.TextBox Text1 
      Alignment       =   2  'Center
      Height          =   375
      Left            =   1080
      TabIndex        =   2
      Text            =   "ShakespeareBruce"
      Top             =   2280
      Width           =   5415
   End
   Begin VB.CommandButton Command1 
      Caption         =   "Start"
      Height          =   735
      Left            =   2280
      TabIndex        =   0
      Top             =   480
      Width           =   3015
   End
   Begin VB.Label Label1 
      Caption         =   "File name (in InputFiles folder).  Do not include the suffix .txt, which should be part of the file name."
      Height          =   495
      Left            =   1080
      TabIndex        =   1
      Top             =   1800
      Width           =   5415
   End
End
Attribute VB_Name = "Form1"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Option Explicit

'Convert a file from the Hayes/Shisko scansion project and convert it to sequence
'of single symbols that the Hayes/Wilson maxent software can deal with.

    'Number of copies needed to be placed in the training data to get 3000 total.
        Dim mCopiesNeeded As Long

Private Sub Command1_Click()

    Static ButtonStatus As Boolean
    Dim MyLine As String
    Dim UrLine As String
    Dim LineText As String
    Dim CodedString As String
    Dim Stress(12) As Long, Juncture(12) As Long
    Dim SyllableIndex As Long
    Dim LineLength As Long
    Dim DiscardFlag As Boolean
    
    'Variables needed to express the juncture of the last syllable of the preceding line.
        Dim PoemInitial As Boolean
        Dim PrecedingJuncture As Long
    
    'Reuse the button.
        If ButtonStatus = True Then End
        Let ButtonStatus = True
        
    'Warn the user if they have picked an improbable junction of button.
        If chkExtrametricals.Value <> vbChecked And chkContinuousScansion.Value = vbChecked Then
            Select Case MsgBox("Caution:  you are skipping lines with extrametricals, but asking for continuous juncture evaluation.  This is likely to produce irrational results.  Click Yes if you really want to continue.", vbYesNo)
                Case vbYes
                    'do nothing
                Case vbNo
                    Exit Sub
            End Select
        End If
    
    'The software often likes to have at least 3000 lines to work with, which may be obtained
    '   by duplication.  Find out how many times to duplicate.
        Call FindCopiesNeeded
    
    'Open files.
        Open App.Path + "/OutputFiles/TrainingData" + Text1 + ".txt" For Output As #2
        Open App.Path + "/OutputFiles/TestingData" + Text1 + ".txt" For Output As #3
        Open App.Path + "/OutputFiles/DiscardedLinesFor" + Text1 + ".txt" For Output As #4
    
        'Begin with the salad, already preconverted by this program.
        '   We are currently (12/13/08) a bit weary of salads but may return to them later.
        '    Open App.Path + "/ConvertedSalad20000.txt" For Input As #1
        '    Do While Not EOF(1)
        '        Line Input #1, MyLine
        '        Print #3, MyLine
        '    Loop
        '    Close #1
    
    'Open the scansion file to convert its contents.
        Open App.Path + "/InputFiles/" + Text1 + ".txt" For Input As #1
    
    'Proceed through the whole file, digesting one line of verse at a time.
    
    Do While Not EOF(1)
        'Initialize flags (Stress() is used as a flag, plus the unusable-line flag):
            For SyllableIndex = 1 To 12
                Let Stress(SyllableIndex) = 0
            Next SyllableIndex
            Let DiscardFlag = False
            
        'Syllables (actually, just the line text)
SyllableRestart:
        Line Input #1, UrLine
        Let UrLine = s.TabTrim(UrLine)
        Let MyLine = UrLine
        If Trim(MyLine) = "" Then
            MsgBox "Caution:  blank line"
        End If
        'Ignore first column
            Let MyLine = s.Residue(MyLine)
        'If there is double line numbering, ignore the second column as well.
            If chkDoubleLineNumber.Value = vbChecked Then
                Let MyLine = s.Residue(MyLine)
            End If
        'Check that this is a Syllables line
            If LCase(s.Chomp(MyLine)) <> "syllables" Then
                MsgBox "Caution:  the line " + MyLine + " should begin syllables"
            End If
        'Ignore this column, having been checked.
            Let MyLine = s.Residue(MyLine)
        'Grab the line text
            Let LineText = s.Chomp(MyLine)
        
       
        
        
        'Read the stress line
StressRestart:
        Line Input #1, UrLine
        
        Let UrLine = s.TabTrim(UrLine)
        Let MyLine = UrLine
            If Trim(MyLine) = "" Then GoTo StressRestart
        'Ignore first column
            Let MyLine = s.Residue(MyLine)
        'If there is double line numbering, ignore the second column as well.
            If chkDoubleLineNumber.Value = vbChecked Then
                Let MyLine = s.Residue(MyLine)
            End If
        'Check that this is a Syllables line
            If LCase(s.Chomp(MyLine)) <> "stress" Then
                MsgBox "Caution:  the line " + UrLine + " should begin stress"
            End If
        'Make sure the line text matches
            Let MyLine = s.Residue(MyLine)
            If s.Chomp(MyLine) <> LineText Then
                MsgBox "Caution:  in the line " + UrLine + ", the line text varies across entries."
            End If
        'Ignore the column, having been checked.
            Let MyLine = s.Residue(MyLine)
            'Read the stress
                For SyllableIndex = 1 To 12
                    'Install this value.
                        Let Stress(SyllableIndex) = Val(s.Chomp(MyLine))
                    'Vet the value
                        If SyllableIndex <= 10 Then
                            If Stress(SyllableIndex) > 4 Or Stress(SyllableIndex) < 1 Then
                                'Warn if user requested you to.
                                    If chkTetrameters.Value <> vbChecked Then
                                        MsgBox "Illegal stress value in " + UrLine
                                    End If
                            End If
                        End If
                    'Get ready to input the next value.
                        Let MyLine = s.Residue(MyLine)
                Next SyllableIndex
        
        'Read the Juncture line
JunctureRestart:
        Line Input #1, UrLine
        Let UrLine = s.TabTrim(UrLine)
        Let MyLine = UrLine
            If Trim(MyLine) = "" Then GoTo JunctureRestart
        'Ignore first column
            Let MyLine = s.Residue(MyLine)
        'If there is double line numbering, ignore the second column as well.
            If chkDoubleLineNumber.Value = vbChecked Then
                Let MyLine = s.Residue(MyLine)
            End If
        'Check that this is a juncture line
            If LCase(s.Chomp(MyLine)) <> "juncture" Then
                MsgBox "Caution:  the line " + UrLine + " should begin juncture"
            End If
        'Make sure the line text matches
            Let MyLine = s.Residue(MyLine)
            If s.Chomp(MyLine) <> LineText Then
                MsgBox "Caution:  in the line " + UrLine + ", the line text varies across entries."
            End If
        'Ignore the column, having been checked.
            Let MyLine = s.Residue(MyLine)
            'Read the Juncture
                For SyllableIndex = 1 To 12
                    'Install this value.
                        Let Juncture(SyllableIndex) = Val(s.Chomp(MyLine))
                    'Vet the value
                        If SyllableIndex <= 10 Then
                            If Juncture(SyllableIndex) > 5 Or Juncture(SyllableIndex) < 1 Then
                                'Warn if user requested you to.
                                    If chkTetrameters.Value <> vbChecked Then
                                        MsgBox "Illegal juncture value in " + UrLine
                                    End If
                            End If
                        End If
                    'Get ready to input the next value.
                        Let MyLine = s.Residue(MyLine)
                Next SyllableIndex
        
        'Read the comment, and ignore it, to reach the next line.
CommentRestart:
        Line Input #1, UrLine
        Let UrLine = s.TabTrim(UrLine)
        Let MyLine = UrLine
            If Trim(MyLine) = "" Then GoTo CommentRestart
        'Ignore first column
            Let MyLine = s.Residue(MyLine)
        'If there is double line numbering, ignore the second column as well.
            If chkDoubleLineNumber.Value = vbChecked Then
                Let MyLine = s.Residue(MyLine)
            End If
        'Check that this is a comment line
            If LCase(s.Chomp(MyLine)) <> "comment" Then
                MsgBox "Caution:  the line " + UrLine + " should begin comment"
            End If
        'Make sure the line text matches
            Let MyLine = s.Residue(MyLine)
            If s.Chomp(MyLine) <> LineText Then
                MsgBox "Caution:  in the line " + UrLine + ", the line text varies across entries."
            End If
        'Check to see if this line has been marked as poem-initial.
            If Replace(LCase(UrLine), "poem initial", "") <> LCase(UrLine) Then
                Let PoemInitial = True
            ElseIf Replace(LCase(UrLine), "poem-initial", "") <> LCase(UrLine) Then
                Let PoemInitial = True
            ElseIf Replace(LCase(UrLine), "poeminitial", "") <> LCase(UrLine) Then
                Let PoemInitial = True
            Else
                Let PoemInitial = False
            End If
            'If so, then we must specify the preceding juncture as the maximum value of 5, since we
            '   are in fact Utterance-initial
                If PoemInitial = True Then
                    Let PrecedingJuncture = 5
                End If
                        
        'Discard inappropriate lines
            'Except in Pope, hexameters are probably outright errors.
                If Stress(12) > 0 Then
                    Let DiscardFlag = True
                    MsgBox "Hexameter:  " + LineText
                End If
            'with extrametricals:
                If Stress(11) > 0 Then
                    If chkExtrametricals.Value <> vbChecked Then
                        Let DiscardFlag = True
                    End If
                    'MsgBox "Extrametrical:  " + LineText
                End If
            'with fewer than ten syllables (like the octosyllabic sonnet of Shakespeare)
                If Stress(10) = 0 Then
                    Let DiscardFlag = True
                    'Warn the user, unless they asked you not to warn.
                        If chkTetrameters.Value <> vbChecked Then
                            MsgBox "Short line:  " + LineText
                        End If
                End If
        
        'Form the code and print it in both training and test files.
            Let CodedString = ""
            
            'If the user so requested, put in an initial symbol that includes the juncture of the
            '   preceding line.
                If chkContinuousScansion.Value = vbChecked Then
                    Let CodedString = " LB"
                    Let CodedString = CodedString + ProcessedJuncture(PrecedingJuncture)
                End If
            
            
            
            'Establish upper bound
                For SyllableIndex = 1 To 20
                    If Stress(SyllableIndex) = 0 Then
                        Let LineLength = SyllableIndex - 1
                        Exit For
                    End If
                Next SyllableIndex
            For SyllableIndex = 1 To LineLength
                'Put in the meter
                    If SyllableIndex / 2 = Int(SyllableIndex / 2) Then
                        Let CodedString = CodedString + " Ms"
                    Else
                        Let CodedString = CodedString + " Mw"
                    End If
                'Put in the stress.
                    If Stress(SyllableIndex) > 1 Then
                        Let CodedString = CodedString + "S"
                    Else
                        Let CodedString = CodedString + "U"
                    End If
                'Put in the ups, downs, levels of stress.
                    If SyllableIndex = LineLength Then
                        Let CodedString = CodedString + "l"
                    Else
                        If Stress(SyllableIndex) > Stress(SyllableIndex + 1) Then
                            Let CodedString = CodedString + "d"
                        ElseIf Stress(SyllableIndex) = Stress(SyllableIndex + 1) Then
                            Let CodedString = CodedString + "l"
                        Else
                            Let CodedString = CodedString + "u"
                        End If
                    End If
                'Put in the juncture
                    Let CodedString = CodedString + "J" + ProcessedJuncture(Juncture(SyllableIndex))
            Next SyllableIndex
                    
            'Trim back the initial space
                Let CodedString = Trim(Mid(CodedString, 2))
                
            'Print it to the appropriate output file.
                If DiscardFlag = True Then
                    Print #4, CodedString; Chr(9); LineText
                Else
                    'Print to the training data file.
                        Print #2, CodedString
                    'Obsolete bit from previous version of learner.
                        '; Chr(9); Trim(Str(mCopiesNeeded))
                    'Print the testing data file.
                        Print #3, CodedString; Chr(9); LineText; Chr(9); "real"
                End If
                
            'Remember the preceding juncture if you need to include it in the next line.
                Let PrecedingJuncture = Juncture(LineLength)
    
    Loop
    
    Close
    
    Let Command1.Caption = "Done!  Click again to exit."
    
End Sub


Sub FindCopiesNeeded()

    'The software often likes to have at least 3000 lines to work with, which may be obtained
    '   by duplication.  Find out how many times to duplicate.
    
    Dim MyLine As String
    Dim NumberOfLines As Long
    Dim LinesAccumulated As Long
    
    'Pre-read the input file, to count the lines in it.
        Open App.Path + "/InputFiles/" + Text1 + ".txt" For Input As #1
        Do While Not EOF(1)
            Line Input #1, MyLine
            Let NumberOfLines = NumberOfLines + 1
        Loop
        Close #1
    
    'Divide by four, since there are four fields per line.
        Let NumberOfLines = NumberOfLines / 4
    
    'Count off by batched of lines, cutting off when you have enough.
        Do
            Let LinesAccumulated = LinesAccumulated + NumberOfLines
            Let mCopiesNeeded = mCopiesNeeded + 1
            If LinesAccumulated > 3000 Then Exit Sub
        Loop
    
End Sub

Function ProcessedJuncture(MyJuncture As Long) As String

    If chkReduceJuncture.Value = vbChecked Then
        Select Case MyJuncture
            Case 1
                Let ProcessedJuncture = "1"
            Case 2, 3, 4
                Let ProcessedJuncture = "3"
            Case 5
                Let ProcessedJuncture = "5"
        End Select
    Else
        Let ProcessedJuncture = Trim(Str(MyJuncture))
    End If
        
End Function
