vorkna committed
Commit 991e502 · verified · Parent: 595d367

Upload 3 files

Files changed (3)
  1. model/palocr.pth +3 -0
  2. model/palocr.py +78 -0
  3. model/palocr.yaml +9 -0
model/palocr.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83c450861f064af31ee4c309c34e8712ac953527fdb533bd7ca9d70b00e7fa09
+ size 15213813
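Note that model/palocr.pth is a Git LFS pointer, so the roughly 15 MB of weights are not stored in the repository itself; cloning with git-lfs installed (or running `git lfs pull` afterwards) replaces the pointer with the real file. Once fetched, the checkpoint can be inspected as an ordinary PyTorch object. A minimal sketch, assuming the file holds a plain state_dict for the Model class defined in model/palocr.py below:

```python
import torch

# Assumption: the .pth is a plain state_dict (keys may carry a "module."
# prefix if it was saved from nn.DataParallel). Loaded on CPU for inspection.
state_dict = torch.load("model/palocr.pth", map_location="cpu")

# List every parameter tensor and report the total parameter count.
for name, tensor in state_dict.items():
    print(name, tuple(tensor.shape))
print("total parameters:", sum(t.numel() for t in state_dict.values()))
```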
model/palocr.py ADDED
@@ -0,0 +1,78 @@
+ import torch.nn as nn
+
+ class BidirectionalLSTM(nn.Module):
+
+     def __init__(self, input_size, hidden_size, output_size):
+         super(BidirectionalLSTM, self).__init__()
+         self.rnn = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
+         self.linear = nn.Linear(hidden_size * 2, output_size)
+
+     def forward(self, input):
+         """
+         input : visual feature [batch_size x T x input_size]
+         output : contextual feature [batch_size x T x output_size]
+         """
+         try:  # multi gpu needs this
+             self.rnn.flatten_parameters()
+         except:  # quantization doesn't work with this
+             pass
+         recurrent, _ = self.rnn(input)  # batch_size x T x input_size -> batch_size x T x (2*hidden_size)
+         output = self.linear(recurrent)  # batch_size x T x output_size
+         return output
+
+ class VGG_FeatureExtractor(nn.Module):
+
+     def __init__(self, input_channel, output_channel=256):
+         super(VGG_FeatureExtractor, self).__init__()
+         self.output_channel = [int(output_channel / 8), int(output_channel / 4),
+                                int(output_channel / 2), output_channel]
+         self.ConvNet = nn.Sequential(
+             nn.Conv2d(input_channel, self.output_channel[0], 3, 1, 1), nn.ReLU(True),
+             nn.MaxPool2d(2, 2),
+             nn.Conv2d(self.output_channel[0], self.output_channel[1], 3, 1, 1), nn.ReLU(True),
+             nn.MaxPool2d(2, 2),
+             nn.Conv2d(self.output_channel[1], self.output_channel[2], 3, 1, 1), nn.ReLU(True),
+             nn.Conv2d(self.output_channel[2], self.output_channel[2], 3, 1, 1), nn.ReLU(True),
+             nn.MaxPool2d((2, 1), (2, 1)),
+             nn.Conv2d(self.output_channel[2], self.output_channel[3], 3, 1, 1, bias=False),
+             nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True),
+             nn.Conv2d(self.output_channel[3], self.output_channel[3], 3, 1, 1, bias=False),
+             nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True),
+             nn.MaxPool2d((2, 1), (2, 1)),
+             nn.Conv2d(self.output_channel[3], self.output_channel[3], 2, 1, 0), nn.ReLU(True))
+
+     def forward(self, input):
+         return self.ConvNet(input)
+
+ class Model(nn.Module):
+
+     def __init__(self, input_channel, output_channel, hidden_size, num_class):
+         super(Model, self).__init__()
+         """ FeatureExtraction """
+         self.FeatureExtraction = VGG_FeatureExtractor(input_channel, output_channel)
+         self.FeatureExtraction_output = output_channel
+         self.AdaptiveAvgPool = nn.AdaptiveAvgPool2d((None, 1))
+
+         """ Sequence modeling """
+         self.SequenceModeling = nn.Sequential(
+             BidirectionalLSTM(self.FeatureExtraction_output, hidden_size, hidden_size),
+             BidirectionalLSTM(hidden_size, hidden_size, hidden_size))
+         self.SequenceModeling_output = hidden_size
+
+         """ Prediction """
+         self.Prediction = nn.Linear(self.SequenceModeling_output, num_class)
+
+
+     def forward(self, input, text):
+         """ Feature extraction stage """
+         visual_feature = self.FeatureExtraction(input)
+         visual_feature = self.AdaptiveAvgPool(visual_feature.permute(0, 3, 1, 2))
+         visual_feature = visual_feature.squeeze(3)
+
+         """ Sequence modeling stage """
+         contextual_feature = self.SequenceModeling(visual_feature)
+
+         """ Prediction stage """
+         prediction = self.Prediction(contextual_feature.contiguous())
+
+         return prediction
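For orientation, the three classes compose a standard CRNN-style text recognizer: VGG_FeatureExtractor turns a grayscale text-line image into a convolutional feature map, AdaptiveAvgPool2d collapses the height axis so each horizontal position becomes one time step, two stacked BidirectionalLSTMs add sequence context, and the final Linear layer scores every character class at each step (the `text` argument of `forward` is accepted but unused, as is typical for CTC-style decoding). A minimal smoke-test sketch, with the channel and hidden sizes taken from model/palocr.yaml and num_class chosen as a placeholder:

```python
import torch
from palocr import Model  # hypothetical import; assumes model/palocr.py is on the path

# Sizes mirror model/palocr.yaml; num_class=100 is a placeholder for this sketch
# (in practice it would be len(character_list) + 1, leaving room for the CTC blank).
net = Model(input_channel=1, output_channel=256, hidden_size=256, num_class=100)
net.eval()

# One grayscale text-line image: batch 1, 1 channel, 64 px tall (imgH), 256 px wide.
dummy = torch.randn(1, 1, 64, 256)
with torch.no_grad():
    logits = net(dummy, text=None)  # `text` is accepted but unused by this forward()

print(logits.shape)  # torch.Size([1, 63, 100]): (batch, time steps, num_class)
```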
model/palocr.yaml ADDED
@@ -0,0 +1,9 @@
+ network_params:
+   input_channel: 1
+   output_channel: 256
+   hidden_size: 256
+ imgH: 64
+ lang_list:
+   - 'en'
+   - 'th'
+ character_list: 0123456789!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ €กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮฤฦะาำเแโใไๆ๏๐๑๒๓๔๕๖๗๘๙๚๛ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
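The yaml ties the upload together: network_params feeds the Model constructor, imgH is the height text-line images are resized to, lang_list names the supported languages (English and Thai), and character_list is the alphabet the final Linear layer scores. The weights/definition/config trio resembles the layout EasyOCR uses for custom recognition networks, though that is an inference from the file naming. A sketch of wiring the config and weights together by hand, assuming the checkpoint is a plain state_dict and that the class count is len(character_list) + 1 for the CTC blank:

```python
import torch
import yaml
from palocr import Model  # hypothetical import; assumes model/palocr.py is on the path

# Read the network parameters and alphabet from the yaml config.
with open("model/palocr.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

num_class = len(cfg["character_list"]) + 1  # +1 for the CTC blank (assumption)
net = Model(num_class=num_class, **cfg["network_params"])

# Load the LFS-fetched weights; stripping a possible "module." prefix left over
# from nn.DataParallel training is an assumption about how the model was saved.
state_dict = torch.load("model/palocr.pth", map_location="cpu")
state_dict = {k.replace("module.", "", 1): v for k, v in state_dict.items()}
net.load_state_dict(state_dict)
net.eval()
```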